From 2e9c1d6d55beb7bbb1101b8bf084d63f9007af49 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Mon, 18 Oct 2010 13:14:59 +0100 Subject: [PATCH 01/69] Enhancement #7207 - Add possibility of deleting items in check_library --- src/calibre/gui2/dialogs/check_library.py | 62 +++++++++++++++++++---- src/calibre/library/check_library.py | 58 ++++++++++----------- src/calibre/library/cli.py | 4 +- 3 files changed, 79 insertions(+), 45 deletions(-) diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py index 46071d3c06..741a42893d 100644 --- a/src/calibre/gui2/dialogs/check_library.py +++ b/src/calibre/gui2/dialogs/check_library.py @@ -3,11 +3,15 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __license__ = 'GPL v3' +import os + from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \ QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \ - QLineEdit + QLineEdit, Qt +from calibre.gui2.dialogs.confirm_delete import confirm from calibre.library.check_library import CheckLibrary, CHECKS +from calibre.library.database2 import delete_file class Item(QTreeWidgetItem): pass @@ -24,23 +28,28 @@ class CheckLibraryDialog(QDialog): self.setLayout(self._layout) self.log = QTreeWidget(self) + self.log.itemChanged.connect(self.item_changed) self._layout.addWidget(self.log) - self.check = QPushButton(_('Run the check')) + self.check = QPushButton(_('&Run the check')) self.check.setDefault(False) self.check.clicked.connect(self.run_the_check) - self.copy = QPushButton(_('Copy to clipboard')) + self.copy = QPushButton(_('Copy &to clipboard')) self.copy.setDefault(False) self.copy.clicked.connect(self.copy_to_clipboard) self.ok = QPushButton('&Done') self.ok.setDefault(True) self.ok.clicked.connect(self.accept) + self.delete = QPushButton('Delete &marked') + self.delete.setDefault(False) + self.delete.clicked.connect(self.delete_marked) self.cancel = QPushButton('&Cancel') self.cancel.setDefault(False) self.cancel.clicked.connect(self.reject) self.bbox = QDialogButtonBox(self) self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole) self.bbox.addButton(self.check, QDialogButtonBox.ActionRole) + self.bbox.addButton(self.delete, QDialogButtonBox.ActionRole) self.bbox.addButton(self.cancel, QDialogButtonBox.RejectRole) self.bbox.addButton(self.ok, QDialogButtonBox.AcceptRole) @@ -83,35 +92,66 @@ class CheckLibraryDialog(QDialog): plaintext = [] def builder(tree, checker, check): - attr = check[0] + attr, h, checkable = check list = getattr(checker, attr, None) if list is None: return - h = check[1] tl = Item([h]) for problem in list: it = Item() + if checkable: + it.setFlags(Qt.ItemIsEnabled | Qt.ItemIsUserCheckable) + it.setCheckState(1, False) + else: + it.setFlags(Qt.ItemIsEnabled) it.setText(0, problem[0]) it.setText(1, problem[1]) - p = ', '.join(problem[2]) - it.setText(2, p) tl.addChild(it) - plaintext.append(','.join([h, problem[0], problem[1], p])) + self.all_items.append(it) + plaintext.append(','.join([h, problem[0], problem[1]])) tree.addTopLevelItem(tl) t = self.log t.clear() - t.setColumnCount(3); - t.setHeaderLabels([_('Name'), _('Path from library'), _('Additional Information')]) + t.setColumnCount(2); + t.setHeaderLabels([_('Name'), _('Path from library')]) + self.all_items = [] for check in CHECKS: builder(t, checker, check) t.setColumnWidth(0, 200) t.setColumnWidth(1, 400) - + self.delete.setEnabled(False) self.text_results = '\n'.join(plaintext) + def item_changed(self, item, column): + print 'item_changed' + for it in self.all_items: + if it.checkState(1): + self.delete.setEnabled(True) + return + + def delete_marked(self): + print 'delete marked' + if not confirm('

'+_('The marked files and folders will be ' + 'permanently deleted. Are you sure?') + +'

', 'check_library_editor_delete', self): + return + + # Sort the paths in reverse length order so that we can be sure that + # if an item is in another item, the sub-item will be deleted first. + items = sorted(self.all_items, + key=lambda x: len(x.text(1)), + reverse=True) + for it in items: + if it.checkState(1): + try: + delete_file(os.path.join(self.db.library_path ,unicode(it.text(1)))) + except: + print 'failed to delete', os.path.join(self.db.library_path ,unicode(it.text(1))) + self.run_the_check() + def copy_to_clipboard(self): QApplication.clipboard().setText(self.text_results) diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py index a6f3d40131..85f3d4747c 100644 --- a/src/calibre/library/check_library.py +++ b/src/calibre/library/check_library.py @@ -14,14 +14,14 @@ from calibre.ebooks import BOOK_EXTENSIONS EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS) NORMALS = frozenset(['metadata.opf', 'cover.jpg']) -CHECKS = [('invalid_titles', _('Invalid titles')), - ('extra_titles', _('Extra titles')), - ('invalid_authors', _('Invalid authors')), - ('extra_authors', _('Extra authors')), - ('missing_formats', _('Missing book formats')), - ('extra_formats', _('Extra book formats')), - ('extra_files', _('Unknown files in books')), - ('failed_folders', _('Folders raising exception')) +CHECKS = [('invalid_titles', _('Invalid titles'), True), + ('extra_titles', _('Extra titles'), True), + ('invalid_authors', _('Invalid authors'), True), + ('extra_authors', _('Extra authors'), True), + ('missing_formats', _('Missing book formats'), False), + ('extra_formats', _('Extra book formats'), True), + ('extra_files', _('Unknown files in books'), True), + ('failed_folders', _('Folders raising exception'), False) ] @@ -41,7 +41,6 @@ class CheckLibrary(object): self.all_lc_dbpaths = frozenset([f.lower() for f in self.all_dbpaths]) self.db_id_regexp = re.compile(r'^.* \((\d+)\)$') - self.bad_ext_pat = re.compile(r'[^a-z0-9]+') self.dirs = [] self.book_dirs = [] @@ -78,7 +77,7 @@ class CheckLibrary(object): auth_path = os.path.join(lib, auth_dir) # First check: author must be a directory if not os.path.isdir(auth_path): - self.invalid_authors.append((auth_dir, auth_dir, [])) + self.invalid_authors.append((auth_dir, auth_dir)) continue self.potential_authors[auth_dir] = {} @@ -93,7 +92,7 @@ class CheckLibrary(object): m = self.db_id_regexp.search(title_dir) # Second check: title must have an ID and must be a directory if m is None or not os.path.isdir(title_path): - self.invalid_titles.append((auth_dir, db_path, [title_dir])) + self.invalid_titles.append((auth_dir, db_path)) continue id = m.group(1) @@ -101,12 +100,12 @@ class CheckLibrary(object): if self.is_case_sensitive: if int(id) not in self.all_ids or \ db_path not in self.all_dbpaths: - self.extra_titles.append((title_dir, db_path, [])) + self.extra_titles.append((title_dir, db_path)) continue else: if int(id) not in self.all_ids or \ db_path.lower() not in self.all_lc_dbpaths: - self.extra_titles.append((title_dir, db_path, [])) + self.extra_titles.append((title_dir, db_path)) continue # Record the book to check its formats @@ -115,7 +114,7 @@ class CheckLibrary(object): # Fourth check: author directories that contain no titles if not found_titles: - self.extra_authors.append((auth_dir, auth_dir, [])) + self.extra_authors.append((auth_dir, auth_dir)) for x in self.book_dirs: try: @@ -132,9 +131,7 @@ class CheckLibrary(object): ext = ext[1:].lower() if ext in EBOOK_EXTENSIONS: return True - if self.bad_ext_pat.search(ext) is not None: - return False - return True + return False def process_book(self, lib, book_info): (db_path, title_dir, book_id) = book_info @@ -148,18 +145,18 @@ class CheckLibrary(object): if self.is_case_sensitive: unknowns = frozenset(filenames-formats-NORMALS) # Check: any books that aren't formats or normally there? - if unknowns: - self.extra_files.append((title_dir, db_path, unknowns)) + for u in unknowns: + self.extra_files.append((title_dir, os.path.join(db_path, u))) # Check: any book formats that should be there? missing = book_formats - formats - if missing: - self.missing_formats.append((title_dir, db_path, missing)) + for m in missing: + self.missing_formats.append((title_dir, os.path.join(db_path, m))) # Check: any book formats that shouldn't be there? extra = formats - book_formats - NORMALS - if extra: - self.extra_formats.append((title_dir, db_path, extra)) + for e in extra: + self.extra_formats.append((title_dir, os.path.join(db_path, e))) else: def lc_map(fnames, fset): m = {} @@ -171,19 +168,16 @@ class CheckLibrary(object): formats_lc = frozenset([f.lower() for f in formats]) unknowns = frozenset(filenames_lc-formats_lc-NORMALS) # Check: any books that aren't formats or normally there? - if unknowns: - self.extra_files.append((title_dir, db_path, - lc_map(filenames, unknowns))) + for f in lc_map(filenames, unknowns): + self.extra_files.append((title_dir, os.path.join(db_path, f))) book_formats_lc = frozenset([f.lower() for f in book_formats]) # Check: any book formats that should be there? missing = book_formats_lc - formats_lc - if missing: - self.missing_formats.append((title_dir, db_path, - lc_map(book_formats, missing))) + for m in lc_map(book_formats, missing): + self.missing_formats.append((title_dir, os.path.join(db_path, m))) # Check: any book formats that shouldn't be there? extra = formats_lc - book_formats_lc - NORMALS - if extra: - self.extra_formats.append((title_dir, db_path, - lc_map(formats, extra))) + for e in lc_map(formats, extra): + self.extra_formats.append((title_dir, os.path.join(db_path, e))) diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 08fd0fbf86..7d3fb329e0 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -943,11 +943,11 @@ def command_check_library(args, dbpath): return if opts.csv: for i in list: - print check[1] + ',' + i[0] + ',' + i[1] + ',' + '|'.join(i[2]) + print check[1] + ',' + i[0] + ',' + i[1] else: print check[1] for i in list: - print ' %-30.30s - %-30.30s - %s'%(i[0], i[1], ', '.join(i[2])) + print ' %-40.40s - %-40.40s'%(i[0], i[1]) db = LibraryDatabase2(dbpath) checker = CheckLibrary(dbpath, db) From 201a3d195805dfcf5b64316d21c3786c076a3068 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 09:46:00 -0600 Subject: [PATCH 02/69] ... --- src/calibre/gui2/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 47bb61a7dc..8398257bde 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -514,7 +514,7 @@ class FileDialog(QObject): if f and os.path.exists(f): self.selected_files.append(f) else: - opts = QFileDialog.ShowDirsOnly if mode == QFileDialog.DirectoryOnly else QFileDialog.Option() + opts = QFileDialog.ShowDirsOnly if mode == QFileDialog.Directory else QFileDialog.Option() f = unicode(QFileDialog.getExistingDirectory(parent, title, initial_dir, opts)) if os.path.exists(f): self.selected_files.append(f) @@ -534,7 +534,7 @@ class FileDialog(QObject): def choose_dir(window, name, title, default_dir='~'): fd = FileDialog(title=title, filters=[], add_all_files_filter=False, - parent=window, name=name, mode=QFileDialog.DirectoryOnly, + parent=window, name=name, mode=QFileDialog.Directory, default_dir=default_dir) dir = fd.get_files() if dir: From f57024d7c892d8468d95f1226be087cdaf5a4614 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 10:28:17 -0600 Subject: [PATCH 03/69] /browse: Fix bug in handling custom column based categories --- resources/content_server/mobile.css | 2 ++ src/calibre/library/server/browse.py | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/resources/content_server/mobile.css b/resources/content_server/mobile.css index 0022b2a134..a887684841 100644 --- a/resources/content_server/mobile.css +++ b/resources/content_server/mobile.css @@ -53,6 +53,7 @@ div.navigation { } #listing td { padding: 0.25em; + vertical-align: middle; } #listing td.thumbnail { @@ -73,6 +74,7 @@ div.navigation { overflow: hidden; text-align: center; text-decoration: none; + vertical-align: middle; } #logo { diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index 247e6945e6..ea69ad77ef 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en' import operator, os, json from binascii import hexlify, unhexlify +from urllib import quote import cherrypy @@ -136,7 +137,7 @@ def get_category_items(category, items, db, datatype): # {{{ q = i.category if not q: q = category - href = '/browse/matches/%s/%s'%(q, id_) + href = '/browse/matches/%s/%s'%(quote(q), quote(id_)) return templ.format(xml(name), rating, xml(desc), xml(href), rstring) @@ -329,7 +330,7 @@ class BrowseServer(object): cats = [('
  • {0}' '{0}' '/browse/category/{1}
  • ') - .format(xml(x, True), xml(y), xml(_('Browse books by')), + .format(xml(x, True), xml(quote(y)), xml(_('Browse books by')), src='/browse/icon/'+z) for x, y, z in cats] From 6143bdcce4a77e4a5dde59ba1de1922ed410df02 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 11:11:25 -0600 Subject: [PATCH 04/69] calibre-server: Use cherrypy plugin for --daemonize instead of custom code --- src/calibre/library/server/main.py | 38 +++--------------------------- 1 file changed, 3 insertions(+), 35 deletions(-) diff --git a/src/calibre/library/server/main.py b/src/calibre/library/server/main.py index 54dd205b35..f788ab81b5 100644 --- a/src/calibre/library/server/main.py +++ b/src/calibre/library/server/main.py @@ -38,47 +38,15 @@ def option_parser(): ' in the GUI')) return parser -def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'): - try: - pid = os.fork() - if pid > 0: - # exit first parent - sys.exit(0) - except OSError, e: - print >>sys.stderr, "fork #1 failed: %d (%s)" % (e.errno, e.strerror) - sys.exit(1) - - # decouple from parent environment - os.chdir("/") - os.setsid() - os.umask(0) - - # do second fork - try: - pid = os.fork() - if pid > 0: - # exit from second parent - sys.exit(0) - except OSError, e: - print >>sys.stderr, "fork #2 failed: %d (%s)" % (e.errno, e.strerror) - sys.exit(1) - - # Redirect standard file descriptors. - si = file(stdin, 'r') - so = file(stdout, 'a+') - se = file(stderr, 'a+', 0) - os.dup2(si.fileno(), sys.stdin.fileno()) - os.dup2(so.fileno(), sys.stdout.fileno()) - os.dup2(se.fileno(), sys.stderr.fileno()) - - def main(args=sys.argv): from calibre.library.database2 import LibraryDatabase2 parser = option_parser() opts, args = parser.parse_args(args) if opts.daemonize and not iswindows: - daemonize() + from cherrypy.process.plugins import Daemonizer + d = Daemonizer(cherrypy.engine) + d.subscribe() if opts.pidfile is not None: with open(opts.pidfile, 'wb') as f: f.write(str(os.getpid())) From 6b7962c2b26efaffcdfe7b8910678d2f66ecc94a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 11:18:53 -0600 Subject: [PATCH 05/69] calibre-server: Use cherrypy plugin for --pidfile instead of custom code --- src/calibre/library/server/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/library/server/main.py b/src/calibre/library/server/main.py index f788ab81b5..6d01080886 100644 --- a/src/calibre/library/server/main.py +++ b/src/calibre/library/server/main.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, sys +import sys from threading import Thread from calibre.library.server import server_config as config @@ -48,8 +48,8 @@ def main(args=sys.argv): d = Daemonizer(cherrypy.engine) d.subscribe() if opts.pidfile is not None: - with open(opts.pidfile, 'wb') as f: - f.write(str(os.getpid())) + from cherrypy.process.plugins import PIDFile + PIDFile(cherrypy.engine, opts.pidfile).subscribe() cherrypy.log.screen = True from calibre.utils.config import prefs if opts.with_library is None: From c65a8fc0b8f7c3691260d54a6d84b676299527eb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 12:36:18 -0600 Subject: [PATCH 06/69] Content server: Handle switch library in GUI gracefully --- resources/content_server/browse/browse.js | 2 +- src/calibre/gui2/ui.py | 2 + src/calibre/library/server/base.py | 77 ++++++++++++++++------- src/calibre/library/server/cache.py | 5 +- src/calibre/utils/mdns.py | 9 +-- 5 files changed, 67 insertions(+), 28 deletions(-) diff --git a/resources/content_server/browse/browse.js b/resources/content_server/browse/browse.js index 29b84ac2d7..5e3cee14c0 100644 --- a/resources/content_server/browse/browse.js +++ b/resources/content_server/browse/browse.js @@ -109,7 +109,7 @@ function toplevel_layout() { var last = $(".toplevel li").last(); var title = $('.toplevel h3').first(); var bottom = last.position().top + last.height() - title.position().top; - $("#main").height(Math.max(200, bottom)); + $("#main").height(Math.max(200, bottom+75)); } function toplevel() { diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 6ada31418a..0ae15b0caa 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -365,6 +365,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, # {{{ except: olddb = None db = LibraryDatabase2(newloc) + if self.content_server is not None: + self.content_server.set_database(db) self.library_path = newloc self.book_on_device(None, reset=True) db.set_book_on_device_func(self.book_on_device) diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index f6d8e68a04..84e748a949 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -10,6 +10,7 @@ import logging from logging.handlers import RotatingFileHandler import cherrypy +from cherrypy.process.plugins import SimplePlugin from calibre.constants import __appname__, __version__ from calibre.utils.date import fromtimestamp @@ -54,16 +55,43 @@ class DispatchController(object): # {{{ # }}} +class BonJour(SimplePlugin): + + def __init__(self, engine, port=8080): + SimplePlugin.__init__(self, engine) + self.port = port + + def start(self): + try: + publish_zeroconf('Books in calibre', '_stanza._tcp', + self.port, {'path':'/stanza'}) + except: + import traceback + cherrypy.log.error('Failed to start BonJour:') + cherrypy.log.error(traceback.format_exc()) + + start.priority = 90 + + def stop(self): + try: + stop_zeroconf() + except: + import traceback + cherrypy.log.error('Failed to stop BonJour:') + cherrypy.log.error(traceback.format_exc()) + + + stop.priority = 10 + +cherrypy.engine.bonjour = BonJour(cherrypy.engine) + + class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, BrowseServer): server_name = __appname__ + '/' + __version__ def __init__(self, db, opts, embedded=False, show_tracebacks=True): - self.db = db - for item in self.db: - item - break self.opts = opts self.embedded = embedded self.state_callback = None @@ -71,7 +99,14 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, map(int, self.opts.max_cover.split('x')) path = P('content_server') self.build_time = fromtimestamp(os.stat(path).st_mtime) - self.default_cover = open(P('content_server/default_cover.jpg'), 'rb').read() + self.default_cover = open(P('content_server/default_cover.jpg'), 'rb').read() + + cherrypy.engine.bonjour.port = opts.port + + Cache.__init__(self) + + self.set_database(db) + cherrypy.config.update({ 'log.screen' : opts.develop, 'engine.autoreload_on' : opts.develop, @@ -97,18 +132,27 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, 'tools.digest_auth.users' : {opts.username.strip():opts.password.strip()}, } - sr = getattr(opts, 'restriction', None) - sr = db.prefs.get('cs_restriction', '') if sr is None else sr - self.set_search_restriction(sr) self.is_running = False self.exception = None + self.setup_loggers() + cherrypy.engine.bonjour.subscribe() + + def set_database(self, db): + self.db = db + sr = getattr(self.opts, 'restriction', None) + sr = db.prefs.get('cs_restriction', '') if sr is None else sr + self.set_search_restriction(sr) + + def graceful(self): + cherrypy.engine.graceful() def set_search_restriction(self, restriction): if restriction: self.search_restriction = 'search:"%s"'%restriction else: self.search_restriction = '' + self.reset_caches() def setup_loggers(self): access_file = log_access_file @@ -140,7 +184,6 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, root_conf['request.dispatch'] = d.dispatcher self.config['/'] = root_conf - self.setup_loggers() cherrypy.tree.mount(root=None, config=self.config) try: try: @@ -154,24 +197,14 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, cherrypy.engine.start() self.is_running = True - try: - publish_zeroconf('Books in calibre', '_stanza._tcp', - self.opts.port, {'path':'/stanza'}) - except: - import traceback - cherrypy.log.error('Failed to start BonJour:') - cherrypy.log.error(traceback.format_exc()) + #if hasattr(cherrypy.engine, 'signal_handler'): + # cherrypy.engine.signal_handler.subscribe() + cherrypy.engine.block() except Exception, e: self.exception = e finally: self.is_running = False - try: - stop_zeroconf() - except: - import traceback - cherrypy.log.error('Failed to stop BonJour:') - cherrypy.log.error(traceback.format_exc()) try: if callable(self.state_callback): self.state_callback(self.is_running) diff --git a/src/calibre/library/server/cache.py b/src/calibre/library/server/cache.py index 29602a114c..cc4f7a3886 100644 --- a/src/calibre/library/server/cache.py +++ b/src/calibre/library/server/cache.py @@ -10,7 +10,10 @@ from calibre.utils.ordered_dict import OrderedDict class Cache(object): - def add_routes(self, c): + def __init__(self): + self.reset_caches() + + def reset_caches(self): self._category_cache = OrderedDict() self._search_cache = OrderedDict() diff --git a/src/calibre/utils/mdns.py b/src/calibre/utils/mdns.py index 74547b9573..b7cc8757d3 100644 --- a/src/calibre/utils/mdns.py +++ b/src/calibre/utils/mdns.py @@ -58,11 +58,12 @@ def publish(desc, type, port, properties=None, add_hostname=True): ''' port = int(port) server = start_server() + try: + hostname = socket.gethostname().partition('.')[0] + except: + hostname = 'Unknown' + if add_hostname: - try: - hostname = socket.gethostname().partition('.')[0] - except: - hostname = 'Unknown' desc += ' (on %s)'%hostname local_ip = get_external_ip() type = type+'.local.' From 81400b56c9fbea19c46d4af61fe7944e5bbe39bb Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Mon, 18 Oct 2010 19:39:11 +0100 Subject: [PATCH 07/69] Make check_library delete work on linux and mac by checking the file type. --- src/calibre/gui2/dialogs/check_library.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py index 741a42893d..f07a25c51a 100644 --- a/src/calibre/gui2/dialogs/check_library.py +++ b/src/calibre/gui2/dialogs/check_library.py @@ -11,7 +11,7 @@ from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \ from calibre.gui2.dialogs.confirm_delete import confirm from calibre.library.check_library import CheckLibrary, CHECKS -from calibre.library.database2 import delete_file +from calibre.library.database2 import delete_file, delete_tree class Item(QTreeWidgetItem): pass @@ -147,7 +147,11 @@ class CheckLibraryDialog(QDialog): for it in items: if it.checkState(1): try: - delete_file(os.path.join(self.db.library_path ,unicode(it.text(1)))) + p = os.path.join(self.db.library_path ,unicode(it.text(1))) + if os.path.isdir(p): + delete_tree(p) + else: + delete_file(p) except: print 'failed to delete', os.path.join(self.db.library_path ,unicode(it.text(1))) self.run_the_check() From d1e7517fcec66484a07ddaa27b9aa88049db2091 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Mon, 18 Oct 2010 19:43:35 +0100 Subject: [PATCH 08/69] Remove cancel button --- src/calibre/gui2/dialogs/check_library.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py index cf51999559..1cd11e7807 100644 --- a/src/calibre/gui2/dialogs/check_library.py +++ b/src/calibre/gui2/dialogs/check_library.py @@ -44,14 +44,10 @@ class CheckLibraryDialog(QDialog): self.delete = QPushButton('Delete &marked') self.delete.setDefault(False) self.delete.clicked.connect(self.delete_marked) - self.cancel = QPushButton('&Cancel') - self.cancel.setDefault(False) - self.cancel.clicked.connect(self.reject) self.bbox = QDialogButtonBox(self) - self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole) self.bbox.addButton(self.check, QDialogButtonBox.ActionRole) self.bbox.addButton(self.delete, QDialogButtonBox.ActionRole) - self.bbox.addButton(self.cancel, QDialogButtonBox.RejectRole) + self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole) self.bbox.addButton(self.ok, QDialogButtonBox.AcceptRole) h = QHBoxLayout() From 15b4bf7f8b598c4921cc151aea3f74f9248f06b4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 13:38:08 -0600 Subject: [PATCH 09/69] /browse: Add an 'All book' top level category and Fix #7209 (New content server observations regarding restricted items) --- src/calibre/library/server/base.py | 1 + src/calibre/library/server/browse.py | 28 +++++++++++++++++++++------- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index 84e748a949..3a081fc427 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -148,6 +148,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, cherrypy.engine.graceful() def set_search_restriction(self, restriction): + self.search_restriction_name = restriction if restriction: self.search_restriction = 'search:"%s"'%restriction else: diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index ea69ad77ef..ea86de4c1b 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -116,7 +116,10 @@ def render_rating(rating, container='span', prefix=None): # {{{ # }}} -def get_category_items(category, items, db, datatype): # {{{ +def get_category_items(category, items, restriction, datatype): # {{{ + + if category == 'search': + items = [x for x in items if x.name != restriction] def item(i): templ = (u'
    ' @@ -299,6 +302,7 @@ class BrowseServer(object): category_meta = self.db.field_metadata cats = [ (_('Newest'), 'newest', 'forward.png'), + (_('All books'), 'allbooks', 'book.png'), ] def getter(x): @@ -370,7 +374,8 @@ class BrowseServer(object): if len(items) <= self.opts.max_opds_ungrouped_items: script = 'false' - items = get_category_items(category, items, self.db, datatype) + items = get_category_items(category, items, + self.search_restriction_name, datatype) else: getter = lambda x: unicode(getattr(x, 'sort', x.name)) starts = set([]) @@ -440,7 +445,8 @@ class BrowseServer(object): entries.append(x) sort = self.browse_sort_categories(entries, sort) - entries = get_category_items(category, entries, self.db, datatype) + entries = get_category_items(category, entries, + self.search_restriction_name, datatype) return json.dumps(entries, ensure_ascii=False) @@ -451,6 +457,8 @@ class BrowseServer(object): ans = self.browse_toplevel() elif category == 'newest': raise cherrypy.InternalRedirect('/browse/matches/newest/dummy') + elif category == 'allbooks': + raise cherrypy.InternalRedirect('/browse/matches/allbooks/dummy') else: ans = self.browse_category(category, category_sort) @@ -478,16 +486,20 @@ class BrowseServer(object): raise cherrypy.HTTPError(404, 'invalid category id: %r'%cid) categories = self.categories_cache() - if category not in categories and category != 'newest': + if category not in categories and \ + category not in ('newest', 'allbooks'): raise cherrypy.HTTPError(404, 'category not found') fm = self.db.field_metadata try: category_name = fm[category]['name'] dt = fm[category]['datatype'] except: - if category != 'newest': + if category not in ('newest', 'allbooks'): raise - category_name = _('Newest') + category_name = { + 'newest' : _('Newest'), + 'allbooks' : _('All books'), + }[category] dt = None hide_sort = 'true' if dt == 'series' else 'false' @@ -498,8 +510,10 @@ class BrowseServer(object): except: raise cherrypy.HTTPError(404, 'Search: %r not understood'%which) elif category == 'newest': - ids = list(self.db.data.iterallids()) + ids = self.search_cache('') hide_sort = 'true' + elif category == 'allbooks': + ids = self.search_cache('') else: q = category if q == 'news': From f9006854a088217e0e057d0a583ad7a5d8733301 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 13:40:15 -0600 Subject: [PATCH 10/69] Fix #7220 (Dismiss "Fetch Metadata" box when fetch fails) --- src/calibre/gui2/dialogs/fetch_metadata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/dialogs/fetch_metadata.py b/src/calibre/gui2/dialogs/fetch_metadata.py index eb6edce75d..6ee9cd9a96 100644 --- a/src/calibre/gui2/dialogs/fetch_metadata.py +++ b/src/calibre/gui2/dialogs/fetch_metadata.py @@ -190,7 +190,8 @@ class FetchMetadata(QDialog, Ui_FetchMetadata): if self.model.rowCount() < 1: info_dialog(self, _('No metadata found'), _('No metadata found, try adjusting the title and author ' - 'or the ISBN key.')).exec_() + 'and/or removing the ISBN.')).exec_() + self.reject() return self.matches.setModel(self.model) From 757b8fa4c056d190373635f5f78f281c5646da9e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 13:50:39 -0600 Subject: [PATCH 11/69] Fix #7221 (You cannot delete a Series listing from List view) --- src/calibre/gui2/library/models.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 2946985342..0286acc782 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -783,18 +783,22 @@ class BooksModel(QAbstractTableModel): # {{{ self.db.set_rating(id, val) elif column == 'series': val = val.strip() - pat = re.compile(r'\[([.0-9]+)\]') - match = pat.search(val) - if match is not None: - self.db.set_series_index(id, float(match.group(1))) - val = pat.sub('', val).strip() - elif val: - if tweaks['series_index_auto_increment'] == 'next': - ni = self.db.get_next_series_num_for(val) - if ni != 1: - self.db.set_series_index(id, ni) - if val: + if not val: self.db.set_series(id, val) + self.db.set_series_index(id, 1.0) + else: + pat = re.compile(r'\[([.0-9]+)\]') + match = pat.search(val) + if match is not None: + self.db.set_series_index(id, float(match.group(1))) + val = pat.sub('', val).strip() + elif val: + if tweaks['series_index_auto_increment'] == 'next': + ni = self.db.get_next_series_num_for(val) + if ni != 1: + self.db.set_series_index(id, ni) + if val: + self.db.set_series(id, val) elif column == 'timestamp': if val.isNull() or not val.isValid(): return False From 1ebee86c83e5b3c89a2b3af5a8e62de6812b6504 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 20:02:32 -0600 Subject: [PATCH 12/69] Support for the SONY periodical format. Now news downloaded with calibre will appear in the Periodicals section of your SONY and will have the special periodical navigation enabled. --- resources/recipes/atlantic.recipe | 8 +- src/calibre/customize/conversion.py | 5 + src/calibre/ebooks/epub/__init__.py | 16 ++- src/calibre/ebooks/epub/output.py | 13 +- src/calibre/ebooks/epub/periodical.py | 170 ++++++++++++++++++++++++++ src/calibre/ebooks/mobi/output.py | 9 +- src/calibre/web/feeds/news.py | 1 + 7 files changed, 210 insertions(+), 12 deletions(-) create mode 100644 src/calibre/ebooks/epub/periodical.py diff --git a/resources/recipes/atlantic.recipe b/resources/recipes/atlantic.recipe index a41a931e37..5ae0f7d993 100644 --- a/resources/recipes/atlantic.recipe +++ b/resources/recipes/atlantic.recipe @@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe): for poem in soup.findAll('div', attrs={'class':'poem'}): title = self.tag_to_string(poem.find('h4')) desc = self.tag_to_string(poem.find(attrs={'class':'author'})) - url = 'http://www.theatlantic.com'+poem.find('a')['href'] + url = poem.find('a')['href'] + if url.startswith('/'): + url = 'http://www.theatlantic.com' + url self.log('\tFound article:', title, 'at', url) self.log('\t\t', desc) poems.append({'title':title, 'url':url, 'description':desc, @@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe): if div is not None: self.log('Found section: Advice') title = self.tag_to_string(div.find('h4')) - url = 'http://www.theatlantic.com'+div.find('a')['href'] + url = div.find('a')['href'] + if url.startswith('/'): + url = 'http://www.theatlantic.com' + url desc = self.tag_to_string(div.find('p')) self.log('\tFound article:', title, 'at', url) self.log('\t\t', desc) diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index c36f83bd2f..ec83600a49 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -294,3 +294,8 @@ class OutputFormatPlugin(Plugin): ''' raise NotImplementedError + @property + def is_periodical(self): + return self.oeb.metadata.publication_type and \ + unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:') + diff --git a/src/calibre/ebooks/epub/__init__.py b/src/calibre/ebooks/epub/__init__.py index f5de8421e0..53dd01d625 100644 --- a/src/calibre/ebooks/epub/__init__.py +++ b/src/calibre/ebooks/epub/__init__.py @@ -15,22 +15,30 @@ def rules(stylesheets): if r.type == r.STYLE_RULE: yield r -def initialize_container(path_to_container, opf_name='metadata.opf'): +def initialize_container(path_to_container, opf_name='metadata.opf', + extra_entries=[]): ''' Create an empty EPUB document, with a default skeleton. ''' - CONTAINER='''\ + rootfiles = '' + for path, mimetype, _ in extra_entries: + rootfiles += u''.format( + path, mimetype) + CONTAINER = u'''\ - + + {extra_entries} - '''%opf_name + '''.format(opf_name, extra_entries=rootfiles).encode('utf-8') zf = ZipFile(path_to_container, 'w') zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED) zf.writestr('META-INF/', '', 0700) zf.writestr('META-INF/container.xml', CONTAINER) + for path, _, data in extra_entries: + zf.writestr(path, data) return zf diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index 4146031cd2..38820010a8 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -106,6 +106,7 @@ class EPUBOutput(OutputFormatPlugin): recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)]) + def workaround_webkit_quirks(self): # {{{ from calibre.ebooks.oeb.base import XPath for x in self.oeb.spine: @@ -183,6 +184,12 @@ class EPUBOutput(OutputFormatPlugin): with TemporaryDirectory('_epub_output') as tdir: from calibre.customize.ui import plugin_for_output_format + metadata_xml = None + extra_entries = [] + if self.is_periodical: + from calibre.ebooks.epub.periodical import sony_metadata + metadata_xml, atom_xml = sony_metadata(oeb) + extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)] oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb, tdir, input_plugin, opts, log) opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0] @@ -194,10 +201,14 @@ class EPUBOutput(OutputFormatPlugin): encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid) from calibre.ebooks.epub import initialize_container - epub = initialize_container(output_path, os.path.basename(opf)) + epub = initialize_container(output_path, os.path.basename(opf), + extra_entries=extra_entries) epub.add_dir(tdir) if encryption is not None: epub.writestr('META-INF/encryption.xml', encryption) + if metadata_xml is not None: + epub.writestr('META-INF/metadata.xml', + metadata_xml.encode('utf-8')) if opts.extract_to is not None: if os.path.exists(opts.extract_to): shutil.rmtree(opts.extract_to) diff --git a/src/calibre/ebooks/epub/periodical.py b/src/calibre/ebooks/epub/periodical.py new file mode 100644 index 0000000000..c68dc9e272 --- /dev/null +++ b/src/calibre/ebooks/epub/periodical.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from uuid import uuid4 + +from calibre.constants import __appname__, __version__ +from calibre import strftime, prepare_string_for_xml as xml + +SONY_METADATA = u'''\ + + + + {title} + {publisher} + {short_title} + {issue_date} + {language} + + + + + +''' + +SONY_ATOM = u'''\ + + + +{short_title} +{updated} +{id} +{entries} + +''' + +SONY_ATOM_SECTION = u'''\ + + {title} + + {id} + {updated} + {desc} + + newspaper/section + + +''' + +SONY_ATOM_ENTRY = u'''\ + + {title} + {author} + + {id} + {updated} + {desc} + + {word_count} + newspaper/article + + +''' + +def sony_metadata(oeb): + m = oeb.metadata + title = short_title = unicode(m.title[0]) + publisher = __appname__ + ' ' + __version__ + for k, n in m.title[0].attrib.items(): + if k.endswith('file-as'): + short_title = n + try: + date = unicode(m.date[0]).split('T')[0] + except: + date = strftime('%Y-%m-%d') + try: + language = unicode(m.language[0]).replace('_', '-') + except: + language = 'en' + short_title = xml(short_title, True) + + metadata = SONY_METADATA.format(title=xml(title), + short_title=short_title, + publisher=xml(publisher), issue_date=xml(date), + language=xml(language)) + + updated = strftime('%Y-%m-%dT%H:%M:%SZ') + + def cal_id(x): + for k, v in x.attrib.items(): + if k.endswith('scheme') and v == 'uuid': + return True + + try: + base_id = unicode(list(filter(cal_id, m.identifier))[0]) + except: + base_id = str(uuid4()) + + entries = [] + seen_titles = set([]) + for i, section in enumerate(oeb.toc): + if not section.href: + continue + secid = 'section%d'%i + sectitle = section.title + if not sectitle: + sectitle = _('Unknown') + d = 1 + bsectitle = sectitle + while sectitle in seen_titles: + sectitle = bsectitle + ' ' + str(d) + d += 1 + seen_titles.add(sectitle) + sectitle = xml(sectitle, True) + secdesc = section.description + if not secdesc: + secdesc = '' + secdesc = xml(secdesc) + entries.append(SONY_ATOM_SECTION.format(title=sectitle, + href=section.href, id=xml(base_id)+'/'+secid, + short_title=short_title, desc=secdesc, updated=updated)) + + for j, article in enumerate(section): + if not article.href: + continue + atitle = article.title + btitle = atitle + d = 1 + while atitle in seen_titles: + atitle = btitle + ' ' + str(d) + d += 1 + + auth = article.author if article.author else '' + desc = section.description + if not desc: + desc = '' + aid = 'article%d'%j + + entries.append(SONY_ATOM_ENTRY.format( + title=xml(atitle), + author=xml(auth), + updated=updated, + desc=desc, + short_title=short_title, + section_title=sectitle, + href=article.href, + word_count=str(1), + id=xml(base_id)+'/'+secid+'/'+aid + )) + + atom = SONY_ATOM.format(short_title=short_title, + entries='\n\n'.join(entries), updated=updated, + id=xml(base_id)).encode('utf-8') + + return metadata, atom + diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/mobi/output.py index 49da18ea7b..4159c6dd40 100644 --- a/src/calibre/ebooks/mobi/output.py +++ b/src/calibre/ebooks/mobi/output.py @@ -42,11 +42,10 @@ class MOBIOutput(OutputFormatPlugin): ]) def check_for_periodical(self): - if self.oeb.metadata.publication_type and \ - unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'): - self.periodicalize_toc() - self.check_for_masthead() - self.opts.mobi_periodical = True + if self.is_periodical: + self.periodicalize_toc() + self.check_for_masthead() + self.opts.mobi_periodical = True else: self.opts.mobi_periodical = False diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index f3d77061c3..f710b52204 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1102,6 +1102,7 @@ class BasicNewsRecipe(Recipe): if self.output_profile.periodical_date_in_title: title += strftime(self.timefmt) mi = MetaInformation(title, [__appname__]) + mi.title_sort = self.short_title() mi.publisher = __appname__ mi.author_sort = __appname__ mi.publication_type = 'periodical:'+self.publication_type From 199d870b191ab863f21a4f55341501cbacd5c51b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 21:25:58 -0600 Subject: [PATCH 13/69] Setting EPUB metadata: Fix date format. Fix language being overwritten by und when unspecified. Fix empty ISBN identifier being created --- .../recipes/theeconomictimes_india.recipe | 10 ++++---- src/calibre/ebooks/metadata/opf2.py | 25 +++++++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/resources/recipes/theeconomictimes_india.recipe b/resources/recipes/theeconomictimes_india.recipe index 5fef377f6e..59cd56b67e 100644 --- a/resources/recipes/theeconomictimes_india.recipe +++ b/resources/recipes/theeconomictimes_india.recipe @@ -19,18 +19,18 @@ class TheEconomicTimes(BasicNewsRecipe): simultaneous_downloads = 1 encoding = 'utf-8' language = 'en_IN' - publication_type = 'newspaper' + publication_type = 'newspaper' masthead_url = 'http://economictimes.indiatimes.com/photo/2676871.cms' - extra_css = """ body{font-family: Arial,Helvetica,sans-serif} + extra_css = """ body{font-family: Arial,Helvetica,sans-serif} .heading1{font-size: xx-large; font-weight: bold} """ - + conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher , 'language' : language } - + keep_only_tags = [dict(attrs={'class':['heading1','headingnext','Normal']})] remove_tags = [dict(name=['object','link','embed','iframe','base','table','meta'])] @@ -48,5 +48,5 @@ class TheEconomicTimes(BasicNewsRecipe): def preprocess_html(self, soup): for item in soup.findAll(style=True): - del item['style'] + del item['style'] return self.adeify_images(soup) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 5c2477c3dc..62d57f2251 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -382,11 +382,13 @@ class Guide(ResourceCollection): # {{{ class MetadataField(object): - def __init__(self, name, is_dc=True, formatter=None, none_is=None): + def __init__(self, name, is_dc=True, formatter=None, none_is=None, + renderer=lambda x: unicode(x)): self.name = name self.is_dc = is_dc self.formatter = formatter self.none_is = none_is + self.renderer = renderer def __real_get__(self, obj, type=None): ans = obj.get_metadata_element(self.name) @@ -418,7 +420,7 @@ class MetadataField(object): return if elem is None: elem = obj.create_metadata_element(self.name, is_dc=self.is_dc) - obj.set_text(elem, unicode(val)) + obj.set_text(elem, self.renderer(val)) def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)): @@ -489,10 +491,11 @@ class OPF(object): # {{{ series = MetadataField('series', is_dc=False) series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1) rating = MetadataField('rating', is_dc=False, formatter=int) - pubdate = MetadataField('date', formatter=parse_date) + pubdate = MetadataField('date', formatter=parse_date, + renderer=isoformat) publication_type = MetadataField('publication_type', is_dc=False) timestamp = MetadataField('timestamp', is_dc=False, - formatter=parse_date) + formatter=parse_date, renderer=isoformat) def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True, @@ -826,11 +829,10 @@ class OPF(object): # {{{ def fset(self, val): matches = self.isbn_path(self.metadata) - if val is None: - if matches: - for x in matches: - x.getparent().remove(x) - return + if not val: + for x in matches: + x.getparent().remove(x) + return if not matches: attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'} matches = [self.create_metadata_element('identifier', @@ -987,11 +989,14 @@ class OPF(object): # {{{ def smart_update(self, mi, replace_metadata=False): for attr in ('title', 'authors', 'author_sort', 'title_sort', 'publisher', 'series', 'series_index', 'rating', - 'isbn', 'language', 'tags', 'category', 'comments', + 'isbn', 'tags', 'category', 'comments', 'pubdate'): val = getattr(mi, attr, None) if val is not None and val != [] and val != (None, None): setattr(self, attr, val) + lang = getattr(mi, 'language', None) + if lang and lang != 'und': + self.language = lang temp = self.to_book_metadata() temp.smart_update(mi, replace_metadata=replace_metadata) self._user_metadata_ = temp.get_all_user_metadata(True) From b0ffb1fb6580f01fd931a9a721b50fbbe19439d2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 18 Oct 2010 22:31:53 -0600 Subject: [PATCH 14/69] ... --- src/calibre/ebooks/epub/periodical.py | 9 ++++++--- src/calibre/web/feeds/news.py | 3 +-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/epub/periodical.py b/src/calibre/ebooks/epub/periodical.py index c68dc9e272..ad75bb4706 100644 --- a/src/calibre/ebooks/epub/periodical.py +++ b/src/calibre/ebooks/epub/periodical.py @@ -80,9 +80,12 @@ def sony_metadata(oeb): m = oeb.metadata title = short_title = unicode(m.title[0]) publisher = __appname__ + ' ' + __version__ - for k, n in m.title[0].attrib.items(): - if k.endswith('file-as'): - short_title = n + try: + pt = unicode(oeb.metadata.publication_type[0]) + short_title = u''.join(pt.split(':')[2:]) + except: + pass + try: date = unicode(m.date[0]).split('T')[0] except: diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index f710b52204..cb6bf30bcf 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1102,10 +1102,9 @@ class BasicNewsRecipe(Recipe): if self.output_profile.periodical_date_in_title: title += strftime(self.timefmt) mi = MetaInformation(title, [__appname__]) - mi.title_sort = self.short_title() mi.publisher = __appname__ mi.author_sort = __appname__ - mi.publication_type = 'periodical:'+self.publication_type + mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title() mi.timestamp = nowf() mi.comments = self.description if not isinstance(mi.comments, unicode): From d463eff5eac01572d4932f6fcca95b04a346b044 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 00:03:55 -0600 Subject: [PATCH 15/69] ... --- src/calibre/ebooks/epub/periodical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/epub/periodical.py b/src/calibre/ebooks/epub/periodical.py index ad75bb4706..b46bea3719 100644 --- a/src/calibre/ebooks/epub/periodical.py +++ b/src/calibre/ebooks/epub/periodical.py @@ -82,7 +82,7 @@ def sony_metadata(oeb): publisher = __appname__ + ' ' + __version__ try: pt = unicode(oeb.metadata.publication_type[0]) - short_title = u''.join(pt.split(':')[2:]) + short_title = u':'.join(pt.split(':')[2:]) except: pass From f24f95cd1bebb962d0da39bcc395210a1c14fda6 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Tue, 19 Oct 2010 11:04:48 +0100 Subject: [PATCH 16/69] Make int and float custom fields work in save templates --- src/calibre/library/save_to_disk.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/calibre/library/save_to_disk.py b/src/calibre/library/save_to_disk.py index 94f9dbd229..c6cc12a978 100644 --- a/src/calibre/library/save_to_disk.py +++ b/src/calibre/library/save_to_disk.py @@ -131,15 +131,14 @@ class SafeFormat(TemplateFormatter): self.vformat(b['display']['composite_template'], [], kwargs) return self.composite_values[key] if key in kwargs: - return kwargs[key].replace('/', '_').replace('\\', '_') + val = kwargs[key] + return val.replace('/', '_').replace('\\', '_') return '' except: if DEBUG: traceback.print_exc() return key -safe_formatter = SafeFormat() - def get_components(template, mi, id, timefmt='%b %Y', length=250, sanitize_func=ascii_filename, replace_whitespace=False, to_lowercase=False): @@ -173,17 +172,22 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250, custom_metadata = mi.get_all_user_metadata(make_copy=False) for key in custom_metadata: if key in format_args: + cm = custom_metadata[key] ## TODO: NEWMETA: should ratings be divided by 2? The standard rating isn't... - if custom_metadata[key]['datatype'] == 'series': + if cm['datatype'] == 'series': format_args[key] = tsfmt(format_args[key]) if key+'_index' in format_args: format_args[key+'_index'] = fmt_sidx(format_args[key+'_index']) - elif custom_metadata[key]['datatype'] == 'datetime': + elif cm['datatype'] == 'datetime': format_args[key] = strftime(timefmt, format_args[key].timetuple()) - elif custom_metadata[key]['datatype'] == 'bool': + elif cm['datatype'] == 'bool': format_args[key] = _('yes') if format_args[key] else _('no') - - components = safe_formatter.safe_format(template, format_args, + elif cm['datatype'] in ['int', 'float']: + if format_args[key] != 0: + format_args[key] = unicode(format_args[key]) + else: + format_args[key] = '' + components = SafeFormat().safe_format(template, format_args, 'G_C-EXCEPTION!', mi) components = [x.strip() for x in components.split('/') if x.strip()] components = [sanitize_func(x) for x in components if x] From d8fc285b70ab186d793493821d04c0cf7c65d2c6 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Tue, 19 Oct 2010 12:13:20 +0100 Subject: [PATCH 17/69] Make floats work in composite columns --- src/calibre/ebooks/metadata/book/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 3b96c98a7b..593e161df7 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -43,7 +43,7 @@ class SafeFormat(TemplateFormatter): b = self.book.get_user_metadata(key, False) if b and b['datatype'] == 'int' and self.book.get(key, 0) == 0: v = '' - elif b and b['datatype'] == 'float' and b.get(key, 0.0) == 0.0: + elif b and b['datatype'] == 'float' and self.book.get(key, 0.0) == 0.0: v = '' else: ign, v = self.book.format_field(key.lower(), series_with_index=False) From 730bbb2d09980fe8a835ad360eced33a3d31ea10 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 08:45:14 -0600 Subject: [PATCH 18/69] EPUB Input: Handle EPUB files with multiple OPF files. Fixes #7229 (E-book Viewer crash: "IndexError:list index out of range") --- src/calibre/ebooks/epub/input.py | 34 ++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py index 30a3327b63..ec2004d81c 100644 --- a/src/calibre/ebooks/epub/input.py +++ b/src/calibre/ebooks/epub/input.py @@ -108,6 +108,27 @@ class EPUBInput(InputFormatPlugin): open('calibre_raster_cover.jpg', 'wb').write( renderer) + def find_opf(self): + def attr(n, attr): + for k, v in n.attrib.items(): + if k.endswith(attr): + return v + try: + with open('META-INF/container.xml') as f: + root = etree.fromstring(f.read()) + for r in root.xpath('//*[local-name()="rootfile"]'): + if attr(r, 'media-type') != "application/oebps-package+xml": + continue + path = attr(r, 'full-path') + if not path: + continue + path = os.path.join(os.getcwdu(), *path.split('/')) + if os.path.exists(path): + return path + except: + import traceback + traceback.print_exc() + def convert(self, stream, options, file_ext, log, accelerators): from calibre.utils.zipfile import ZipFile from calibre import walk @@ -116,12 +137,13 @@ class EPUBInput(InputFormatPlugin): zf = ZipFile(stream) zf.extractall(os.getcwd()) encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml')) - opf = None - for f in walk(u'.'): - if f.lower().endswith('.opf') and '__MACOSX' not in f and \ - not os.path.basename(f).startswith('.'): - opf = os.path.abspath(f) - break + opf = self.find_opf() + if opf is None: + for f in walk(u'.'): + if f.lower().endswith('.opf') and '__MACOSX' not in f and \ + not os.path.basename(f).startswith('.'): + opf = os.path.abspath(f) + break path = getattr(stream, 'name', 'stream') if opf is None: From 973afc073333c6cccbb10284cc688319dce68c38 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 11:34:46 -0600 Subject: [PATCH 19/69] Content server: Make /mobile a little prettier --- resources/content_server/mobile.css | 17 +++++++++++++++++ src/calibre/library/server/mobile.py | 21 +++++++++++---------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/resources/content_server/mobile.css b/resources/content_server/mobile.css index a887684841..28d12bb6db 100644 --- a/resources/content_server/mobile.css +++ b/resources/content_server/mobile.css @@ -1,5 +1,9 @@ /* CSS for the mobile version of the content server webpage */ +.body { + font-family: sans-serif; +} + .navigation table.buttons { width: 100%; } @@ -85,4 +89,17 @@ div.navigation { clear: both; } +.data-container { + display: inline-block; + vertical-align: middle; +} +.first-line { + font-size: larger; + font-weight: bold; +} + +.second-line { + margin-top: 0.75ex; + display: block; +} diff --git a/src/calibre/library/server/mobile.py b/src/calibre/library/server/mobile.py index b9ca24a823..7c2f959131 100644 --- a/src/calibre/library/server/mobile.py +++ b/src/calibre/library/server/mobile.py @@ -112,7 +112,6 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS): CLASS('thumbnail')) data = TD() - last = None for fmt in book['formats'].split(','): a = ascii_filename(book['authors']) t = ascii_filename(book['title']) @@ -124,9 +123,11 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS): ), CLASS('button')) s.tail = u'' - last = s data.append(s) + div = DIV(CLASS('data-container')) + data.append(div) + series = u'[%s - %s]'%(book['series'], book['series_index']) \ if book['series'] else '' tags = u'Tags=[%s]'%book['tags'] if book['tags'] else '' @@ -137,13 +138,13 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS): if val: ctext += '%s=[%s] '%tuple(val.split(':#:')) - text = u'\u202f%s %s by %s - %s - %s %s %s' % (book['title'], series, - book['authors'], book['size'], book['timestamp'], tags, ctext) - - if last is None: - data.text = text - else: - last.tail += text + first = SPAN(u'\u202f%s %s by %s' % (book['title'], series, + book['authors']), CLASS('first-line')) + div.append(first) + second = SPAN(u'%s - %s %s %s' % ( book['size'], + book['timestamp'], + tags, ctext), CLASS('second-line')) + div.append(second) bookt.append(TR(thumbnail, data)) # }}} @@ -229,7 +230,7 @@ class MobileServer(object): no_tag_count=True) book['title'] = record[FM['title']] for x in ('timestamp', 'pubdate'): - book[x] = strftime('%Y/%m/%d %H:%M:%S', record[FM[x]]) + book[x] = strftime('%b, %Y', record[FM[x]]) book['id'] = record[FM['id']] books.append(book) for key in CKEYS: From 7dc8e70b711e5bf2a1fdbbdbe522b88ac31c2b9d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 14:33:19 -0600 Subject: [PATCH 20/69] /browse: Fix sorting on custom cols. Also specify sort order explicitly when sorting on boolean columns --- src/calibre/library/caches.py | 4 ++++ src/calibre/library/server/browse.py | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index c22f9e00b0..300ddbac0b 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -816,6 +816,10 @@ class SortKeyGenerator(object): if val is None: val = '' val = val.lower() + + elif dt == 'bool': + val = {True: 1, False: 2, None: 3}.get(val, 3) + yield val # }}} diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index ea86de4c1b..d8d67c3824 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' import operator, os, json from binascii import hexlify, unhexlify -from urllib import quote +from urllib import quote, unquote import cherrypy @@ -482,6 +482,8 @@ class BrowseServer(object): @Endpoint(sort_type='list') def browse_matches(self, category=None, cid=None, list_sort=None): + if list_sort: + list_sort = unquote(list_sort) if not cid: raise cherrypy.HTTPError(404, 'invalid category id: %r'%cid) categories = self.categories_cache() From 1dd72e682c9825f57331bfb511674d66184d821b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 15:12:18 -0600 Subject: [PATCH 21/69] USBMS drivers: Pass full filepath of ebook file to the upload_cover function --- src/calibre/devices/cybook/driver.py | 2 +- src/calibre/devices/hanvon/driver.py | 4 ++-- src/calibre/devices/misc.py | 2 +- src/calibre/devices/nook/driver.py | 2 +- src/calibre/devices/usbms/driver.py | 10 ++++++---- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/calibre/devices/cybook/driver.py b/src/calibre/devices/cybook/driver.py index d314646a87..7c436a7d0e 100644 --- a/src/calibre/devices/cybook/driver.py +++ b/src/calibre/devices/cybook/driver.py @@ -42,7 +42,7 @@ class CYBOOK(USBMS): DELETE_EXTS = ['.mbp', '.dat', '.bin', '_6090.t2b', '.thn'] SUPPORTS_SUB_DIRS = True - def upload_cover(self, path, filename, metadata): + def upload_cover(self, path, filename, metadata, filepath): coverdata = getattr(metadata, 'thumbnail', None) if coverdata and coverdata[2]: coverdata = coverdata[2] diff --git a/src/calibre/devices/hanvon/driver.py b/src/calibre/devices/hanvon/driver.py index 7933b9885d..1fe18afc58 100644 --- a/src/calibre/devices/hanvon/driver.py +++ b/src/calibre/devices/hanvon/driver.py @@ -77,7 +77,7 @@ class ALEX(N516): name = os.path.splitext(os.path.basename(file_abspath))[0] + '.png' return os.path.join(base, 'covers', name) - def upload_cover(self, path, filename, metadata): + def upload_cover(self, path, filename, metadata, filepath): from calibre.ebooks import calibre_cover from calibre.utils.magick.draw import thumbnail coverdata = getattr(metadata, 'thumbnail', None) @@ -129,7 +129,7 @@ class AZBOOKA(ALEX): def can_handle(self, device_info, debug=False): return not is_alex(device_info) - def upload_cover(self, path, filename, metadata): + def upload_cover(self, path, filename, metadata, filepath): pass class EB511(USBMS): diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index a1c9b790e4..bca4e8ec52 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -102,7 +102,7 @@ class PDNOVEL(USBMS): DELETE_EXTS = ['.jpg', '.jpeg', '.png'] - def upload_cover(self, path, filename, metadata): + def upload_cover(self, path, filename, metadata, filepath): coverdata = getattr(metadata, 'thumbnail', None) if coverdata and coverdata[2]: with open('%s.jpg' % os.path.join(path, filename), 'wb') as coverfile: diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py index f697ee5202..a809b2c08a 100644 --- a/src/calibre/devices/nook/driver.py +++ b/src/calibre/devices/nook/driver.py @@ -45,7 +45,7 @@ class NOOK(USBMS): DELETE_EXTS = ['.jpg'] SUPPORTS_SUB_DIRS = True - def upload_cover(self, path, filename, metadata): + def upload_cover(self, path, filename, metadata, filepath): try: from PIL import Image, ImageDraw Image, ImageDraw diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py index a83a8eb0ea..2f26c4a353 100644 --- a/src/calibre/devices/usbms/driver.py +++ b/src/calibre/devices/usbms/driver.py @@ -186,7 +186,8 @@ class USBMS(CLI, Device): self.put_file(infile, filepath, replace_file=True) try: self.upload_cover(os.path.dirname(filepath), - os.path.splitext(os.path.basename(filepath))[0], mdata) + os.path.splitext(os.path.basename(filepath))[0], + mdata, filepath) except: # Failure to upload cover is not catastrophic import traceback traceback.print_exc() @@ -197,14 +198,15 @@ class USBMS(CLI, Device): debug_print('USBMS: finished uploading %d books'%(len(files))) return zip(paths, cycle([on_card])) - def upload_cover(self, path, filename, metadata): + def upload_cover(self, path, filename, metadata, filepath): ''' Upload book cover to the device. Default implementation does nothing. - :param path: the full path were the associated book is located. - :param filename: the name of the book file without the extension. + :param path: The full path to the directory where the associated book is located. + :param filename: The name of the book file without the extension. :param metadata: metadata belonging to the book. Use metadata.thumbnail for cover + :param filepath: The full path to the ebook file ''' pass From 5fb294486babed1e2471940a4ebee97cc199601f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 16:40:39 -0600 Subject: [PATCH 22/69] SONY driver: Add support for uploading covers and periodicals --- src/calibre/devices/prs505/__init__.py | 6 ++ src/calibre/devices/prs505/driver.py | 49 +++++++-- src/calibre/devices/prs505/sony_cache.py | 128 ++++++++++++++++++++++- 3 files changed, 173 insertions(+), 10 deletions(-) diff --git a/src/calibre/devices/prs505/__init__.py b/src/calibre/devices/prs505/__init__.py index 20f3b8d49b..48b7d98123 100644 --- a/src/calibre/devices/prs505/__init__.py +++ b/src/calibre/devices/prs505/__init__.py @@ -2,5 +2,11 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' MEDIA_XML = 'database/cache/media.xml' +MEDIA_EXT = 'database/cache/cacheExt.xml' CACHE_XML = 'Sony Reader/database/cache.xml' +CACHE_EXT = 'Sony Reader/database/cacheExt.xml' + +MEDIA_THUMBNAIL = 'database/thumbnail' +CACHE_THUMBNAIL = 'Sony Reader/database/thumbnail' + diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index bb62e4dc76..3bcf7715a2 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -9,10 +9,10 @@ Device driver for the SONY devices import os, time, re from calibre.devices.usbms.driver import USBMS, debug_print -from calibre.devices.prs505 import MEDIA_XML -from calibre.devices.prs505 import CACHE_XML +from calibre.devices.prs505 import MEDIA_XML, MEDIA_EXT, CACHE_XML, CACHE_EXT, \ + MEDIA_THUMBNAIL, CACHE_THUMBNAIL from calibre.devices.prs505.sony_cache import XMLCache -from calibre import __appname__ +from calibre import __appname__, prints from calibre.devices.usbms.books import CollectionsBookList class PRS505(USBMS): @@ -66,6 +66,8 @@ class PRS505(USBMS): plugboard = None plugboard_func = None + THUMBNAIL_HEIGHT = 200 + def windows_filter_pnp_id(self, pnp_id): return '_LAUNCHER' in pnp_id @@ -116,20 +118,21 @@ class PRS505(USBMS): return fname def initialize_XML_cache(self): - paths, prefixes = {}, {} - for prefix, path, source_id in [ - ('main', MEDIA_XML, 0), - ('card_a', CACHE_XML, 1), - ('card_b', CACHE_XML, 2) + paths, prefixes, ext_paths = {}, {}, {} + for prefix, path, ext_path, source_id in [ + ('main', MEDIA_XML, MEDIA_EXT, 0), + ('card_a', CACHE_XML, CACHE_EXT, 1), + ('card_b', CACHE_XML, CACHE_EXT, 2) ]: prefix = getattr(self, '_%s_prefix'%prefix) if prefix is not None and os.path.exists(prefix): paths[source_id] = os.path.join(prefix, *(path.split('/'))) + ext_paths[source_id] = os.path.join(prefix, *(ext_path.split('/'))) prefixes[source_id] = prefix d = os.path.dirname(paths[source_id]) if not os.path.exists(d): os.makedirs(d) - return XMLCache(paths, prefixes, self.settings().use_author_sort) + return XMLCache(paths, ext_paths, prefixes, self.settings().use_author_sort) def books(self, oncard=None, end_session=True): debug_print('PRS505: starting fetching books for card', oncard) @@ -174,3 +177,31 @@ class PRS505(USBMS): def set_plugboards(self, plugboards, pb_func): self.plugboards = plugboards self.plugboard_func = pb_func + + def upload_cover(self, path, filename, metadata, filepath): + if metadata.thumbnail and metadata.thumbnail[-1]: + path = path.replace('/', os.sep) + is_main = path.startswith(self._main_prefix) + thumbnail_dir = MEDIA_THUMBNAIL if is_main else CACHE_THUMBNAIL + prefix = None + if is_main: + prefix = self._main_prefix + else: + if self._card_a_prefix and \ + path.startswith(self._card_a_prefix): + prefix = self._card_a_prefix + elif self._card_b_prefix and \ + path.startswith(self._card_b_prefix): + prefix = self._card_b_prefix + if prefix is None: + prints('WARNING: Failed to find prefix for:', filepath) + return + thumbnail_dir = os.path.join(prefix, *thumbnail_dir.split('/')) + + relpath = os.path.relpath(filepath, prefix) + thumbnail_dir = os.path.join(thumbnail_dir, relpath) + if not os.path.exists(thumbnail_dir): + os.makedirs(thumbnail_dir) + with open(os.path.join(thumbnail_dir, 'main_thumbnail.jpg'), 'wb') as f: + f.write(metadata.thumbnail[-1]) + diff --git a/src/calibre/devices/prs505/sony_cache.py b/src/calibre/devices/prs505/sony_cache.py index ce24dcd03f..e272cd6372 100644 --- a/src/calibre/devices/prs505/sony_cache.py +++ b/src/calibre/devices/prs505/sony_cache.py @@ -9,6 +9,7 @@ import os, time from base64 import b64decode from uuid import uuid4 from lxml import etree +from datetime import date from calibre import prints, guess_type, isbytestring from calibre.devices.errors import DeviceError @@ -18,6 +19,20 @@ from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.metadata import authors_to_string, title_sort, \ authors_to_sort_string +''' +cahceExt.xml + +Periodical identifier sample from a PRS-650: + + + + + main_thumbnail.jpg + + + +''' + # Utility functions {{{ EMPTY_CARD_CACHE = '''\ @@ -25,6 +40,12 @@ EMPTY_CARD_CACHE = '''\ ''' +EMPTY_EXT_CACHE = '''\ + + + +''' + MIME_MAP = { "lrf" : "application/x-sony-bbeb", 'lrx' : 'application/x-sony-bbeb', @@ -63,7 +84,7 @@ def uuid(): class XMLCache(object): - def __init__(self, paths, prefixes, use_author_sort): + def __init__(self, paths, ext_paths, prefixes, use_author_sort): if DEBUG: debug_print('Building XMLCache...', paths) self.paths = paths @@ -85,6 +106,7 @@ class XMLCache(object): if os.access(path, os.R_OK): with open(path, 'rb') as f: raw = f.read() + self.roots[source_id] = etree.fromstring(xml_to_unicode( raw, strip_encoding_pats=True, assume_utf8=True, verbose=DEBUG)[0], @@ -93,6 +115,25 @@ class XMLCache(object): raise Exception(('The SONY database at %s is corrupted. Try ' ' disconnecting and reconnecting your reader.')%path) + self.ext_paths, self.ext_roots = {}, {} + for source_id, path in ext_paths.items(): + if not os.path.exists(path): + try: + with open(path, 'wb') as f: + f.write(EMPTY_EXT_CACHE) + except: + pass + if os.access(path, os.W_OK): + try: + with open(path, 'rb') as f: + self.ext_roots[source_id] = etree.fromstring( + xml_to_unicode(f.read(), + strip_encoding_pats=True, assume_utf8=True, + verbose=DEBUG)[0], parser=parser) + self.ext_paths[source_id] = path + except: + pass + # }}} recs = self.roots[0].xpath('//*[local-name()="records"]') @@ -352,12 +393,18 @@ class XMLCache(object): debug_print('Updating XML Cache:', i) root = self.record_roots[i] lpath_map = self.build_lpath_map(root) + ext_root = self.ext_roots[i] if i in self.ext_roots else None + ext_lpath_map = None + if ext_root is not None: + ext_lpath_map = self.build_lpath_map(ext_root) gtz_count = ltz_count = 0 use_tz_var = False for book in booklist: path = os.path.join(self.prefixes[i], *(book.lpath.split('/'))) record = lpath_map.get(book.lpath, None) + created = False if record is None: + created = True record = self.create_text_record(root, i, book.lpath) if plugboard is not None: newmi = book.deepcopy_metadata() @@ -373,6 +420,13 @@ class XMLCache(object): if book.device_collections is None: book.device_collections = [] book.device_collections = playlist_map.get(book.lpath, []) + + if created and ext_root is not None and \ + ext_lpath_map.get(book.lpath, None) is None: + ext_record = self.create_ext_text_record(ext_root, i, + book.lpath, book.thumbnail) + self.periodicalize_book(book, ext_record) + debug_print('Timezone votes: %d GMT, %d LTZ, use_tz_var=%s'% (gtz_count, ltz_count, use_tz_var)) self.update_playlists(i, root, booklist, collections_attributes) @@ -386,6 +440,47 @@ class XMLCache(object): self.fix_ids() debug_print('Finished update') + def is_sony_periodical(self, book): + if _('News') not in book.tags: + return False + if not book.lpath.lower().endswith('.epub'): + return False + if book.pubdate.date() < date(2010, 10, 17): + return False + return True + + def periodicalize_book(self, book, record): + if not self.is_sony_periodical(book): + return + record.set('conformsTo', + "http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0") + + record.set('description', '') + + name = None + if '[' in book.title: + name = book.title.split('[')[0].strip() + if len(name) < 4: + name = None + if not name: + try: + name = [t for t in book.tags if t != _('News')][0] + except: + name = None + + if not name: + name = book.title + + record.set('periodicalName', name) + + try: + pubdate = strftime(book.pubdate.utctimetuple(), + zone=lambda x : x) + record.set('publicationDate', pubdate) + except: + pass + + def rebuild_collections(self, booklist, bl_index): if bl_index not in self.record_roots: return @@ -472,6 +567,25 @@ class XMLCache(object): root.append(ans) return ans + def create_ext_text_record(self, root, bl_id, lpath, thumbnail): + namespace = root.nsmap[None] + attrib = { 'path': lpath } + ans = root.makeelement('{%s}text'%namespace, attrib=attrib, + nsmap=root.nsmap) + ans.tail = '\n' + root[-1].tail = '\n' + '\t' + root.append(ans) + if thumbnail and thumbnail[-1]: + ans.text = '\n' + '\t\t' + t = root.makeelement('{%s}thumbnail'%namespace, + attrib={'width':str(thumbnail[0]), 'height':str(thumbnail[1])}, + nsmap=root.nsmap) + t.text = 'main_thumbnail.jpg' + ans.append(t) + t.tail = '\n\t' + return ans + + def update_text_record(self, record, book, path, bl_index, gtz_count, ltz_count, use_tz_var): ''' @@ -589,6 +703,18 @@ class XMLCache(object): '') with open(path, 'wb') as f: f.write(raw) + + for i, path in self.ext_paths.items(): + try: + raw = etree.tostring(self.ext_roots[i], encoding='UTF-8', + xml_declaration=True) + except: + continue + raw = raw.replace("", + '') + with open(path, 'wb') as f: + f.write(raw) + # }}} # Utility methods {{{ From 2bd9f63f29248eba346c22eeb691977591d24069 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 16:53:51 -0600 Subject: [PATCH 23/69] Linux device mounting: Mount the drive with the lowest kernel name as main memory --- src/calibre/devices/prs505/sony_cache.py | 6 +++--- src/calibre/devices/usbms/device.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/prs505/sony_cache.py b/src/calibre/devices/prs505/sony_cache.py index e272cd6372..15245d3cd5 100644 --- a/src/calibre/devices/prs505/sony_cache.py +++ b/src/calibre/devices/prs505/sony_cache.py @@ -97,8 +97,8 @@ class XMLCache(object): for source_id, path in paths.items(): if source_id == 0: if not os.path.exists(path): - raise DeviceError('The SONY XML cache media.xml does not exist. Try' - ' disconnecting and reconnecting your reader.') + raise DeviceError(('The SONY XML cache %r does not exist. Try' + ' disconnecting and reconnecting your reader.')%repr(path)) with open(path, 'rb') as f: raw = f.read() else: @@ -112,7 +112,7 @@ class XMLCache(object): verbose=DEBUG)[0], parser=parser) if self.roots[source_id] is None: - raise Exception(('The SONY database at %s is corrupted. Try ' + raise Exception(('The SONY database at %r is corrupted. Try ' ' disconnecting and reconnecting your reader.')%path) self.ext_paths, self.ext_roots = {}, {} diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 6f938cbcbd..aa4f0d06f4 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -523,7 +523,8 @@ class Device(DeviceConfig, DevicePlugin): devnodes.append(node) devnodes += list(repeat(None, 3)) - ans = tuple(['/dev/'+x if ok.get(x, False) else None for x in devnodes[:3]]) + ans = ['/dev/'+x if ok.get(x, False) else None for x in devnodes[:3]] + ans.sort(key=lambda x: x[5:] if x else 'zzzzz') return self.linux_swap_drives(ans) def linux_swap_drives(self, drives): From 8b0b1312e311cc4bc2eaa4d65bee0f6c96aa2b13 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 17:19:54 -0600 Subject: [PATCH 24/69] Linux device drivers: Fix udisks based ejecting for devices with multiple nodes --- src/calibre/devices/udisks.py | 20 ++++++-------- src/calibre/devices/usbms/device.py | 41 +++++++++++++++++------------ 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/calibre/devices/udisks.py b/src/calibre/devices/udisks.py index ba26c2b56c..d79b626f36 100644 --- a/src/calibre/devices/udisks.py +++ b/src/calibre/devices/udisks.py @@ -5,8 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import dbus -import os +import dbus, os def node_mountpoint(node): @@ -56,15 +55,6 @@ class UDisks(object): parent = device_node_path while parent[-1] in '0123456789': parent = parent[:-1] - devices = [str(x) for x in self.main.EnumerateDeviceFiles()] - for d in devices: - if d.startswith(parent) and d != parent: - try: - self.unmount(d) - except: - import traceback - print 'Failed to unmount:', d - traceback.print_exc() d = self.device(parent) d.DriveEject([]) @@ -76,13 +66,19 @@ def eject(node_path): u = UDisks() u.eject(node_path) +def umount(node_path): + u = UDisks() + u.unmount(node_path) + if __name__ == '__main__': import sys dev = sys.argv[1] print 'Testing with node', dev u = UDisks() print 'Mounted at:', u.mount(dev) - print 'Ejecting' + print 'Unmounting' + u.unmount(dev) + print 'Ejecting:' u.eject(dev) diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index aa4f0d06f4..94744c521f 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -733,24 +733,31 @@ class Device(DeviceConfig, DevicePlugin): pass def eject_linux(self): - try: - from calibre.devices.udisks import eject - return eject(self._linux_main_device_node) - except: - pass - drives = self.find_device_nodes() + from calibre.devices.udisks import eject, umount + drives = [d for d in self.find_device_nodes() if d] + for d in drives: + try: + umount(d) + except: + pass + for d in drives: + try: + eject(d) + except Exception, e: + print 'Udisks eject call for:', d, 'failed:' + print '\t', str(e) + for drive in drives: - if drive: - cmd = 'calibre-mount-helper' - if getattr(sys, 'frozen_path', False): - cmd = os.path.join(sys.frozen_path, cmd) - cmd = [cmd, 'eject'] - mp = getattr(self, "_linux_mount_map", {}).get(drive, - 'dummy/')[:-1] - try: - subprocess.Popen(cmd + [drive, mp]).wait() - except: - pass + cmd = 'calibre-mount-helper' + if getattr(sys, 'frozen_path', False): + cmd = os.path.join(sys.frozen_path, cmd) + cmd = [cmd, 'eject'] + mp = getattr(self, "_linux_mount_map", {}).get(drive, + 'dummy/')[:-1] + try: + subprocess.Popen(cmd + [drive, mp]).wait() + except: + pass def eject(self): if islinux: From 4906e1fc420fc160e90fc4e329feaf2c2ee9eb7f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 17:21:15 -0600 Subject: [PATCH 25/69] ... --- src/calibre/devices/usbms/device.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 94744c521f..f085ab8989 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -745,7 +745,7 @@ class Device(DeviceConfig, DevicePlugin): eject(d) except Exception, e: print 'Udisks eject call for:', d, 'failed:' - print '\t', str(e) + print '\t', e for drive in drives: cmd = 'calibre-mount-helper' From 5a8e4b2174e95527fa5b6a09c496a17015b4c222 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 18:26:37 -0600 Subject: [PATCH 26/69] ... --- resources/recipes/cacm.recipe | 74 +++++++++++++++++------------------ 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/resources/recipes/cacm.recipe b/resources/recipes/cacm.recipe index 1618bae742..e4af9d2024 100644 --- a/resources/recipes/cacm.recipe +++ b/resources/recipes/cacm.recipe @@ -1,37 +1,37 @@ -import datetime -from calibre.web.feeds.news import BasicNewsRecipe - -class AdvancedUserRecipe1286242553(BasicNewsRecipe): - title = u'CACM' - oldest_article = 7 - max_articles_per_feed = 100 - needs_subscription = True - feeds = [(u'CACM', u'http://cacm.acm.org/magazine.rss')] - language = 'en' - __author__ = 'jonmisurda' - no_stylesheets = True - remove_tags = [ - dict(name='div', attrs={'class':['FeatureBox', 'ArticleComments', 'SideColumn', \ - 'LeftColumn', 'RightColumn', 'SiteSearch', 'MainNavBar','more', 'SubMenu', 'inner']}) - ] - cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d' - - def get_browser(self): - br = BasicNewsRecipe.get_browser() - if self.username is not None and self.password is not None: - br.open('https://cacm.acm.org/login') - br.select_form(nr=1) - br['current_member[user]'] = self.username - br['current_member[passwd]'] = self.password - br.submit() - return br - - def get_cover_url(self): - now = datetime.datetime.now() - - cover_url = None - soup = self.index_to_soup(self.cover_url_pattern % (now.year, now.month)) - cover_item = soup.find('img',attrs={'alt':'magazine cover image'}) - if cover_item: - cover_url = cover_item['src'] - return cover_url +import datetime +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1286242553(BasicNewsRecipe): + title = u'CACM' + oldest_article = 7 + max_articles_per_feed = 100 + needs_subscription = True + feeds = [(u'CACM', u'http://cacm.acm.org/magazine.rss')] + language = 'en' + __author__ = 'jonmisurda' + no_stylesheets = True + remove_tags = [ + dict(name='div', attrs={'class':['FeatureBox', 'ArticleComments', 'SideColumn', \ + 'LeftColumn', 'RightColumn', 'SiteSearch', 'MainNavBar','more', 'SubMenu', 'inner']}) + ] + cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d' + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('https://cacm.acm.org/login') + br.select_form(nr=1) + br['current_member[user]'] = self.username + br['current_member[passwd]'] = self.password + br.submit() + return br + + def get_cover_url(self): + now = datetime.datetime.now() + + cover_url = None + soup = self.index_to_soup(self.cover_url_pattern % (now.year, now.month)) + cover_item = soup.find('img',attrs={'alt':'magazine cover image'}) + if cover_item: + cover_url = cover_item['src'] + return cover_url From 1c2ff97a5d8b64edffc80d42a0e64ea152a94842 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Oct 2010 19:06:31 -0600 Subject: [PATCH 27/69] ... --- src/calibre/devices/usbms/device.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index f085ab8989..f826167d16 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -740,12 +740,17 @@ class Device(DeviceConfig, DevicePlugin): umount(d) except: pass + failures = False for d in drives: try: eject(d) except Exception, e: print 'Udisks eject call for:', d, 'failed:' print '\t', e + failures = True + + if not failures: + return for drive in drives: cmd = 'calibre-mount-helper' From abc1f7525c8a2af18a714ee57944c720750b7ec7 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Wed, 20 Oct 2010 12:28:55 +0100 Subject: [PATCH 28/69] Fix restore not to die when conflicting custom columns are encountered. --- src/calibre/library/restore.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/library/restore.py b/src/calibre/library/restore.py index 16aba3aebd..748d60b0b2 100644 --- a/src/calibre/library/restore.py +++ b/src/calibre/library/restore.py @@ -170,8 +170,8 @@ class Restore(Thread): label = cfm['label'] if label in m and args != m[label]: if label not in self.conflicting_custom_cols: - self.conflicting_custom_cols[label] = set([m[label]]) - self.conflicting_custom_cols[label].add(args) + self.conflicting_custom_cols[label] = [] + self.conflicting_custom_cols[label].append(args) m[cfm['label']] = args db = RestoreDatabase(self.library_path) From 8f371fdd4f1279375e5cfa5a79be73dc4c0b40b0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 20 Oct 2010 22:12:50 -0600 Subject: [PATCH 29/69] /browse: Force AJAX requests to never be cached as the user has no way of requesting a refresh for them. --- resources/content_server/browse/browse.js | 3 +++ src/calibre/library/server/browse.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/resources/content_server/browse/browse.js b/resources/content_server/browse/browse.js index 5e3cee14c0..e7e359c163 100644 --- a/resources/content_server/browse/browse.js +++ b/resources/content_server/browse/browse.js @@ -156,6 +156,7 @@ function category() { if (href) { $.ajax({ url:href, + cache: false, data:{'sort':cookie(sort_cookie_name)}, success: function(data) { this.children(".loaded").html(data); @@ -212,6 +213,7 @@ function load_page(elem) { url: href, context: elem, dataType: "json", + cache : false, type: 'POST', timeout: 600000, //milliseconds (10 minutes) data: {'ids': ids}, @@ -263,6 +265,7 @@ function show_details(a_dom) { $.ajax({ url: book.find('.details-href').attr('title'), context: bd, + cache: false, dataType: "json", timeout: 600000, //milliseconds (10 minutes) error: function(xhr, stat, err) { diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index d8d67c3824..463fcd6fde 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -168,6 +168,9 @@ class Endpoint(object): # {{{ sort_val = cookie[eself.sort_cookie_name].value kwargs[eself.sort_kwarg] = sort_val + # Remove AJAX caching disabling jquery workaround arg + kwargs.pop('_', None) + ans = func(self, *args, **kwargs) cherrypy.response.headers['Content-Type'] = eself.mimetype updated = self.db.last_modified() From 5fc0d687307bb921c35076bcd92bbcc87d25906a Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Thu, 21 Oct 2010 09:54:21 +0100 Subject: [PATCH 30/69] Improve conflicting custom column error reporting when restoring a database --- src/calibre/library/restore.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/calibre/library/restore.py b/src/calibre/library/restore.py index 748d60b0b2..bc2c740279 100644 --- a/src/calibre/library/restore.py +++ b/src/calibre/library/restore.py @@ -71,9 +71,17 @@ class Restore(Thread): if self.conflicting_custom_cols: ans += '\n\n' - ans += 'The following custom columns were not fully restored:\n' + ans += 'The following custom columns have conflicting definitions ' \ + 'and were not fully restored:\n' for x in self.conflicting_custom_cols: ans += '\t#'+x+'\n' + ans += '\tused:\t%s, %s, %s, %s\n'%(self.custom_columns[x][1], + self.custom_columns[x][2], + self.custom_columns[x][3], + self.custom_columns[x][5]) + for coldef in self.conflicting_custom_cols[x]: + ans += '\tother:\t%s, %s, %s, %s\n'%(coldef[1], coldef[2], + coldef[3], coldef[5]) if self.mismatched_dirs: ans += '\n\n' @@ -152,7 +160,7 @@ class Restore(Thread): def create_cc_metadata(self): self.books.sort(key=itemgetter('timestamp')) - m = {} + self.custom_columns = {} fields = ('label', 'name', 'datatype', 'is_multiple', 'is_editable', 'display') for b in self.books: @@ -168,16 +176,17 @@ class Restore(Thread): if len(args) == len(fields): # TODO: Do series type columns need special handling? label = cfm['label'] - if label in m and args != m[label]: + if label in self.custom_columns and args != self.custom_columns[label]: if label not in self.conflicting_custom_cols: self.conflicting_custom_cols[label] = [] - self.conflicting_custom_cols[label].append(args) - m[cfm['label']] = args + if self.custom_columns[label] not in self.conflicting_custom_cols[label]: + self.conflicting_custom_cols[label].append(self.custom_columns[label]) + self.custom_columns[label] = args db = RestoreDatabase(self.library_path) - self.progress_callback(None, len(m)) - if len(m): - for i,args in enumerate(m.values()): + self.progress_callback(None, len(self.custom_columns)) + if len(self.custom_columns): + for i,args in enumerate(self.custom_columns.values()): db.create_custom_column(*args) self.progress_callback(_('creating custom column ')+args[0], i+1) db.conn.close() From 506cd50dd1774b3961150b3a97d132a7c1cd0c1e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 21 Oct 2010 07:49:13 -0700 Subject: [PATCH 31/69] Fix #7228 (The Economic Times of India - News Headers are missing) --- resources/recipes/theeconomictimes_india.recipe | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/resources/recipes/theeconomictimes_india.recipe b/resources/recipes/theeconomictimes_india.recipe index 59cd56b67e..92d2a64a70 100644 --- a/resources/recipes/theeconomictimes_india.recipe +++ b/resources/recipes/theeconomictimes_india.recipe @@ -21,8 +21,9 @@ class TheEconomicTimes(BasicNewsRecipe): language = 'en_IN' publication_type = 'newspaper' masthead_url = 'http://economictimes.indiatimes.com/photo/2676871.cms' - extra_css = """ body{font-family: Arial,Helvetica,sans-serif} - .heading1{font-size: xx-large; font-weight: bold} """ + extra_css = """ + body{font-family: Arial,Helvetica,sans-serif} + """ conversion_options = { 'comment' : description @@ -31,8 +32,9 @@ class TheEconomicTimes(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [dict(attrs={'class':['heading1','headingnext','Normal']})] + keep_only_tags = [dict(attrs={'class':'printdiv'})] remove_tags = [dict(name=['object','link','embed','iframe','base','table','meta'])] + remove_attributes = ['name'] feeds = [(u'All articles', u'http://economictimes.indiatimes.com/rssfeedsdefault.cms')] From 6dce871b053c803f4a0283ad1929fb8e375ce82c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 21 Oct 2010 07:50:22 -0700 Subject: [PATCH 32/69] Fix #7187 (New Scientist recipe update) --- resources/recipes/new_scientist.recipe | 29 +++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/resources/recipes/new_scientist.recipe b/resources/recipes/new_scientist.recipe index 2e864565ff..02bbbe4d42 100644 --- a/resources/recipes/new_scientist.recipe +++ b/resources/recipes/new_scientist.recipe @@ -8,11 +8,11 @@ import re from calibre.web.feeds.news import BasicNewsRecipe class NewScientist(BasicNewsRecipe): - title = 'New Scientist - Online News' + title = 'New Scientist - Online News w. subscription' __author__ = 'Darko Miletic' description = 'Science news and science articles from New Scientist.' language = 'en' - publisher = 'New Scientist' + publisher = 'Reed Business Information Ltd.' category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software' oldest_article = 7 max_articles_per_feed = 100 @@ -21,7 +21,12 @@ class NewScientist(BasicNewsRecipe): cover_url = 'http://www.newscientist.com/currentcover.jpg' masthead_url = 'http://www.newscientist.com/img/misc/ns_logo.jpg' encoding = 'utf-8' - extra_css = ' body{font-family: Arial,sans-serif} img{margin-bottom: 0.8em} ' + needs_subscription = 'optional' + extra_css = """ + body{font-family: Arial,sans-serif} + img{margin-bottom: 0.8em} + .quotebx{font-size: x-large; font-weight: bold; margin-right: 2em; margin-left: 2em} + """ conversion_options = { 'comment' : description @@ -33,15 +38,27 @@ class NewScientist(BasicNewsRecipe): keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})] + def get_browser(self): + br = BasicNewsRecipe.get_browser() + br.open('http://www.newscientist.com/') + if self.username is not None and self.password is not None: + br.open('https://www.newscientist.com/user/login?redirectURL=') + br.select_form(nr=2) + br['loginId' ] = self.username + br['password'] = self.password + br.submit() + return br + remove_tags = [ dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]}) ,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools','comments','blgsocial','sharebtns']}) ,dict(name='p' , attrs={'class':['marker','infotext' ]}) ,dict(name='meta' , attrs={'name' :'description' }) - ,dict(name='a' , attrs={'rel' :'tag' }) + ,dict(name='a' , attrs={'rel' :'tag' }) + ,dict(name=['link','base','meta','iframe','object','embed']) ] remove_tags_after = dict(attrs={'class':['nbpcopy','comments']}) - remove_attributes = ['height','width'] + remove_attributes = ['height','width','lang'] feeds = [ (u'Latest Headlines' , u'http://feeds.newscientist.com/science-news' ) @@ -62,6 +79,8 @@ class NewScientist(BasicNewsRecipe): return url + '?full=true&print=true' def preprocess_html(self, soup): + for item in soup.findAll(['quote','quotetext']): + item.name='p' for tg in soup.findAll('a'): if tg.string == 'Home': tg.parent.extract() From eafc6e72c11864f6b0d084938d940624cc704144 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 21 Oct 2010 07:51:31 -0700 Subject: [PATCH 33/69] Fix #7180 (Fox news website changed) --- resources/recipes/foxnews.recipe | 39 +++++++++----------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/resources/recipes/foxnews.recipe b/resources/recipes/foxnews.recipe index e7e76390b5..916bd28ad2 100644 --- a/resources/recipes/foxnews.recipe +++ b/resources/recipes/foxnews.recipe @@ -4,7 +4,6 @@ __copyright__ = '2010, Darko Miletic ' foxnews.com ''' -import re from calibre.web.feeds.news import BasicNewsRecipe class FoxNews(BasicNewsRecipe): @@ -21,11 +20,10 @@ class FoxNews(BasicNewsRecipe): language = 'en' publication_type = 'newsportal' remove_empty_feeds = True - extra_css = ' body{font-family: Arial,sans-serif } img{margin-bottom: 0.4em} .caption{font-size: x-small} ' - - preprocess_regexps = [ - (re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '') - ] + extra_css = """ + body{font-family: Arial,sans-serif } + .caption{font-size: x-small} + """ conversion_options = { 'comment' : description @@ -34,27 +32,15 @@ class FoxNews(BasicNewsRecipe): , 'language' : language } - remove_attributes = ['xmlns'] - - keep_only_tags = [ - dict(name='div', attrs={'id' :['story','browse-story-content']}) - ,dict(name='div', attrs={'class':['posts articles','slideshow']}) - ,dict(name='h4' , attrs={'class':'storyDate'}) - ,dict(name='h1' , attrs={'xmlns:functx':'http://www.functx.com'}) - ,dict(name='div', attrs={'class':'authInfo'}) - ,dict(name='div', attrs={'id':'articleCont'}) - ] + remove_attributes = ['xmlns','lang'] remove_tags = [ - dict(name='div', attrs={'class':['share-links','quigo quigo2','share-text','storyControls','socShare','btm-links']}) - ,dict(name='div', attrs={'id' :['otherMedia','loomia_display','img-all-path','story-vcmId','story-url','pane-browse-story-comments','story_related']}) - ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2','tabs']}) - ,dict(name='a' , attrs={'class':'join-discussion'}) - ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2']}) - ,dict(name='p' , attrs={'class':'see_fullarchive'}) - ,dict(name=['object','embed','link','script']) + dict(name=['object','embed','link','script','iframe','meta','base']) + ,dict(attrs={'class':['user-control','url-description','ad-context']}) ] + remove_tags_before=dict(name='h1') + remove_tags_after =dict(attrs={'class':'url-description'}) feeds = [ (u'Latest Headlines', u'http://feeds.foxnews.com/foxnews/latest' ) @@ -67,8 +53,5 @@ class FoxNews(BasicNewsRecipe): ,(u'Entertainment' , u'http://feeds.foxnews.com/foxnews/entertainment' ) ] - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) - + def print_version(self, url): + return url + 'print' From 0559826083bf6f653aba88e6bbbedb67d1288064 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 21 Oct 2010 07:53:53 -0700 Subject: [PATCH 34/69] Fix #7147 (Auto merge books not respecting article sort tweak) --- src/calibre/library/database2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index bbfef47977..b21299c335 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -748,10 +748,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): return False def find_identical_books(self, mi): - fuzzy_title_patterns = [(re.compile(pat), repl) for pat, repl in + fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in [ (r'[\[\](){}<>\'";,:#]', ''), - (r'^(the|a|an) ', ''), + (tweaks.get('title_sort_articles', r'^(a|the|an)\s+'), ''), (r'[-._]', ' '), (r'\s+', ' ') ] From 2a2f1fd988e6e35ef4c71d5b9c2343c1c0570ba7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 21 Oct 2010 07:56:16 -0700 Subject: [PATCH 35/69] Fix #7252 (New recipe for The Economic Collapse blog) --- resources/images/news/theecocolapse.png | Bin 0 -> 1264 bytes resources/recipes/theecocolapse.recipe | 46 ++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 resources/images/news/theecocolapse.png create mode 100644 resources/recipes/theecocolapse.recipe diff --git a/resources/images/news/theecocolapse.png b/resources/images/news/theecocolapse.png new file mode 100644 index 0000000000000000000000000000000000000000..1c45ec14bf41c20c9a86311f4af9ccd47de799df GIT binary patch literal 1264 zcmeAS@N?(olHy`uVBq!ia0vp^0w65F1|AWFkDz^H9*yf;%kG#rN>m48Y1xyIaN$&n8)EXe}nRBoyVD>IS z-#0E7udq$NamUy=eV^+KH%`xcPal5RbAIN}x!)5mr1bXgt$JF-%Ohi%WF%|!zx&R;|JQGI^}YApDl0ZK{r-0G%fG*Q+fTX#rfrV9f6p&ajN4bS?jC#klu<6-#|8AM~r)OlO zJ^Pf}w(|JVSGhY%vkMC=DoVC2f4={JRrP$kP%TcUwfAy44t(Ya)nYJ`>^q&purp@q zr1Q)vo1Ye$ac|fp)UU5!z4!9v=4So4JG;M|&tIW_;cBYUT#JVvS6nT!bY&FbS`?wd za3tyFm+kp7)4fEySBe~$DhUC z86ByfK?^TrWJnz^_m9wc^M;}3#{>@-rMq|S?mu{Ndiu*ae9gttq|&K${No;tFf9fSPa!7;t1@O3^UH^>Z(n?8@HPOWB%ilC%MHXo?TE}{PDrTgbQhl zGiUcEvN#H`Fgh%sJ^y+ZJKMHxlTN;U+sf)WNyYP##p? zyz*Y2y64V0Dw(sEg=(!8YfxEz-DtAr^vjtITerF-)dE4XvQ)22-o2Q(eZ|l3?six( z&Ec}5jLe+5rB;U@&Xb${_R31*^s@U+SFg(3hlShM&6&5aBr|Q<>eYd9cLV>F|NS-9 zCok8wUamXv%5?p^cR7HbW&HDJ=hmqndnzyMysx+8IJeT=>Ei}qiJ@BJ8c~vxSdwa$ zT$Bo=7>o=I4RnFX$S}mf+{)C%%Fsd=$S^Rt;`i$jiiX_$l+3hB6b+VECWclfW)KYt Sz52mG4Gf;HelF{r5}E+n8aB)T literal 0 HcmV?d00001 diff --git a/resources/recipes/theecocolapse.recipe b/resources/recipes/theecocolapse.recipe new file mode 100644 index 0000000000..6743ca68b5 --- /dev/null +++ b/resources/recipes/theecocolapse.recipe @@ -0,0 +1,46 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +theeconomiccollapseblog.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe +class TheEconomicCollapse(BasicNewsRecipe): + title = 'The Economic Collapse' + __author__ = 'Darko Miletic' + description = 'Are You Prepared For The Coming Economic Collapse And The Next Great Depression?' + publisher = 'The Economic Collapse' + category = 'news, politics, USA, economy' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en' + remove_empty_feeds = True + extra_css = """ + body{font-family: Tahoma,Arial,sans-serif } + img{margin-bottom: 0.4em} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [ + dict(attrs={'class':'sociable'}) + ,dict(name=['iframe','object','embed','meta','link','base']) + ] + remove_attributes=['lang','onclick','width','height'] + keep_only_tags=[dict(attrs={'class':['post-headline','post-bodycopy clearfix','']})] + + feeds = [(u'Posts', u'http://theeconomiccollapseblog.com/feed')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return self.adeify_images(soup) + From 91f8c368c19e7d47019694457977911e4eba28e7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 21 Oct 2010 07:59:42 -0700 Subject: [PATCH 36/69] Fix #7249 (updated "el_pais" recipe, corrects disappearing titles) --- resources/recipes/el_pais.recipe | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/resources/recipes/el_pais.recipe b/resources/recipes/el_pais.recipe index 1e2164b2af..2e358060b8 100644 --- a/resources/recipes/el_pais.recipe +++ b/resources/recipes/el_pais.recipe @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' -description = 'Main daily newspaper from Spain - v1.03 (03, September 2010)' +description = 'Main daily newspaper from Spain - v1.04 (19, October 2010)' __docformat__ = 'restructuredtext en' ''' @@ -32,19 +32,16 @@ class ElPais(BasicNewsRecipe): remove_javascript = True no_stylesheets = True - keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})] - - extra_css = ''' - p{style:normal size:12 serif} + keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})] - ''' + extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} ' remove_tags = [ dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}), - dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}), + dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos estirar','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}), dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}), dict(name='p', attrs={'class':'nav_meses'}), - dict(attrs={'class':['enlaces_m','miniaturas_m']}) + dict(attrs={'class':['enlaces_m','miniaturas_m','nav_miniaturas_m']}) ] feeds = [ From 77284f75284ae7e5f8eb0823cd1f406316e0280b Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Thu, 21 Oct 2010 21:09:47 +0100 Subject: [PATCH 37/69] Fix exceptions when referencing invalid fields XXX_index --- src/calibre/ebooks/metadata/book/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 593e161df7..9286226a3e 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -501,13 +501,15 @@ class Metadata(object): if key.startswith('#') and key.endswith('_index'): tkey = key[:-6] # strip the _index cmeta = self.get_user_metadata(tkey, make_copy=False) - if cmeta['datatype'] == 'series': + if cmeta and cmeta['datatype'] == 'series': if self.get(tkey): res = self.get_extra(tkey) return (unicode(cmeta['name']+'_index'), self.format_series_index(res), res, cmeta) else: return (unicode(cmeta['name']+'_index'), '', '', cmeta) + else: + return (key, key, None, None) if key in self.custom_field_keys(): res = self.get(key, None) From 6375d4cc451ec5a3d28a078e4170134321c2bb86 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Thu, 21 Oct 2010 21:14:04 +0100 Subject: [PATCH 38/69] Fix the fix --- src/calibre/ebooks/metadata/book/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 9286226a3e..125cd542b8 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -508,8 +508,6 @@ class Metadata(object): self.format_series_index(res), res, cmeta) else: return (unicode(cmeta['name']+'_index'), '', '', cmeta) - else: - return (key, key, None, None) if key in self.custom_field_keys(): res = self.get(key, None) From eccb70cede67d608164b51f9393958bea79fcfc2 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 22 Oct 2010 10:00:24 +0100 Subject: [PATCH 39/69] Fix bulk edit of bool columns when the tristate tweak is set to no --- src/calibre/gui2/custom_column_widgets.py | 33 ++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py index 3be4c19d17..3103d7c459 100644 --- a/src/calibre/gui2/custom_column_widgets.py +++ b/src/calibre/gui2/custom_column_widgets.py @@ -429,7 +429,38 @@ class BulkBase(Base): self.db.set_custom_bulk(book_ids, val, num=self.col_id, notify=notify) class BulkBool(BulkBase, Bool): - pass + + def get_initial_value(self, book_ids): + value = None + for book_id in book_ids: + val = self.db.get_custom(book_id, num=self.col_id, index_is_id=True) + if tweaks['bool_custom_columns_are_tristate'] == 'no' and val is None: + val = False + if value is not None and value != val: + return None + value = val + return value + + def setup_ui(self, parent): + self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent), + QComboBox(parent)] + w = self.widgets[1] + items = [_('Yes'), _('No'), _('Undefined')] + icons = [I('ok.png'), I('list_remove.png'), I('blank.png')] + for icon, text in zip(icons, items): + w.addItem(QIcon(icon), text) + + def setter(self, val): + val = {None: 2, False: 1, True: 0}[val] + self.widgets[1].setCurrentIndex(val) + + def commit(self, book_ids, notify=False): + val = self.gui_val + val = self.normalize_ui_val(val) + if val != self.initial_val: + if tweaks['bool_custom_columns_are_tristate'] == 'no' and val is None: + val = False + self.db.set_custom_bulk(book_ids, val, num=self.col_id, notify=notify) class BulkInt(BulkBase, Int): pass From 8ad31bbf087b7503e94cd5b142a2c8af514ca9d7 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 22 Oct 2010 12:31:56 +0100 Subject: [PATCH 40/69] Change bulk edit and the template function titlecase to use utils.titlecase instead of str.title() --- src/calibre/gui2/dialogs/metadata_bulk.py | 7 ++++--- src/calibre/utils/formatter.py | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index e27f4b5eab..de62f20de0 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -16,6 +16,7 @@ from calibre.gui2.custom_column_widgets import populate_metadata_page from calibre.gui2 import error_dialog from calibre.gui2.progress_indicator import ProgressIndicator from calibre.utils.config import dynamic +from calibre.utils.titlecase import titlecase class MyBlockingBusy(QDialog): @@ -115,7 +116,7 @@ class MyBlockingBusy(QDialog): aum = [a.strip().replace('|', ',') for a in aum.split(',')] new_title = authors_to_string(aum) if do_title_case: - new_title = new_title.title() + new_title = titlecase(new_title) self.db.set_title(id, new_title, notify=False) title_set = True if title: @@ -123,7 +124,7 @@ class MyBlockingBusy(QDialog): self.db.set_authors(id, new_authors, notify=False) if do_title_case and not title_set: title = self.db.title(id, index_is_id=True) - self.db.set_title(id, title.title(), notify=False) + self.db.set_title(id, titlecase(title), notify=False) if au: self.db.set_authors(id, string_to_authors(au), notify=False) elif self.current_phase == 2: @@ -179,7 +180,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): s_r_functions = { '' : lambda x: x, _('Lower Case') : lambda x: x.lower(), _('Upper Case') : lambda x: x.upper(), - _('Title Case') : lambda x: x.title(), + _('Title Case') : lambda x: titlecase(x), } s_r_match_modes = [ _('Character match'), diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py index 76c086cc58..336ac2390b 100644 --- a/src/calibre/utils/formatter.py +++ b/src/calibre/utils/formatter.py @@ -7,6 +7,7 @@ Created on 23 Sep 2010 import re, string, traceback from calibre.constants import DEBUG +from calibre.utils.titlecase import titlecase class TemplateFormatter(string.Formatter): ''' @@ -81,7 +82,7 @@ class TemplateFormatter(string.Formatter): functions = { 'uppercase' : (0, lambda s,x: x.upper()), 'lowercase' : (0, lambda s,x: x.lower()), - 'titlecase' : (0, lambda s,x: x.title()), + 'titlecase' : (0, lambda s,x: titlecase(x)), 'capitalize' : (0, lambda s,x: x.capitalize()), 'contains' : (3, _contains), 'ifempty' : (1, _ifempty), From abd8ca4cb9a179e71f7512175e88b0decea027d9 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 22 Oct 2010 13:55:17 +0100 Subject: [PATCH 41/69] Add the possibility of sorting collections by an arbitrary metadata field --- resources/default_tweaks.py | 18 ++++++++++++++ src/calibre/devices/usbms/books.py | 40 ++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 86921886ad..dbb1154172 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -137,6 +137,24 @@ auto_connect_to_folder = '' sony_collection_renaming_rules={} +# Specify how sony collections are sorted. This tweak is only applicable if +# metadata management is set to automatic. You can indicate which metadata is to +# be used to sort on a collection-by-collection basis. The format of the tweak +# is a list of metadata fields from which collections are made, followed by the +# name of the metadata field containing the sort value. +# Example: The following indicates that collections built from pubdate and tags +# are to be sorted by the value in the custom column '#mydate', that collections +# built from 'series' are to be sorted by 'series_index', and that all other +# collections are to be sorted by title. If a collection metadata field is not +# named, then if it is a series- based collection it is sorted by series order, +# otherwise it is sorted by title order. +# [(['pubdate', 'tags'],'#mydate'), (['series'],'series_index'), (['*'], 'title')] +# Note that the bracketing and parentheses are required. The syntax is +# [ ( [list of fields], sort field ) , ( [ list of fields ] , sort field ) ] +# Default: empty (no rules), so no collection attributes are named. +sony_collection_sorting_rules = [] + + # Create search terms to apply a query across several built-in search terms. # Syntax: {'new term':['existing term 1', 'term 2', ...], 'new':['old'...] ...} # Example: create the term 'myseries' that when used as myseries:foo would diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py index 462d78b233..f54bfbddf9 100644 --- a/src/calibre/devices/usbms/books.py +++ b/src/calibre/devices/usbms/books.py @@ -99,6 +99,15 @@ class CollectionsBookList(BookList): def supports_collections(self): return True + def in_category_sort_rules(self, attr): + sorts = tweaks['sony_collection_sorting_rules'] + for attrs,sortattr in sorts: + if attr in attrs or '*' in attrs: + print 'in category sort:', attr, sortattr + return sortattr + print 'in category sort:', attr, 'None' + return None + def compute_category_name(self, attr, category, field_meta): renames = tweaks['sony_collection_renaming_rules'] attr_name = renames.get(attr, None) @@ -116,6 +125,7 @@ class CollectionsBookList(BookList): from calibre.devices.usbms.driver import debug_print debug_print('Starting get_collections:', prefs['manage_device_metadata']) debug_print('Renaming rules:', tweaks['sony_collection_renaming_rules']) + debug_print('Sorting rules:', tweaks['sony_collection_sorting_rules']) # Complexity: we can use renaming rules only when using automatic # management. Otherwise we don't always have the metadata to make the @@ -171,6 +181,7 @@ class CollectionsBookList(BookList): else: val = [val] + sort_attr = self.in_category_sort_rules(attr) for category in val: is_series = False if doing_dc: @@ -199,22 +210,41 @@ class CollectionsBookList(BookList): if cat_name not in collections: collections[cat_name] = {} - if is_series: + if use_renaming_rules and sort_attr: + sort_val = book.get(sort_attr, None) + collections[cat_name][lpath] = \ + (book, sort_val, book.get('title_sort', 'zzzz')) + elif is_series: if doing_dc: collections[cat_name][lpath] = \ - (book, book.get('series_index', sys.maxint)) + (book, book.get('series_index', sys.maxint), '') else: collections[cat_name][lpath] = \ - (book, book.get(attr+'_index', sys.maxint)) + (book, book.get(attr+'_index', sys.maxint), '') else: if lpath not in collections[cat_name]: collections[cat_name][lpath] = \ - (book, book.get('title_sort', 'zzzz')) + (book, book.get('title_sort', 'zzzz'), '') # Sort collections result = {} + + def none_cmp(xx, yy): + x = xx[1] + y = yy[1] + if x is None and y is None: + return cmp(xx[2], yy[2]) + if x is None: + return 1 + if y is None: + return -1 + c = cmp(x, y) + if c != 0: + return c + return cmp(xx[2], yy[2]) + for category, lpaths in collections.items(): books = lpaths.values() - books.sort(cmp=lambda x,y:cmp(x[1], y[1])) + books.sort(cmp=none_cmp) result[category] = [x[0] for x in books] return result From e568a55d5944c483751baa65b11c73c12e244aa6 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 22 Oct 2010 13:58:19 +0100 Subject: [PATCH 42/69] Improve sony_collection_renaming_rules documentation. --- resources/default_tweaks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index dbb1154172..270b7e0b06 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -106,7 +106,8 @@ title_sort_articles=r'^(A|The|An)\s+' auto_connect_to_folder = '' -# Specify renaming rules for sony collections. Collections on Sonys are named +# Specify renaming rules for sony collections. This tweak is only applicable if +# metadata management is set to automatic. Collections on Sonys are named # depending upon whether the field is standard or custom. A collection derived # from a standard field is named for the value in that field. For example, if # the standard 'series' column contains the name 'Darkover', then the series From 122fb530873c8559fe5434b3479f935473f63458 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 22 Oct 2010 14:01:11 +0100 Subject: [PATCH 43/69] Remove print statements --- src/calibre/devices/usbms/books.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py index f54bfbddf9..5063daa29f 100644 --- a/src/calibre/devices/usbms/books.py +++ b/src/calibre/devices/usbms/books.py @@ -103,9 +103,7 @@ class CollectionsBookList(BookList): sorts = tweaks['sony_collection_sorting_rules'] for attrs,sortattr in sorts: if attr in attrs or '*' in attrs: - print 'in category sort:', attr, sortattr return sortattr - print 'in category sort:', attr, 'None' return None def compute_category_name(self, attr, category, field_meta): From fddf3abf92b84eabc0a11b9bceda826e0a24be37 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 22 Oct 2010 18:20:26 +0100 Subject: [PATCH 44/69] Fix broken autonumbering for standard series columns --- src/calibre/gui2/dialogs/metadata_bulk.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index de62f20de0..32350c36b7 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -51,6 +51,7 @@ class MyBlockingBusy(QDialog): self.start() self.args = args + self.series_start_value = None self.db = db self.ids = ids self.error = None @@ -148,8 +149,10 @@ class MyBlockingBusy(QDialog): if do_series: if do_series_restart: - next = series_start_value - series_start_value += 1 + if self.series_start_value is None: + self.series_start_value = series_start_value + next = self.series_start_value + self.series_start_value += 1 else: next = self.db.get_next_series_num_for(series) self.db.set_series(id, series, notify=False, commit=False) From 69481d62d6ad31546d83f89fa63172765bc11d9c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 22 Oct 2010 14:40:43 -0700 Subject: [PATCH 45/69] Increase image size for comics in Kindle DX profile for better conversion of comics to PDF --- src/calibre/customize/profiles.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 4fa53b1cdb..27f0805f86 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -583,7 +583,8 @@ class KindleDXOutput(OutputProfile): # Screen size is a best guess screen_size = (744, 1022) dpi = 150.0 - comic_screen_size = (741, 1022) + comic_screen_size = (771, 1116) + #comic_screen_size = (741, 1022) supports_mobi_indexing = True periodical_date_in_title = False mobi_ems_per_blockquote = 2.0 From 3839b99e9c5639e5c6a34b9e54531a4e664cf4d1 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 23 Oct 2010 15:11:06 +0100 Subject: [PATCH 46/69] Avoid creating metadata.opf files in the root of the library when adding books --- src/calibre/library/database2.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index b21299c335..4943dd9dd5 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -654,16 +654,20 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def get_metadata_for_dump(self, idx, remove_from_dirtied=True): try: - path = os.path.join(self.abspath(idx, index_is_id=True), 'metadata.opf') - mi = self.get_metadata(idx, index_is_id=True) - # Always set cover to cover.jpg. Even if cover doesn't exist, - # no harm done. This way no need to call dirtied when - # cover is set/removed - mi.cover = 'cover.jpg' + path, mi = (None, None) + # While a book is being created, the path is empty. Don't bother to + # try to write the opf, because it will go to the wrong folder. + if self.path(idx, index_is_id=True): + path = os.path.join(self.abspath(idx, index_is_id=True), 'metadata.opf') + mi = self.get_metadata(idx, index_is_id=True) + # Always set cover to cover.jpg. Even if cover doesn't exist, + # no harm done. This way no need to call dirtied when + # cover is set/removed + mi.cover = 'cover.jpg' except: # This almost certainly means that the book has been deleted while # the backup operation sat in the queue. - path, mi = (None, None) + pass try: # clear the dirtied indicator. The user must put it back if From c020a3d12797a57634a700addaaa0caa87bee150 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Oct 2010 19:21:01 -0700 Subject: [PATCH 47/69] STNN by Larry Chan --- resources/recipes/stnn.recipe | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 resources/recipes/stnn.recipe diff --git a/resources/recipes/stnn.recipe b/resources/recipes/stnn.recipe new file mode 100644 index 0000000000..7be357c921 --- /dev/null +++ b/resources/recipes/stnn.recipe @@ -0,0 +1,60 @@ + + +__license__ = 'GPL v3' +__copyright__ = '2010, Larry Chan ' +''' +Singtao STNN +''' +from calibre.web.feeds.recipes import BasicNewsRecipe + +class SingtaoSTNN(BasicNewsRecipe): + title = 'Singtao STNN' + __author__ = 'Larry Chan, larry1chan' + description = 'Chinese News' + oldest_article = 2 + max_articles_per_feed = 100 + simultaneous_downloads = 5 + no_stylesheets = True + #delay = 1 + use_embedded_content = False + encoding = 'gb2312' + publisher = 'Singtao STNN' + category = 'news, China, world' + language = 'zh' + publication_type = 'newsportal' + extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + masthead_url = 'http://www.stnn.cc/images/0806/logo_080728.gif' + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } + + + remove_tags_before = dict(name='div', attrs={'class':['page_box']}) + remove_tags_after = dict(name='div', attrs={'class':['pagelist']}) + + keep_only_tags = [ + dict(name='div', attrs={'class':['font_title clearfix']}), + dict(name='div', attrs={'id':['content_zoom']}) + + ] + + remove_attributes = ['width','height','href'] + + # for a full list of rss check out [url]http://www.stnn.cc/rss/[/url] + + feeds = [ (u'Headline News', u'http://www.stnn.cc/rss/news/index.xml'), + (u'Breaking News', u'http://www.stnn.cc/rss/tufa/index.xml'), + (u'Finance', u'http://www.stnn.cc/rss/fin/index.xml'), + (u'Entertainment', u'http://www.stnn.cc/rss/ent/index.xml'), + (u'International', u'http://www.stnn.cc/rss/guoji/index.xml'), + (u'China', u'http://www.stnn.cc/rss/china/index.xml'), + (u'Opnion', u'http://www.stnn.cc/rss/fin_op/index.xml'), + (u'Blog', u'http://blog.stnn.cc/uploadfile/rssblogtypehotlog.xml'), + (u'Hong Kong', u'http://www.stnn.cc/rss/hongkong/index.xml') + + ] + From 4110646e8a2f81d84b5c72a9347fb5661811137f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 24 Oct 2010 09:16:16 -0700 Subject: [PATCH 48/69] Fix #7287 (Building calibre fails on PyQt 4.8.0) --- src/calibre/gui2/lrf_renderer/main.ui | 189 +++++++++++--------------- src/calibre/gui2/viewer/main.ui | 4 +- 2 files changed, 81 insertions(+), 112 deletions(-) diff --git a/src/calibre/gui2/lrf_renderer/main.ui b/src/calibre/gui2/lrf_renderer/main.ui index 0e7b54edb8..4143b4f509 100644 --- a/src/calibre/gui2/lrf_renderer/main.ui +++ b/src/calibre/gui2/lrf_renderer/main.ui @@ -1,7 +1,8 @@ - + + MainWindow - - + + 0 0 @@ -9,75 +10,51 @@ 701 - - + + 0 0 - + LRF Viewer - - + + :/images/viewer.png:/images/viewer.png - - - - 0 - 39 - 601 - 662 - - - - + + + 0 - - + + 0 - - - - 0 - 0 - 601 - 662 - - - - + + + 0 - - - + + + true - - - - 0 - 0 - 601 - 701 - - - + + - + Qt::Vertical - + 20 40 @@ -86,34 +63,34 @@ - - + + QFrame::StyledPanel - + QFrame::Raised - + - - + + 0 - + -1 - - + + 11 75 true - + Parsing LRF file @@ -123,10 +100,10 @@ - + Qt::Vertical - + 20 40 @@ -140,93 +117,85 @@ - - - - 0 - 0 - 601 - 39 - - - + + LRF Viewer toolbar - + Qt::AllToolBarAreas - - TopToolBarArea + + Qt::TopToolBarArea - - true + + false - - - - - - - - - + + + + + + + + + - - - + + + :/images/next.png:/images/next.png - + Next Page - - - + + + :/images/previous.png:/images/previous.png - + Previous Page - - - + + + :/images/back.png:/images/back.png - + Back - - - + + + :/images/forward.png:/images/forward.png - + Forward - - + + Next match - - - + + + :/images/document_open.png:/images/document_open.png - + Open ebook - - - + + + :/images/config.png:/images/config.png - + Configure @@ -239,7 +208,7 @@ - + diff --git a/src/calibre/gui2/viewer/main.ui b/src/calibre/gui2/viewer/main.ui index e3b8fb8a61..4cfa1590da 100644 --- a/src/calibre/gui2/viewer/main.ui +++ b/src/calibre/gui2/viewer/main.ui @@ -108,7 +108,7 @@ - LeftToolBarArea + Qt::LeftToolBarArea false @@ -136,7 +136,7 @@ - TopToolBarArea + Qt::TopToolBarArea false From 138d503aba2de23bdbfbe1a0c488c84dbe2b2754 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 24 Oct 2010 14:57:43 -0700 Subject: [PATCH 49/69] Fix #7276 (Viewer crash on main app close. (v0.7.24)) --- src/calibre/gui2/viewer/main.py | 3 +++ src/calibre/utils/ipc/worker.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index e113ef0611..09019af18b 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -716,6 +716,9 @@ View an ebook. def main(args=sys.argv): + # Ensure viewer can continue to function if GUI is closed + os.environ.pop('CALIBRE_WORKER_TEMP_DIR', None) + parser = option_parser() opts, args = parser.parse_args(args) pid = os.fork() if False and (islinux or isfreebsd) else -1 diff --git a/src/calibre/utils/ipc/worker.py b/src/calibre/utils/ipc/worker.py index e3584380a1..d8ffad7c53 100644 --- a/src/calibre/utils/ipc/worker.py +++ b/src/calibre/utils/ipc/worker.py @@ -105,7 +105,7 @@ def main(): notifier.start() result = func(*args, **kwargs) - if result is not None: + if result is not None and os.path.exists(os.path.dirname(resultf)): cPickle.dump(result, open(resultf, 'wb'), -1) notifier.queue.put(None) From 7c25c4d149f70c3abc281a52c328a0efada46e36 Mon Sep 17 00:00:00 2001 From: Timothy Legge Date: Mon, 25 Oct 2010 21:18:06 -0300 Subject: [PATCH 50/69] Changes for kobo WIFI version 1.7 --- src/calibre/devices/kobo/driver.py | 63 ++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 418bfe5e0d..3562da55d2 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -22,7 +22,9 @@ class KOBO(USBMS): gui_name = 'Kobo Reader' description = _('Communicate with the Kobo Reader') author = 'Timothy Legge and Kovid Goyal' - version = (1, 0, 6) + version = (1, 0, 7) + + dbversion = 0 supported_platforms = ['windows', 'osx', 'linux'] @@ -92,7 +94,7 @@ class KOBO(USBMS): if lpath.startswith(os.sep): lpath = lpath[len(os.sep):] lpath = lpath.replace('\\', '/') -# print "LPATH: " + lpath + # debug_print("LPATH: ", lpath, " - Title: " , title) playlist_map = {} @@ -112,7 +114,7 @@ class KOBO(USBMS): #print "Image name Normalized: " + imagename if imagename is not None: bl[idx].thumbnail = ImageWrapper(imagename) - if ContentType != '6': + if (ContentType != '6'and self.dbversion < 8) or (self.dbversion >= 8): if self.update_metadata_item(bl[idx]): # print 'update_metadata_item returned true' changed = True @@ -120,10 +122,16 @@ class KOBO(USBMS): playlist_map[lpath] not in bl[idx].device_collections: bl[idx].device_collections.append(playlist_map[lpath]) else: - if ContentType == '6': + if ContentType == '6' and self.dbversion < 8: book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=1048576) else: - book = self.book_from_path(prefix, lpath, title, authors, mime, date, ContentType, ImageID) + try: + book = self.book_from_path(prefix, lpath, title, authors, mime, date, ContentType, ImageID) + except: + debug_print("prefix: ", prefix, "lpath: ", lpath, "title: ", title, "authors: ", authors, \ + "mime: ", mime, "date: ", date, "ContentType: ", ContentType, "ImageID: ", ImageID) + raise + # print 'Update booklist' book.device_collections = [playlist_map[lpath]] if lpath in playlist_map else [] @@ -143,6 +151,13 @@ class KOBO(USBMS): # numrows = row[0] #cursor.close() + # Determine the database version + # 4 - Bluetooth Kobo Rev 2 (1.4) + # 8 - WIFI KOBO Rev 1 + cursor.execute('select version from dbversion') + result = cursor.fetchone() + self.dbversion = result[0] + query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ 'ImageID, ReadStatus from content where BookID is Null' @@ -153,7 +168,8 @@ class KOBO(USBMS): # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...')) path = self.path_from_contentid(row[3], row[5], oncard) - mime = mime_type_ext(path_to_ext(row[3])) + mime = mime_type_ext(path_to_ext(path)) if path.find('kepub') == -1 else 'application/epub+zip' + # debug_print("mime:", mime) if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"): changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7]) @@ -206,7 +222,7 @@ class KOBO(USBMS): cursor.close() cursor = connection.cursor() - if ContentType == 6: + if ContentType == 6 and self.dbversion < 8: # Delete the shortcover_pages first cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t) @@ -249,7 +265,7 @@ class KOBO(USBMS): path = self.normalize_path(path) # print "Delete file normalized path: " + path extension = os.path.splitext(path)[1] - ContentType = self.get_content_type_from_extension(extension) + ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(path) ContentID = self.contentid_from_path(path, ContentType) @@ -332,9 +348,14 @@ class KOBO(USBMS): def contentid_from_path(self, path, ContentType): if ContentType == 6: - ContentID = os.path.splitext(path)[0] - # Remove the prefix on the file. it could be either - ContentID = ContentID.replace(self._main_prefix, '') + if self.dbversion < 8: + ContentID = os.path.splitext(path)[0] + # Remove the prefix on the file. it could be either + ContentID = ContentID.replace(self._main_prefix, '') + else: + ContentID = path + ContentID = ContentID.replace(self._main_prefix + '.kobo/kepub/', '') + if self._card_a_prefix is not None: ContentID = ContentID.replace(self._card_a_prefix, '') elif ContentType == 999: # HTML Files @@ -350,6 +371,13 @@ class KOBO(USBMS): ContentID = ContentID.replace("\\", '/') return ContentID + def get_content_type_from_path(self, path): + # Strictly speaking the ContentType could be 6 or 10 + # however newspapers have the same storage format + if path.find('kepub') >= 0: + ContentType = 6 + return ContentType + def get_content_type_from_extension(self, extension): if extension == '.kobo': # Kobo books do not have book files. They do have some images though @@ -369,19 +397,22 @@ class KOBO(USBMS): print 'path from_contentid cardb' elif oncard == 'carda': path = path.replace("file:///mnt/sd/", self._card_a_prefix) - # print "SD Card: " + filename + # print "SD Card: " + path else: - if ContentType == "6": + if ContentType == "6" and self.dbversion < 8: # This is a hack as the kobo files do not exist # but the path is required to make a unique id # for calibre's reference path = self._main_prefix + path + '.kobo' # print "Path: " + path + elif (ContentType == "6" or ContentType == "10") and self.dbversion >= 8: + path = self._main_prefix + '.kobo/kepub/' + path + # print "Internal: " + path else: # if path.startswith("file:///mnt/onboard/"): path = path.replace("file:///mnt/onboard/", self._main_prefix) path = path.replace("/mnt/onboard/", self._main_prefix) - # print "Internal: " + filename + # print "Internal: " + path return path @@ -469,7 +500,7 @@ class KOBO(USBMS): book.device_collections = ['Im_Reading'] extension = os.path.splitext(book.path)[1] - ContentType = self.get_content_type_from_extension(extension) + ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) ContentID = self.contentid_from_path(book.path, ContentType) datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()) @@ -505,7 +536,7 @@ class KOBO(USBMS): book.device_collections = ['Read'] extension = os.path.splitext(book.path)[1] - ContentType = self.get_content_type_from_extension(extension) + ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) ContentID = self.contentid_from_path(book.path, ContentType) # datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()) From d7f649ce985334da8a52be56351f1807b2e643f7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Oct 2010 18:14:52 -0700 Subject: [PATCH 51/69] Ming Pao by Eddie Lau --- resources/recipes/ming_pao.recipe | 64 +++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 resources/recipes/ming_pao.recipe diff --git a/resources/recipes/ming_pao.recipe b/resources/recipes/ming_pao.recipe new file mode 100644 index 0000000000..6a61405698 --- /dev/null +++ b/resources/recipes/ming_pao.recipe @@ -0,0 +1,64 @@ +cense__ = 'GPL v3' +__copyright__ = '2010, Eddie Lau' +''' +modified from Singtao Toronto calibre recipe by rty +''' + +import datetime +from calibre.web.feeds.recipes import BasicNewsRecipe + +class AdvancedUserRecipe1278063072(BasicNewsRecipe): + title = 'Ming Pao - Hong Kong' + oldest_article = 1 + max_articles_per_feed = 100 + __author__ = 'Eddie Lau' + description = 'Hong Kong Chinese Newspaper' + publisher = 'news.mingpao.com' + category = 'Chinese, News, Hong Kong' + remove_javascript = True + use_embedded_content = False + no_stylesheets = True + language = 'zh' + encoding = 'Big5-HKSCS' + recursions = 0 + conversion_options = {'linearize_tables':True} + masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif' + + keep_only_tags = [dict(name='h1'), + dict(attrs={'id':['newscontent01','newscontent02']})] + + def get_fetchdate(self): + dt_utc = datetime.datetime.utcnow() + # convert UTC to local hk time + dt_local = dt_utc - datetime.timedelta(-8.0/24) + return dt_local.strftime("%Y%m%d") + + def parse_index(self): + feeds = [] + dateStr = self.get_fetchdate() + for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + return feeds + + def parse_section(self, url): + dateStr = self.get_fetchdate() + soup = self.index_to_soup(url) + divs = soup.findAll(attrs={'class': ['bullet']}) + current_articles = [] + for i in divs: + a = i.find('a', href = True) + title = self.tag_to_string(a) + url = a.get('href', False) + url = 'http://news.mingpao.com/' + dateStr + '/' +url + current_articles.append({'title': title, 'url': url, 'description':''}) + return current_articles + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll(width=True): + del item['width'] + return soup + From e86d52d4f0c60a299a3de9d71e92a5bbe36c69b9 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Tue, 26 Oct 2010 09:21:13 +0100 Subject: [PATCH 52/69] Fix #7300 - date comparisons wrong --- src/calibre/library/caches.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 300ddbac0b..03383ee7dd 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -380,7 +380,7 @@ class ResultCache(SearchQueryParser): # {{{ field_count = 3 else: try: - qd = parse_date(query) + qd = parse_date(query, as_utc=False) except: raise ParseException(query, len(query), 'Date conversion error', self) if '-' in query: From 284ca8f7a15a6d653761694e125a721a2d68a1bc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Oct 2010 08:08:57 -0600 Subject: [PATCH 53/69] ... --- src/calibre/web/feeds/news.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index cb6bf30bcf..869799f6bb 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -842,6 +842,9 @@ class BasicNewsRecipe(Recipe): except NotImplementedError: feeds = self.parse_feeds() + if not feeds: + raise ValueError('No articles found, aborting') + #feeds = FeedCollection(feeds) self.report_progress(0, _('Trying to download cover...')) From 9a93d3fd2dccbf0f1fb9212dc00134ea41a8310a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Oct 2010 08:17:02 -0600 Subject: [PATCH 54/69] /browse: Fix handling of non-ascii saved searches --- src/calibre/library/server/browse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index 463fcd6fde..142f40efab 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -509,7 +509,7 @@ class BrowseServer(object): hide_sort = 'true' if dt == 'series' else 'false' if category == 'search': - which = unhexlify(cid) + which = unhexlify(cid).decode('utf-8') try: ids = self.search_cache('search:"%s"'%which) except: From 330a7c989b163e27ca36424c1b680a95c8a47e40 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Oct 2010 08:18:42 -0600 Subject: [PATCH 55/69] Content server: Make /browse the default --- src/calibre/library/server/content.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index d95cd1818c..52a08e6175 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -124,7 +124,7 @@ class ContentServer(object): if want_mobile: return self.mobile() - return self.static('index.html') + return self.browse_toplevel() def old(self, **kwargs): return self.static('index.html') From 7988560d75a681089eabc2a0929bcdc1e5a3ae2f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Oct 2010 09:55:35 -0600 Subject: [PATCH 56/69] SONY driver: Fix bug when adding records to empty cacheExt.xml --- src/calibre/devices/prs505/sony_cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/prs505/sony_cache.py b/src/calibre/devices/prs505/sony_cache.py index 15245d3cd5..17eea3a27c 100644 --- a/src/calibre/devices/prs505/sony_cache.py +++ b/src/calibre/devices/prs505/sony_cache.py @@ -573,7 +573,10 @@ class XMLCache(object): ans = root.makeelement('{%s}text'%namespace, attrib=attrib, nsmap=root.nsmap) ans.tail = '\n' - root[-1].tail = '\n' + '\t' + if len(root) > 0: + root[-1].tail = '\n\t' + else: + root.text = '\n\t' root.append(ans) if thumbnail and thumbnail[-1]: ans.text = '\n' + '\t\t' From 3fdde535027acca84c4423ac21a2584c71b7a3c2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Oct 2010 09:59:57 -0600 Subject: [PATCH 57/69] Fix #7304 (New York Times Conversion Error) --- resources/recipes/nytimes_sub.recipe | 551 +++++++-------------------- 1 file changed, 133 insertions(+), 418 deletions(-) diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index 1814132667..5452ae1c6e 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -4,149 +4,79 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' nytimes.com -V5 - One picture per article, moved to top: -Headline -Image -Byline -Story ''' -import re, string, time +import string, re, time from calibre import strftime from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag +from calibre.ebooks.BeautifulSoup import BeautifulSoup + +def decode(self, src): + enc = 'utf-8' + if 'iso-8859-1' in src: + enc = 'cp1252' + return src.decode(enc, 'ignore') class NYTimes(BasicNewsRecipe): - title = 'The New York Times' - __author__ = 'GRiker' + title = u'New York Times' + __author__ = 'Kovid Goyal/Nick Redding' language = 'en' - requires_version = (0, 7, 5) + requires_version = (0, 6, 36) description = 'Daily news from the New York Times (subscription version)' - allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials', - 'New York','Business Day','Science Times','Sports','Dining','Arts', - 'Home','Styles','Sunday Business','Week In Review','Travel','Magazine', - 'Book Review','Weddings','Real Estate','Automobiles',"T Men's Fashion", - "T Women's Fashion"] - - # List of sections to exclude - # To add a section, copy the section name from the allSectionKeywords list above - # For example, to exclude 'Dining' and 'Weddings': - #excludeSectionKeywords = ['Dining','Weddings'] - excludeSectionKeywords = [] - - # List of sections to include (test and debug only) - # By default, any sections in today's paper that are not listed in excludeSectionKeywords - # are downloaded. fetch_only specifies that only certain sections are to be downloaded. - # This should only be used for testing and debugging. - # For example, to download only 'The Front Page' section: - # fetch_only = set(['The Front Page']) - fetch_only = set([]) - if fetch_only: - excludeSectionKeywords = list(set(allSectionKeywords) ^ fetch_only) - - # one_picture_per_article specifies that calibre should only use the first image - # from an article (if one exists). If one_picture_per_article = True, the image - # will be moved to a location between the headline and the byline. - # If one_picture_per_article = False, all images from the article will be included - # and shown in their original location. - one_picture_per_article = True - - timefmt = '' + timefmt = ' [%b %d]' needs_subscription = True remove_tags_before = dict(id='article') remove_tags_after = dict(id='article') - remove_tags = [dict(attrs={'class':[ - 'articleFooter', - 'articleTools', - 'columnGroup doubleRule', - 'columnGroup singleRule', - 'columnGroup last', - 'columnGroup last', - 'doubleRule', - 'dottedLine', - 'entry-meta', - 'entry-response module', - 'icon enlargeThis', - 'leftNavTabs', - 'module box nav', - 'nextArticleLink', - 'nextArticleLink clearfix', - 'post-tools', - 'relatedSearchesModule', - 'side_tool', - 'singleAd', - 'subNavigation clearfix', - 'subNavigation tabContent active', - 'subNavigation tabContent active clearfix', - ]}), - dict(id=[ - 'adxLeaderboard', - 'archive', - 'articleExtras', - 'articleInline', - 'blog_sidebar', - 'businessSearchBar', - 'cCol', - 'entertainmentSearchBar', - 'footer', - 'header', - 'header_search', - 'login', - 'masthead', - 'masthead-nav', - 'memberTools', - 'navigation', - 'portfolioInline', - 'relatedArticles', - 'respond', - 'side_search', - 'side_index', - 'side_tool', - 'toolsRight', - ]), - dict(name=['script', 'noscript', 'style'])] - masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' - cover_margins = (18,18,'grey99') + remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool','nextArticleLink', + 'nextArticleLink clearfix','columnGroup doubleRule','doubleRule','entry-meta', + 'icon enlargeThis','columnGroup last','relatedSearchesModule']}), + dict({'class':re.compile('^subNavigation')}), + dict({'class':re.compile('^leaderboard')}), + dict({'class':re.compile('^module')}), + dict({'class':'metaFootnote'}), + dict(id=['inlineBox','footer', 'toolsRight', 'articleInline','login','masthead', + 'navigation', 'archive', 'side_search', 'blog_sidebar','cCol','portfolioInline', + 'side_tool', 'side_index','header','readerReviewsCount','readerReviews', + 'relatedArticles', 'relatedTopics', 'adxSponLink']), + dict(name=['script', 'noscript', 'style','form','hr'])] + encoding = decode no_stylesheets = True - extra_css = '.headline {text-align: left;}\n \ - .byline {font-family: monospace; \ - text-align: left; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .dateline {font-size: small; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .timestamp {font-size: small; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .source {text-align: left;}\n \ - .image {text-align: center;}\n \ - .credit {text-align: right; \ - font-size: small; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .articleBody {text-align: left;}\n \ - .authorId {text-align: left; \ - font-style: italic;}\n ' + extra_css = ''' + .articleHeadline { margin-top:0.5em; margin-bottom:0.25em; } + .credit { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + .byline { font-size: small; font-style:italic; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; } + .dateline { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + .timestamp { font-size: small; } + .caption { font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + a:link {text-decoration: none; }''' def get_browser(self): br = BasicNewsRecipe.get_browser() if self.username is not None and self.password is not None: - try: - br.open('http://www.nytimes.com/auth/login') - br.select_form(name='login') - br['USERID'] = self.username - br['PASSWORD'] = self.password - raw = br.submit().read() - if 'Sorry, we could not find the combination you entered. Please try again.' in raw: - raise Exception('Your username and password are incorrect') - #open('/t/log.html', 'wb').write(raw) - except: - self.log("\nFailed to login") - + br.open('http://www.nytimes.com/auth/login') + br.select_form(name='login') + br['USERID'] = self.username + br['PASSWORD'] = self.password + raw = br.submit().read() + if 'Sorry, we could not find the combination you entered. Please try again.' in raw: + raise Exception('Your username and password are incorrect') + #open('/t/log.html', 'wb').write(raw) return br + def get_masthead_url(self): + masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' + #masthead = 'http://members.cox.net/nickredding/nytlogo.gif' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nMasthead unavailable") + masthead = None + return masthead + + def get_cover_url(self): cover = None st = time.localtime() @@ -162,316 +92,101 @@ class NYTimes(BasicNewsRecipe): cover = None return cover - def get_masthead_title(self): - return self.title - - def dump_ans(self, ans): - total_article_count = 0 - for section in ans : - if self.verbose: - self.log("section %s: %d articles" % (section[0], len(section[1])) ) - for article in section[1]: - total_article_count += 1 - if self.verbose: - self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('mac-roman','replace'), - article['url'].encode('mac-roman','replace'))) - self.log( "Queued %d articles" % total_article_count ) - - def dump_hex(self, src, length=16): - ''' Diagnostic ''' - FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) - N=0; result='' - while src: - s,src = src[:length],src[length:] - hexa = ' '.join(["%02X"%ord(x) for x in s]) - s = s.translate(FILTER) - result += "%04X %-*s %s\n" % (N, length*3, hexa, s) - N+=length - print result - - def fixChars(self,string): - # Replace lsquo (\x91) - fixed = re.sub("\x91","‘",string) - - # Replace rsquo (\x92) - fixed = re.sub("\x92","’",fixed) - - # Replace ldquo (\x93) - fixed = re.sub("\x93","“",fixed) - - # Replace rdquo (\x94) - fixed = re.sub("\x94","”",fixed) - - # Replace ndash (\x96) - fixed = re.sub("\x96","–",fixed) - - # Replace mdash (\x97) - fixed = re.sub("\x97","—",fixed) - - return fixed - - def massageNCXText(self, description): - # Kindle TOC descriptions won't render certain characters - if description: - massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) - # Replace '&' with '&' - massaged = re.sub("&","&", massaged) - return self.fixChars(massaged) - else: - return description + def short_title(self): + return 'New York Times' def parse_index(self): + self.encoding = 'cp1252' soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html') + self.encoding = decode def feed_title(div): - return ''.join(div.findAll(text=True, recursive=False)).strip() + return ''.join(div.findAll(text=True, recursive=True)).strip() articles = {} key = None ans = [] - # Find each instance of class="section-headline", class="story", class="story headline" - for div in soup.findAll(True, - attrs={'class':['section-headline', 'story', 'story headline']}): + url_list = [] - if div['class'] == 'section-headline': - key = string.capwords(feed_title(div)) - if self.excludeSectionKeywords: - excluded = re.compile('|'.join(self.excludeSectionKeywords)) - if excluded.search(key): - self.log("Skipping section %s" % key) - continue - articles[key] = [] - ans.append(key) - - elif div['class'] in ['story', 'story headline'] : - a = div.find('a', href=True) - if not a: - continue - url = re.sub(r'\?.*', '', a['href']) - url += '?pagewanted=all' - - title = self.massageNCXText(self.tag_to_string(a, use_alt=True).strip()) - - description = '' - pubdate = strftime('%a, %d %b') - summary = div.find(True, attrs={'class':'summary'}) - if summary: - description = self.massageNCXText(self.tag_to_string(summary, use_alt=False)) - - author = '' - authorAttribution = div.find(True, attrs={'class':'storyheadline-author'}) + def handle_article(div): + a = div.find('a', href=True) + if not a: + return + url = re.sub(r'\?.*', '', a['href']) + if not url.startswith("http"): + return + if not url.endswith(".html"): + return + if 'podcast' in url: + return + url += '?pagewanted=all' + if url in url_list: + return + url_list.append(url) + title = self.tag_to_string(a, use_alt=True).strip() + #self.log("Title: %s" % title) + description = '' + pubdate = strftime('%a, %d %b') + summary = div.find(True, attrs={'class':'summary'}) + if summary: + description = self.tag_to_string(summary, use_alt=False) + author = '' + authorAttribution = div.find(True, attrs={'class':'byline'}) + if authorAttribution: + author = self.tag_to_string(authorAttribution, use_alt=False) + else: + authorAttribution = div.find(True, attrs={'class':'byline'}) if authorAttribution: author = self.tag_to_string(authorAttribution, use_alt=False) - else: - authorAttribution = div.find(True, attrs={'class':'byline'}) - if authorAttribution: - author = self.tag_to_string(authorAttribution, use_alt=False) - # Kill commas - Kindle switches to '&' - author = re.sub(',','',author) + feed = key if key is not None else 'Uncategorized' + if not articles.has_key(feed): + articles[feed] = [] + articles[feed].append( + dict(title=title, url=url, date=pubdate, + description=description, author=author, + content='')) - feed = key if key is not None else 'Uncategorized' - if not articles.has_key(feed): - articles[feed] = [] - if not 'podcasts' in url: - articles[feed].append( - dict(title=title, url=url, date=pubdate, - description=description, author=author, - content='')) - ans = self.sort_index_by(ans, {'The Front Page':-1, - 'Dining In, Dining Out':1, - 'Obituaries':2}) + + + # Find each instance of class="section-headline", class="story", class="story headline" + for div in soup.findAll(True, + attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}): + + if div['class'] in ['section-headline','sectionHeader']: + key = string.capwords(feed_title(div)) + articles[key] = [] + ans.append(key) + #self.log('Section: %s' % key) + + elif div['class'] in ['story', 'story headline'] : + handle_article(div) + elif div['class'] == 'headlinesOnly multiline flush': + for lidiv in div.findAll('li'): + handle_article(lidiv) + +# ans = self.sort_index_by(ans, {'The Front Page':-1, +# 'Dining In, Dining Out':1, +# 'Obituaries':2}) ans = [(key, articles[key]) for key in ans if articles.has_key(key)] - self.dump_ans(ans) + return ans - def skip_ad_pages(self, soup): - # Skip ad pages served before actual article - skip_tag = soup.find(True, {'name':'skip'}) - if skip_tag is not None: - self.log.warn("Found forwarding link: %s" % skip_tag.parent['href']) - url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) - url += '?pagewanted=all' - self.log.warn("Skipping ad to article at '%s'" % url) - return self.index_to_soup(url, raw=True) - def preprocess_html(self, soup): - return self.strip_anchors(soup) + kicker_tag = soup.find(attrs={'class':'kicker'}) + if kicker_tag: + tagline = self.tag_to_string(kicker_tag) + #self.log("FOUND KICKER %s" % tagline) + if tagline=='Op-Ed Columnist': + img_div = soup.find('div','inlineImage module') + #self.log("Searching for photo") + if img_div: + img_div.extract() + #self.log("Photo deleted") + refresh = soup.find('meta', {'http-equiv':'refresh'}) + if refresh is None: + return soup + content = refresh.get('content').partition('=')[2] + raw = self.browser.open_novisit('http://www.nytimes.com'+content).read() + return BeautifulSoup(raw.decode('cp1252', 'replace')) - def postprocess_html(self,soup, True): - print "\npostprocess_html()\n" - - if self.one_picture_per_article: - # Remove all images after first - largeImg = soup.find(True, {'class':'articleSpanImage'}) - inlineImgs = soup.findAll(True, {'class':'inlineImage module'}) - if largeImg: - for inlineImg in inlineImgs: - inlineImg.extract() - else: - if inlineImgs: - firstImg = inlineImgs[0] - for inlineImg in inlineImgs[1:]: - inlineImg.extract() - # Move firstImg after headline - cgFirst = soup.find(True, {'class':'columnGroup first'}) - if cgFirst: - # Strip all sibling NavigableStrings: noise - navstrings = cgFirst.findAll(text=True, recursive=False) - [ns.extract() for ns in navstrings] - headline_found = False - tag = cgFirst.find(True) - insertLoc = 0 - while True: - insertLoc += 1 - if hasattr(tag,'class') and tag['class'] == 'articleHeadline': - headline_found = True - break - tag = tag.nextSibling - if not tag: - headline_found = False - break - if headline_found: - cgFirst.insert(insertLoc,firstImg) - else: - self.log(">>> No class:'columnGroup first' found <<<") - # Change class="kicker" to

    - kicker = soup.find(True, {'class':'kicker'}) - if kicker and kicker.contents and kicker.contents[0]: - h3Tag = Tag(soup, "h3") - h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker, - use_alt=False))) - kicker.replaceWith(h3Tag) - - # Change captions to italic -1 - for caption in soup.findAll(True, {'class':'caption'}) : - if caption and caption.contents[0]: - emTag = Tag(soup, "em") - c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() - mp_off = c.find("More Photos") - if mp_off >= 0: - c = c[:mp_off] - emTag.insert(0, c) - #hrTag = Tag(soup, 'hr') - #hrTag['class'] = 'caption_divider' - hrTag = Tag(soup, 'div') - hrTag['class'] = 'divider' - emTag.insert(1, hrTag) - caption.replaceWith(emTag) - - # Change to

    - h1 = soup.find('h1') - if h1: - headline = h1.find("nyt_headline") - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - h1.replaceWith(tag) - else: - # Blog entry - replace headline, remove
    tags - headline = soup.find('title') - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - soup.insert(0, tag) - hrs = soup.findAll('hr') - for hr in hrs: - hr.extract() - - # Change

    to

    - used in editorial blogs - masthead = soup.find("h1") - if masthead: - # Nuke the href - if masthead.a: - del(masthead.a['href']) - tag = Tag(soup, "h3") - tag.insert(0, self.fixChars(masthead.contents[0])) - masthead.replaceWith(tag) - - # Change to - for subhead in soup.findAll(True, {'class':'bold'}) : - if subhead.contents: - bTag = Tag(soup, "b") - bTag.insert(0, subhead.contents[0]) - subhead.replaceWith(bTag) - - # Synthesize a section header - dsk = soup.find('meta', attrs={'name':'dsk'}) - if dsk and dsk.has_key('content'): - hTag = Tag(soup,'h3') - hTag['class'] = 'section' - hTag.insert(0,NavigableString(dsk['content'])) - articleTag = soup.find(True, attrs={'id':'article'}) - if articleTag: - articleTag.insert(0,hTag) - - # Add class="articleBody" to
    so we can format with CSS - divTag = soup.find('div',attrs={'id':'articleBody'}) - if divTag: - divTag['class'] = divTag['id'] - - # Add class="authorId" to
    so we can format with CSS - divTag = soup.find('div',attrs={'id':'authorId'}) - if divTag and divTag.contents[0]: - tag = Tag(soup, "p") - tag['class'] = "authorId" - tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], - use_alt=False))) - divTag.replaceWith(tag) - - return soup - - def populate_article_metadata(self,article,soup,first): - ''' - Extract author and description from article, add to article metadata - ''' - def extract_author(soup): - byline = soup.find('meta',attrs={'name':['byl','CLMST']}) - if byline : - author = byline['content'] - else : - # Try for