'
__docformat__ = 'restructuredtext en'
-import sys, cPickle, shutil
+import sys, cPickle, shutil, importlib
from PyQt4.Qt import QString, SIGNAL, QAbstractListModel, Qt, QVariant, QFont
@@ -182,8 +182,8 @@ class Config(ResizableDialog, Ui_Dialog):
output_widget = None
name = self.plumber.output_plugin.name.lower().replace(' ', '_')
try:
- output_widget = __import__('calibre.gui2.convert.'+name,
- fromlist=[1])
+ output_widget = importlib.import_module(
+ 'calibre.gui2.convert.'+name)
pw = output_widget.PluginWidget
pw.ICON = I('back.png')
pw.HELP = _('Options specific to the output format.')
@@ -193,8 +193,8 @@ class Config(ResizableDialog, Ui_Dialog):
input_widget = None
name = self.plumber.input_plugin.name.lower().replace(' ', '_')
try:
- input_widget = __import__('calibre.gui2.convert.'+name,
- fromlist=[1])
+ input_widget = importlib.import_module(
+ 'calibre.gui2.convert.'+name)
pw = input_widget.PluginWidget
pw.ICON = I('forward.png')
pw.HELP = _('Options specific to the input format.')
diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py
index beaca77a38..10602fb28c 100644
--- a/src/calibre/gui2/custom_column_widgets.py
+++ b/src/calibre/gui2/custom_column_widgets.py
@@ -226,10 +226,18 @@ class Comments(Base):
class Text(Base):
def setup_ui(self, parent):
+ if self.col_metadata['display'].get('is_names', False):
+ self.sep = u' & '
+ else:
+ self.sep = u', '
values = self.all_values = list(self.db.all_custom(num=self.col_id))
values.sort(key=sort_key)
if self.col_metadata['is_multiple']:
w = MultiCompleteLineEdit(parent)
+ w.set_separator(self.sep.strip())
+ if self.sep == u' & ':
+ w.set_space_before_sep(True)
+ w.set_add_separator(tweaks['authors_completer_append_separator'])
w.update_items_cache(values)
w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
else:
@@ -261,12 +269,12 @@ class Text(Base):
if self.col_metadata['is_multiple']:
if not val:
val = []
- self.widgets[1].setText(u', '.join(val))
+ self.widgets[1].setText(self.sep.join(val))
def getter(self):
if self.col_metadata['is_multiple']:
val = unicode(self.widgets[1].text()).strip()
- ans = [x.strip() for x in val.split(',') if x.strip()]
+ ans = [x.strip() for x in val.split(self.sep.strip()) if x.strip()]
if not ans:
ans = None
return ans
@@ -847,13 +855,20 @@ class BulkText(BulkBase):
self.main_widget.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
self.adding_widget = self.main_widget
- w = RemoveTags(parent, values)
- self.widgets.append(QLabel('&'+self.col_metadata['name']+': ' +
- _('tags to remove'), parent))
- self.widgets.append(w)
- self.removing_widget = w
- w.tags_box.textChanged.connect(self.a_c_checkbox_changed)
- w.checkbox.stateChanged.connect(self.a_c_checkbox_changed)
+ if not self.col_metadata['display'].get('is_names', False):
+ w = RemoveTags(parent, values)
+ self.widgets.append(QLabel('&'+self.col_metadata['name']+': ' +
+ _('tags to remove'), parent))
+ self.widgets.append(w)
+ self.removing_widget = w
+ self.main_widget.set_separator(',')
+ w.tags_box.textChanged.connect(self.a_c_checkbox_changed)
+ w.checkbox.stateChanged.connect(self.a_c_checkbox_changed)
+ else:
+ self.main_widget.set_separator('&')
+ self.main_widget.set_space_before_sep(True)
+ self.main_widget.set_add_separator(
+ tweaks['authors_completer_append_separator'])
else:
self.make_widgets(parent, MultiCompleteComboBox)
self.main_widget.set_separator(None)
@@ -882,21 +897,26 @@ class BulkText(BulkBase):
if not self.a_c_checkbox.isChecked():
return
if self.col_metadata['is_multiple']:
- remove_all, adding, rtext = self.gui_val
- remove = set()
- if remove_all:
- remove = set(self.db.all_custom(num=self.col_id))
+ if self.col_metadata['display'].get('is_names', False):
+ val = self.gui_val
+ add = [v.strip() for v in val.split('&') if v.strip()]
+ self.db.set_custom_bulk(book_ids, add, num=self.col_id)
else:
- txt = rtext
+ remove_all, adding, rtext = self.gui_val
+ remove = set()
+ if remove_all:
+ remove = set(self.db.all_custom(num=self.col_id))
+ else:
+ txt = rtext
+ if txt:
+ remove = set([v.strip() for v in txt.split(',')])
+ txt = adding
if txt:
- remove = set([v.strip() for v in txt.split(',')])
- txt = adding
- if txt:
- add = set([v.strip() for v in txt.split(',')])
- else:
- add = set()
- self.db.set_custom_bulk_multiple(book_ids, add=add, remove=remove,
- num=self.col_id)
+ add = set([v.strip() for v in txt.split(',')])
+ else:
+ add = set()
+ self.db.set_custom_bulk_multiple(book_ids, add=add,
+ remove=remove, num=self.col_id)
else:
val = self.gui_val
val = self.normalize_ui_val(val)
@@ -905,10 +925,11 @@ class BulkText(BulkBase):
def getter(self):
if self.col_metadata['is_multiple']:
- return self.removing_widget.checkbox.isChecked(), \
- unicode(self.adding_widget.text()), \
- unicode(self.removing_widget.tags_box.text())
-
+ if not self.col_metadata['display'].get('is_names', False):
+ return self.removing_widget.checkbox.isChecked(), \
+ unicode(self.adding_widget.text()), \
+ unicode(self.removing_widget.tags_box.text())
+ return unicode(self.adding_widget.text())
val = unicode(self.main_widget.currentText()).strip()
if not val:
val = None
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 215e67c46f..ab2177cef1 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -64,7 +64,7 @@ class DeviceJob(BaseJob): # {{{
self.result = self.func(*self.args, **self.kwargs)
if self._aborted:
return
- except (Exception, SystemExit), err:
+ except (Exception, SystemExit) as err:
if self._aborted:
return
self.failed = True
@@ -162,7 +162,7 @@ class DeviceManager(Thread): # {{{
dev.reset(detected_device=detected_device,
report_progress=self.report_progress)
dev.open(self.current_library_uuid)
- except OpenFeedback, e:
+ except OpenFeedback as e:
if dev not in self.ejected_devices:
self.open_feedback_msg(dev.get_gui_name(), e.feedback_msg)
self.ejected_devices.add(dev)
diff --git a/src/calibre/gui2/device_drivers/configwidget.py b/src/calibre/gui2/device_drivers/configwidget.py
index 97c492b550..fc7e16e639 100644
--- a/src/calibre/gui2/device_drivers/configwidget.py
+++ b/src/calibre/gui2/device_drivers/configwidget.py
@@ -133,7 +133,7 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
try:
validation_formatter.validate(tmpl)
return True
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid template'),
''+_('The template %s is invalid:')%tmpl + \
'
'+unicode(err), show=True)
diff --git a/src/calibre/gui2/dialogs/catalog.py b/src/calibre/gui2/dialogs/catalog.py
index ebca7235eb..a8f7ed160f 100644
--- a/src/calibre/gui2/dialogs/catalog.py
+++ b/src/calibre/gui2/dialogs/catalog.py
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import os, sys
+import os, sys, importlib
from calibre.customize.ui import config
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
@@ -43,8 +43,7 @@ class Catalog(ResizableDialog, Ui_Dialog):
name = plugin.name.lower().replace(' ', '_')
if type(plugin) in builtin_plugins:
try:
- catalog_widget = __import__('calibre.gui2.catalog.'+name,
- fromlist=[1])
+ catalog_widget = importlib.import_module('calibre.gui2.catalog.'+name)
pw = catalog_widget.PluginWidget()
pw.initialize(name, db)
pw.ICON = I('forward.png')
@@ -75,7 +74,7 @@ class Catalog(ResizableDialog, Ui_Dialog):
# Import the dynamic PluginWidget() from .py file provided in plugin.zip
try:
sys.path.insert(0, plugin.resources_path)
- catalog_widget = __import__(name, fromlist=[1])
+ catalog_widget = importlib.import_module(name)
pw = catalog_widget.PluginWidget()
pw.initialize(name)
pw.ICON = I('forward.png')
diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py
index 560090d2b3..95f99d4034 100644
--- a/src/calibre/gui2/dialogs/check_library.py
+++ b/src/calibre/gui2/dialogs/check_library.py
@@ -68,7 +68,7 @@ class DBCheck(QDialog): # {{{
self.start_load()
return
QTimer.singleShot(0, self.do_one_dump)
- except Exception, e:
+ except Exception as e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()
@@ -90,7 +90,7 @@ class DBCheck(QDialog): # {{{
self.conn.commit()
QTimer.singleShot(0, self.do_one_load)
- except Exception, e:
+ except Exception as e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()
@@ -111,7 +111,7 @@ class DBCheck(QDialog): # {{{
self.pb.setValue(self.pb.value() + 1)
self.count -= 1
QTimer.singleShot(0, self.do_one_load)
- except Exception, e:
+ except Exception as e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()
diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py
index 9b25545252..0683f2cb91 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@@ -120,7 +120,7 @@ class MyBlockingBusy(QDialog): # {{{
self.msg.setText(self.msg_text.format(self.phases[self.current_phase],
percent))
self.do_one(id)
- except Exception, err:
+ except Exception as err:
import traceback
try:
err = unicode(err)
@@ -653,7 +653,10 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
if self.destination_field_fm['is_multiple']:
if self.comma_separated.isChecked():
- if dest == 'authors':
+ if dest == 'authors' or \
+ (self.destination_field_fm['is_custom'] and
+ self.destination_field_fm['datatype'] == 'text' and
+ self.destination_field_fm['display'].get('is_names', False)):
splitter = ' & '
else:
splitter = ','
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 9efe7f7160..f6b7b94453 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -76,7 +76,7 @@ class CoverFetcher(Thread): # {{{
self.cover_data, self.errors = download_cover(mi,
timeout=self.timeout)
- except Exception, e:
+ except Exception as e:
self.exception = e
self.traceback = traceback.format_exc()
print self.traceback
@@ -183,7 +183,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
try:
cf = open(_file, "rb")
cover = cf.read()
- except IOError, e:
+ except IOError as e:
d = error_dialog(self, _('Error reading file'),
_("There was an error reading from file:
") + _file + "
"+str(e))
d.exec_()
diff --git a/src/calibre/gui2/dialogs/tag_editor.py b/src/calibre/gui2/dialogs/tag_editor.py
index 6bd8eb7dbe..bf3bb9fd4e 100644
--- a/src/calibre/gui2/dialogs/tag_editor.py
+++ b/src/calibre/gui2/dialogs/tag_editor.py
@@ -122,6 +122,8 @@ class TagEditor(QDialog, Ui_TagEditor):
tags = unicode(self.add_tag_input.text()).split(',')
for tag in tags:
tag = tag.strip()
+ if not tag:
+ continue
for item in self.available_tags.findItems(tag, Qt.MatchFixedString):
self.available_tags.takeItem(self.available_tags.row(item))
if tag not in self.tags:
diff --git a/src/calibre/gui2/dialogs/user_profiles.py b/src/calibre/gui2/dialogs/user_profiles.py
index 5453a90766..d66d02d211 100644
--- a/src/calibre/gui2/dialogs/user_profiles.py
+++ b/src/calibre/gui2/dialogs/user_profiles.py
@@ -237,7 +237,7 @@ class %(classname)s(%(base_class)s):
try:
compile_recipe(src)
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid input'),
_('Could not create recipe. Error:
%s')%str(err)).exec_()
return
@@ -246,7 +246,7 @@ class %(classname)s(%(base_class)s):
src = unicode(self.source_code.toPlainText())
try:
title = compile_recipe(src).title
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid input'),
_('
Could not create recipe. Error:
%s')%str(err)).exec_()
return
@@ -333,7 +333,7 @@ class %(classname)s(%(base_class)s):
try:
profile = open(file, 'rb').read().decode('utf-8')
title = compile_recipe(profile).title
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid input'),
_('
Could not create recipe. Error:
%s')%str(err)).exec_()
return
diff --git a/src/calibre/gui2/dnd.py b/src/calibre/gui2/dnd.py
index 928de72578..1f9dbdfa34 100644
--- a/src/calibre/gui2/dnd.py
+++ b/src/calibre/gui2/dnd.py
@@ -35,7 +35,7 @@ class Worker(Thread): # {{{
try:
br = browser()
br.retrieve(self.url, self.fpath, self.callback)
- except Exception, e:
+ except Exception as e:
self.err = as_unicode(e)
import traceback
self.tb = traceback.format_exc()
diff --git a/src/calibre/gui2/email.py b/src/calibre/gui2/email.py
index c84b3180f7..81c1d9c255 100644
--- a/src/calibre/gui2/email.py
+++ b/src/calibre/gui2/email.py
@@ -116,7 +116,7 @@ class Emailer(Thread): # {{{
try:
self.sendmail(job)
break
- except Exception, e:
+ except Exception as e:
if not self._run:
return
import traceback
diff --git a/src/calibre/gui2/library/delegates.py b/src/calibre/gui2/library/delegates.py
index 3a090f8102..0f74500099 100644
--- a/src/calibre/gui2/library/delegates.py
+++ b/src/calibre/gui2/library/delegates.py
@@ -398,7 +398,7 @@ class CcTemplateDelegate(QStyledItemDelegate): # {{{
val = unicode(editor.textbox.toPlainText())
try:
validation_formatter.validate(val)
- except Exception, err:
+ except Exception as err:
error_dialog(self.parent(), _('Invalid template'),
'
'+_('The template %s is invalid:')%val + \
'
'+str(err), show=True)
diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py
index a200562ea9..c921ea125f 100644
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@@ -640,18 +640,18 @@ class BooksModel(QAbstractTableModel): # {{{
return self.bool_yes_icon
return self.bool_blank_icon
- def text_type(r, mult=False, idx=-1):
+ def text_type(r, mult=None, idx=-1):
text = self.db.data[r][idx]
- if text and mult:
- return QVariant(', '.join(sorted(text.split('|'),key=sort_key)))
+ if text and mult is not None:
+ if mult:
+ return QVariant(u' & '.join(text.split('|')))
+ return QVariant(u', '.join(sorted(text.split('|'),key=sort_key)))
return QVariant(text)
- def decorated_text_type(r, mult=False, idx=-1):
+ def decorated_text_type(r, idx=-1):
text = self.db.data[r][idx]
if force_to_bool(text) is not None:
return None
- if text and mult:
- return QVariant(', '.join(sorted(text.split('|'),key=sort_key)))
return QVariant(text)
def number_type(r, idx=-1):
@@ -659,7 +659,7 @@ class BooksModel(QAbstractTableModel): # {{{
self.dc = {
'title' : functools.partial(text_type,
- idx=self.db.field_metadata['title']['rec_index'], mult=False),
+ idx=self.db.field_metadata['title']['rec_index'], mult=None),
'authors' : functools.partial(authors,
idx=self.db.field_metadata['authors']['rec_index']),
'size' : functools.partial(size,
@@ -671,14 +671,14 @@ class BooksModel(QAbstractTableModel): # {{{
'rating' : functools.partial(rating_type,
idx=self.db.field_metadata['rating']['rec_index']),
'publisher': functools.partial(text_type,
- idx=self.db.field_metadata['publisher']['rec_index'], mult=False),
+ idx=self.db.field_metadata['publisher']['rec_index'], mult=None),
'tags' : functools.partial(tags,
idx=self.db.field_metadata['tags']['rec_index']),
'series' : functools.partial(series_type,
idx=self.db.field_metadata['series']['rec_index'],
siix=self.db.field_metadata['series_index']['rec_index']),
'ondevice' : functools.partial(text_type,
- idx=self.db.field_metadata['ondevice']['rec_index'], mult=False),
+ idx=self.db.field_metadata['ondevice']['rec_index'], mult=None),
}
self.dc_decorator = {
@@ -692,11 +692,12 @@ class BooksModel(QAbstractTableModel): # {{{
datatype = self.custom_columns[col]['datatype']
if datatype in ('text', 'comments', 'composite', 'enumeration'):
mult=self.custom_columns[col]['is_multiple']
+ if mult is not None:
+ mult = self.custom_columns[col]['display'].get('is_names', False)
self.dc[col] = functools.partial(text_type, idx=idx, mult=mult)
if datatype in ['text', 'composite', 'enumeration'] and not mult:
if self.custom_columns[col]['display'].get('use_decorations', False):
- self.dc[col] = functools.partial(decorated_text_type,
- idx=idx, mult=mult)
+ self.dc[col] = functools.partial(decorated_text_type, idx=idx)
self.dc_decorator[col] = functools.partial(
bool_type_decorator, idx=idx,
bool_cols_are_tristate=
diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py
index c62936a46f..0cce33da9e 100644
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@@ -78,6 +78,7 @@ class BooksView(QTableView): # {{{
self.pubdate_delegate = PubDateDelegate(self)
self.tags_delegate = CompleteDelegate(self, ',', 'all_tags')
self.authors_delegate = CompleteDelegate(self, '&', 'all_author_names', True)
+ self.cc_names_delegate = CompleteDelegate(self, '&', 'all_custom', True)
self.series_delegate = TextDelegate(self)
self.publisher_delegate = TextDelegate(self)
self.text_delegate = TextDelegate(self)
@@ -410,6 +411,7 @@ class BooksView(QTableView): # {{{
self.save_state()
self._model.set_database(db)
self.tags_delegate.set_database(db)
+ self.cc_names_delegate.set_database(db)
self.authors_delegate.set_database(db)
self.series_delegate.set_auto_complete_function(db.all_series)
self.publisher_delegate.set_auto_complete_function(db.all_publishers)
@@ -431,12 +433,17 @@ class BooksView(QTableView): # {{{
self.setItemDelegateForColumn(cm.index(colhead), delegate)
elif cc['datatype'] == 'comments':
self.setItemDelegateForColumn(cm.index(colhead), self.cc_comments_delegate)
- elif cc['datatype'] in ('text', 'series'):
+ elif cc['datatype'] == 'text':
if cc['is_multiple']:
- self.setItemDelegateForColumn(cm.index(colhead), self.tags_delegate)
+ if cc['display'].get('is_names', False):
+ self.setItemDelegateForColumn(cm.index(colhead),
+ self.cc_names_delegate)
+ else:
+ self.setItemDelegateForColumn(cm.index(colhead),
+ self.tags_delegate)
else:
self.setItemDelegateForColumn(cm.index(colhead), self.cc_text_delegate)
- elif cc['datatype'] in ('int', 'float'):
+ elif cc['datatype'] in ('series', 'int', 'float'):
self.setItemDelegateForColumn(cm.index(colhead), self.cc_text_delegate)
elif cc['datatype'] == 'bool':
self.setItemDelegateForColumn(cm.index(colhead), self.cc_bool_delegate)
diff --git a/src/calibre/gui2/lrf_renderer/main.py b/src/calibre/gui2/lrf_renderer/main.py
index 2acfd3c9a7..e68e04adcf 100644
--- a/src/calibre/gui2/lrf_renderer/main.py
+++ b/src/calibre/gui2/lrf_renderer/main.py
@@ -35,7 +35,7 @@ class RenderWorker(QThread):
self.stream = None
if self.aborted:
self.lrf = None
- except Exception, err:
+ except Exception as err:
self.lrf, self.stream = None, None
self.exception = err
self.formatted_traceback = traceback.format_exc()
diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py
index 976b679726..c67ec8c2b4 100644
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -399,7 +399,7 @@ def main(args=sys.argv):
if __name__ == '__main__':
try:
sys.exit(main())
- except Exception, err:
+ except Exception as err:
if not iswindows: raise
tb = traceback.format_exc()
from PyQt4.QtGui import QErrorMessage
diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index d5a8de7b67..635a037482 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -656,7 +656,7 @@ class Cover(ImageView): # {{{
try:
cf = open(_file, "rb")
cover = cf.read()
- except IOError, e:
+ except IOError as e:
d = error_dialog(self, _('Error reading file'),
_("
There was an error reading from file:
")
+ _file + "
"+str(e))
diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py
index 461f56b60c..7a7f49dabf 100644
--- a/src/calibre/gui2/metadata/bulk_download.py
+++ b/src/calibre/gui2/metadata/bulk_download.py
@@ -88,7 +88,7 @@ class DownloadMetadata(Thread):
def run(self):
try:
self._run()
- except Exception, e:
+ except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py
index 3b6dd0e253..5b17b454e7 100644
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@@ -303,7 +303,7 @@ class MetadataSingleDialogBase(ResizableDialog):
return False
self.books_to_refresh |= getattr(widget, 'books_to_refresh',
set([]))
- except IOError, err:
+ except IOError as err:
if err.errno == 13: # Permission denied
import traceback
fname = err.filename if err.filename else 'file'
diff --git a/src/calibre/gui2/notify.py b/src/calibre/gui2/notify.py
index 501f7007eb..947d98f1a4 100644
--- a/src/calibre/gui2/notify.py
+++ b/src/calibre/gui2/notify.py
@@ -34,7 +34,7 @@ class DBUSNotifier(Notifier):
import dbus
self.dbus = dbus
self._notify = dbus.Interface(dbus.SessionBus().get_object(server, path), interface)
- except Exception, err:
+ except Exception as err:
self.ok = False
self.err = str(err)
diff --git a/src/calibre/gui2/preferences/conversion.py b/src/calibre/gui2/preferences/conversion.py
index 8de9ee1661..b5240227d3 100644
--- a/src/calibre/gui2/preferences/conversion.py
+++ b/src/calibre/gui2/preferences/conversion.py
@@ -5,6 +5,8 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
+import importlib
+
from PyQt4.Qt import QIcon, Qt, QStringListModel, QVariant
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, AbortCommit
@@ -104,8 +106,8 @@ class OutputOptions(Base):
for plugin in output_format_plugins():
name = plugin.name.lower().replace(' ', '_')
try:
- output_widget = __import__('calibre.gui2.convert.'+name,
- fromlist=[1])
+ output_widget = importlib.import_module(
+ 'calibre.gui2.convert.'+name)
pw = output_widget.PluginWidget
self.conversion_widgets.append(pw)
except ImportError:
diff --git a/src/calibre/gui2/preferences/create_custom_column.py b/src/calibre/gui2/preferences/create_custom_column.py
index cee34f150e..f476845f8b 100644
--- a/src/calibre/gui2/preferences/create_custom_column.py
+++ b/src/calibre/gui2/preferences/create_custom_column.py
@@ -63,7 +63,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
for col, name in [('isbn', _('ISBN')), ('formats', _('Formats')),
('last_modified', _('Modified Date')), ('yesno', _('Yes/No')),
('tags', _('Tags')), ('series', _('Series')), ('rating',
- _('Rating'))]:
+ _('Rating')), ('people', _("People's names"))]:
text += ' %s,'%(col, name)
text = text[:-1]
self.shortcuts.setText(text)
@@ -125,6 +125,8 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
self.datatype_changed()
if ct in ['text', 'composite', 'enumeration']:
self.use_decorations.setChecked(c['display'].get('use_decorations', False))
+ elif ct == '*text':
+ self.is_names.setChecked(c['display'].get('is_names', False))
self.exec_()
def shortcut_activated(self, url):
@@ -134,6 +136,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
'tags' : 1,
'series': 3,
'rating': 8,
+ 'people': 1,
}.get(which, 10))
self.column_name_box.setText(which)
self.column_heading_box.setText({
@@ -143,7 +146,9 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
'tags': _('My Tags'),
'series': _('My Series'),
'rating': _('My Rating'),
- 'last_modified':_('Modified Date')}[which])
+ 'last_modified':_('Modified Date'),
+ 'people': _('People')}[which])
+ self.is_names.setChecked(which == 'people')
if self.composite_box.isVisible():
self.composite_box.setText(
{
@@ -153,7 +158,6 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
}[which])
self.composite_sort_by.setCurrentIndex(2 if which == 'last_modified' else 0)
-
def datatype_changed(self, *args):
try:
col_type = self.column_types[self.column_type_box.currentIndex()]['datatype']
@@ -167,6 +171,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
for x in ('box', 'default_label', 'label'):
getattr(self, 'enum_'+x).setVisible(col_type == 'enumeration')
self.use_decorations.setVisible(col_type in ['text', 'composite', 'enumeration'])
+ self.is_names.setVisible(col_type == '*text')
def accept(self):
col = unicode(self.column_name_box.text()).strip()
@@ -241,6 +246,8 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
return self.simple_error('', _('The value "{0}" is in the '
'list more than once').format(l[i]))
display_dict = {'enum_values': l}
+ elif col_type == 'text' and is_multiple:
+ display_dict = {'is_names': self.is_names.isChecked()}
if col_type in ['text', 'composite', 'enumeration']:
display_dict['use_decorations'] = self.use_decorations.checkState()
diff --git a/src/calibre/gui2/preferences/create_custom_column.ui b/src/calibre/gui2/preferences/create_custom_column.ui
index 3290d3c846..619b0c6212 100644
--- a/src/calibre/gui2/preferences/create_custom_column.ui
+++ b/src/calibre/gui2/preferences/create_custom_column.ui
@@ -9,7 +9,7 @@
0
0
- 603
+ 831
344
@@ -110,27 +110,37 @@
-
-
- Show checkmarks
-
Show check marks in the GUI. Values of 'yes', 'checked', and 'true'
will show a green check. Values of 'no', 'unchecked', and 'false' will show a red X.
Everything else will show nothing.
+
+ Show checkmarks
+
+
+
+ -
+
+
+ Check this box if this column contains names, like the authors column.
+
+
+ Contains names
+
-
-
- Qt::Horizontal
-
10
0
+
+ Qt::Horizontal
+
20
@@ -241,25 +251,25 @@ Everything else will show nothing.
-
-
- Show in tags browser
-
If checked, this column will appear in the tags browser as a category
+
+ Show in tags browser
+
-
-
- Qt::Horizontal
-
10
0
+
+ Qt::Horizontal
+
20
diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py
index 206f2b97fb..a2d2236039 100644
--- a/src/calibre/gui2/preferences/look_feel.py
+++ b/src/calibre/gui2/preferences/look_feel.py
@@ -64,8 +64,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('tags_browser_collapse_at', gprefs)
choices = set([k for k in db.field_metadata.all_field_keys()
- if db.field_metadata[k]['is_category'] and
- db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']])
+ if db.field_metadata[k]['is_category'] and
+ (db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']) and
+ not db.field_metadata[k]['display'].get('is_names', False)])
choices -= set(['authors', 'publisher', 'formats', 'news', 'identifiers'])
choices |= set(['search'])
self.opt_categories_using_hierarchy.update_items_cache(choices)
diff --git a/src/calibre/gui2/preferences/plugboard.py b/src/calibre/gui2/preferences/plugboard.py
index e1dc6b03bd..8f2b084d76 100644
--- a/src/calibre/gui2/preferences/plugboard.py
+++ b/src/calibre/gui2/preferences/plugboard.py
@@ -251,7 +251,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
if d != 0:
try:
validation_formatter.validate(s)
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid template'),
'
'+_('The template %s is invalid:')%s + \
'
'+str(err), show=True)
diff --git a/src/calibre/gui2/preferences/save_template.py b/src/calibre/gui2/preferences/save_template.py
index 4c00a14c0f..96ca8c8945 100644
--- a/src/calibre/gui2/preferences/save_template.py
+++ b/src/calibre/gui2/preferences/save_template.py
@@ -57,7 +57,7 @@ class SaveTemplate(QWidget, Ui_Form):
return question_dialog(self, _('Constant template'),
_('The template contains no {fields}, so all '
'books will have the same name. Is this OK?'))
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid template'),
'
'+_('The template %s is invalid:')%tmpl + \
'
'+str(err), show=True)
diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py
index 34fa3a8b10..6b1ce2f851 100644
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@@ -658,8 +658,7 @@ class TagTreeItem(object): # {{{
def tag_data(self, role):
tag = self.tag
- if tag.category == 'authors' and \
- tweaks['categories_use_field_for_author_name'] == 'author_sort':
+ if tag.use_sort_as_name:
name = tag.sort
tt_author = True
else:
@@ -1275,6 +1274,7 @@ class TagsModel(QAbstractItemModel): # {{{
if len(components) == 0 or '.'.join(components) != tag.original_name:
components = [tag.original_name]
if (not tag.is_hierarchical) and (in_uc or
+ (fm['is_custom'] and fm['display'].get('is_names', False)) or
key in ['authors', 'publisher', 'news', 'formats', 'rating'] or
key not in self.db.prefs.get('categories_using_hierarchy', []) or
len(components) == 1):
diff --git a/src/calibre/gui2/viewer/dictionary.py b/src/calibre/gui2/viewer/dictionary.py
index dad8d1821c..d5dd4d0a86 100644
--- a/src/calibre/gui2/viewer/dictionary.py
+++ b/src/calibre/gui2/viewer/dictionary.py
@@ -36,7 +36,7 @@ class Lookup(QThread):
def run(self):
try:
self.define()
- except Exception, e:
+ except Exception as e:
import traceback
self.exception = e
self.traceback = traceback.format_exc()
diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py
index c570a6e159..ea0509b51a 100644
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@@ -97,7 +97,7 @@ class FilenamePattern(QWidget, Ui_Form):
def do_test(self):
try:
pat = self.pattern()
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid regular expression'),
_('Invalid regular expression: %s')%err).exec_()
return
diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py
index c629b10b5d..a32347dc72 100644
--- a/src/calibre/gui2/wizard/__init__.py
+++ b/src/calibre/gui2/wizard/__init__.py
@@ -565,7 +565,7 @@ def move_library(oldloc, newloc, parent, callback_on_complete):
# Try to load existing library at new location
try:
LibraryDatabase2(newloc)
- except Exception, err:
+ except Exception as err:
det = traceback.format_exc()
error_dialog(parent, _('Invalid database'),
_('
An invalid library already exists at '
@@ -577,7 +577,7 @@ def move_library(oldloc, newloc, parent, callback_on_complete):
else:
callback(newloc)
return
- except Exception, err:
+ except Exception as err:
det = traceback.format_exc()
error_dialog(parent, _('Could not move library'),
unicode(err), det, show=True)
diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 19ef7e213c..e5864ceaaf 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -15,7 +15,7 @@ from calibre.utils.config import tweaks, prefs
from calibre.utils.date import parse_date, now, UNDEFINED_DATE
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.pyparsing import ParseException
-from calibre.ebooks.metadata import title_sort
+from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre import prints
@@ -1023,7 +1023,11 @@ class SortKeyGenerator(object):
if val:
sep = fm['is_multiple']
if sep:
- val = sep.join(sorted(val.split(sep),
+ if fm['display'].get('is_names', False):
+ val = sep.join(
+ [author_to_author_sort(v) for v in val.split(sep)])
+ else:
+ val = sep.join(sorted(val.split(sep),
key=self.string_sort_key))
val = self.string_sort_key(val)
diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py
index dec55f2b02..48960ac871 100644
--- a/src/calibre/library/custom_columns.py
+++ b/src/calibre/library/custom_columns.py
@@ -117,7 +117,7 @@ class CustomColumns(object):
if x is None:
return []
if isinstance(x, (str, unicode, bytes)):
- x = x.split(',')
+ x = x.split('&' if d['display'].get('is_names', False) else',')
x = [y.strip() for y in x if y.strip()]
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
unicode) else y for y in x]
@@ -482,8 +482,11 @@ class CustomColumns(object):
set_val = val if data['is_multiple'] else [val]
existing = getter()
if not existing:
- existing = []
- for x in set(set_val) - set(existing):
+ existing = set([])
+ else:
+ existing = set(existing)
+ # preserve the order in set_val
+ for x in [v for v in set_val if v not in existing]:
# normalized types are text and ratings, so we can do this check
# to see if we need to re-add the value
if not x:
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index e751d4d522..b23c8ff4a4 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -48,7 +48,7 @@ class Tag(object):
def __init__(self, name, id=None, count=0, state=0, avg=0, sort=None,
tooltip=None, icon=None, category=None, id_set=None,
- is_editable = True, is_searchable=True):
+ is_editable = True, is_searchable=True, use_sort_as_name=False):
self.name = self.original_name = name
self.id = id
self.count = count
@@ -59,6 +59,7 @@ class Tag(object):
self.id_set = id_set if id_set is not None else set([])
self.avg_rating = avg/2.0 if avg is not None else 0
self.sort = sort
+ self.use_sort_as_name = use_sort_as_name
if self.avg_rating > 0:
if tooltip:
tooltip = tooltip + ': '
@@ -1323,6 +1324,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
for l in list:
(id, val) = (l[0], l[1])
tids[category][val] = (id, '{0:05.2f}'.format(val))
+ elif cat['datatype'] == 'text' and cat['is_multiple'] and \
+ cat['display'].get('is_names', False):
+ for l in list:
+ (id, val) = (l[0], l[1])
+ tids[category][val] = (id, author_to_author_sort(val))
else:
for l in list:
(id, val) = (l[0], l[1])
@@ -1480,11 +1486,20 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
reverse=True
items.sort(key=kf, reverse=reverse)
+ if tweaks['categories_use_field_for_author_name'] == 'author_sort' and\
+ (category == 'authors' or
+ (cat['display'].get('is_names', False) and
+ cat['is_custom'] and cat['is_multiple'] and
+ cat['datatype'] == 'text')):
+ use_sort_as_name = True
+ else:
+ use_sort_as_name = False
is_editable = category not in ['news', 'rating']
categories[category] = [tag_class(formatter(r.n), count=r.c, id=r.id,
avg=avgr(r), sort=r.s, icon=icon,
tooltip=tooltip, category=category,
- id_set=r.id_set, is_editable=is_editable)
+ id_set=r.id_set, is_editable=is_editable,
+ use_sort_as_name=use_sort_as_name)
for r in items]
#print 'end phase "tags list":', time.clock() - last, 'seconds'
diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py
index 83d395dec5..dba6abbfa5 100644
--- a/src/calibre/library/server/base.py
+++ b/src/calibre/library/server/base.py
@@ -222,7 +222,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
# cherrypy.engine.signal_handler.subscribe()
cherrypy.engine.block()
- except Exception, e:
+ except Exception as e:
self.exception = e
finally:
self.is_running = False
diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py
index f1d9b9785c..895fbb06e9 100644
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@@ -15,7 +15,7 @@ from calibre import isbytestring, force_unicode, fit_image, \
prepare_string_for_xml
from calibre.utils.ordered_dict import OrderedDict
from calibre.utils.filenames import ascii_filename
-from calibre.utils.config import prefs, tweaks
+from calibre.utils.config import prefs
from calibre.utils.icu import sort_key
from calibre.utils.magick import Image
from calibre.library.comments import comments_to_html
@@ -155,8 +155,7 @@ def get_category_items(category, items, restriction, datatype, prefix): # {{{
'
{1}
'
'{2}
')
rating, rstring = render_rating(i.avg_rating, prefix)
- if i.category == 'authors' and \
- tweaks['categories_use_field_for_author_name'] == 'author_sort':
+ if i.use_sort_as_name:
name = xml(i.sort)
else:
name = xml(i.name)
@@ -696,7 +695,10 @@ class BrowseServer(object):
xml(href, True),
xml(val if len(dbtags) == 1 else tag.name),
xml(key, True)))
- join = ' & ' if key == 'authors' else ', '
+ join = ' & ' if key == 'authors' or \
+ (fm['is_custom'] and
+ fm['display'].get('is_names', False)) \
+ else ', '
args[key] = join.join(vals)
added_key = True
if not added_key:
diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py
index 11ea2b951e..919f5a7969 100644
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@@ -169,7 +169,7 @@ class ContentServer(object):
return cover
return save_cover_data_to(img, 'img.jpg', return_data=True,
resize_to=(width, height))
- except Exception, err:
+ except Exception as err:
import traceback
cherrypy.log.error('Failed to generate cover:')
cherrypy.log.error(traceback.print_exc())
diff --git a/src/calibre/library/server/main.py b/src/calibre/library/server/main.py
index e4de710c6a..3a6f918022 100644
--- a/src/calibre/library/server/main.py
+++ b/src/calibre/library/server/main.py
@@ -69,7 +69,7 @@ def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
if pid > 0:
# exit first parent
sys.exit(0)
- except OSError, e:
+ except OSError as e:
print >>sys.stderr, "fork #1 failed: %d (%s)" % (e.errno, e.strerror)
sys.exit(1)
@@ -84,7 +84,7 @@ def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
if pid > 0:
# exit from second parent
sys.exit(0)
- except OSError, e:
+ except OSError as e:
print >>sys.stderr, "fork #2 failed: %d (%s)" % (e.errno, e.strerror)
sys.exit(1)
diff --git a/src/calibre/library/server/opds.py b/src/calibre/library/server/opds.py
index e7fdffbbbb..bdd35c16f1 100644
--- a/src/calibre/library/server/opds.py
+++ b/src/calibre/library/server/opds.py
@@ -22,7 +22,6 @@ from calibre.library.server.utils import format_tag_string, Offsets
from calibre import guess_type, prepare_string_for_xml as xml
from calibre.utils.icu import sort_key
from calibre.utils.ordered_dict import OrderedDict
-from calibre.utils.config import tweaks
BASE_HREFS = {
0 : '/stanza',
@@ -126,8 +125,7 @@ def CATALOG_ENTRY(item, item_kind, base_href, version, updated,
count = (_('%d books') if item.count > 1 else _('%d book'))%item.count
if ignore_count:
count = ''
- if item.category == 'authors' and \
- tweaks['categories_use_field_for_author_name'] == 'author_sort':
+ if item.use_sort_as_name:
name = item.sort
else:
name = item.name
diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py
index 2075ab5880..511106fe7b 100644
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@@ -193,7 +193,7 @@ def load_c_extensions(conn, debug=DEBUG):
conn.load_extension(ext_path)
conn.enable_load_extension(False)
return True
- except Exception, e:
+ except Exception as e:
if debug:
print 'Failed to load high performance sqlite C extension'
print e
@@ -247,14 +247,14 @@ class DBThread(Thread):
if func == 'dump':
try:
ok, res = True, tuple(self.conn.iterdump())
- except Exception, err:
+ except Exception as err:
ok, res = False, (err, traceback.format_exc())
elif func == 'create_dynamic_filter':
try:
f = DynamicFilter(args[0])
self.conn.create_function(args[0], 1, f)
ok, res = True, f
- except Exception, err:
+ except Exception as err:
ok, res = False, (err, traceback.format_exc())
else:
bfunc = getattr(self.conn, func)
@@ -263,7 +263,7 @@ class DBThread(Thread):
try:
ok, res = True, bfunc(*args, **kwargs)
break
- except OperationalError, err:
+ except OperationalError as err:
# Retry if unable to open db file
e = str(err)
if 'unable to open' not in e or i == 2:
@@ -273,10 +273,10 @@ class DBThread(Thread):
reprlib.repr(kwargs))
raise
time.sleep(0.5)
- except Exception, err:
+ except Exception as err:
ok, res = False, (err, traceback.format_exc())
self.results.put((ok, res))
- except Exception, err:
+ except Exception as err:
self.unhandled_error = (err, traceback.format_exc())
class DatabaseException(Exception):
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index 22f8af56c2..dfab13e3b8 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal '
''' Post installation script for linux '''
-import sys, os, cPickle, textwrap, stat
+import sys, os, cPickle, textwrap, stat, importlib
from subprocess import check_call
from calibre import __appname__, prints, guess_type
@@ -59,7 +59,7 @@ for x in {manifest!r}:
shutil.rmtree(x)
else:
os.unlink(x)
- except Exception, e:
+ except Exception as e:
print 'Failed to delete', x
print '\t', e
@@ -285,7 +285,7 @@ class PostInstall:
complete -o nospace -C calibre-complete ebook-convert
'''))
- except TypeError, err:
+ except TypeError as err:
if 'resolve_entities' in str(err):
print 'You need python-lxml >= 2.0.5 for calibre'
sys.exit(1)
@@ -309,7 +309,7 @@ class PostInstall:
for src in entry_points['console_scripts']:
prog, right = src.split('=')
prog = prog.strip()
- module = __import__(right.split(':')[0].strip(), fromlist=['a'])
+ module = importlib.import_module(right.split(':')[0].strip())
parser = getattr(module, 'option_parser', None)
if parser is None:
continue
diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index 948611f775..97ef32e9d4 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -493,7 +493,16 @@ Most purchased EPUB books have `DRM `_. Thi
I am getting a "Permission Denied" error?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A permission denied error can occur because of many possible reasons, none of them having anything to do with |app|. You can get permission denied errors if you are using an SD card with write protect enabled. Or if you, or some program you used changed the file permissions of the files in question to read only. Or if there is a filesystem error on the device which caused your operating system to mount the filesystem in read only mode or mark a particular file as read only pending recovery. Or if the files have their owner set to a user other than you. Or if your file is open in another program. You will need to fix the underlying cause of the permissions error before resuming to use |app|. Read the error message carefully, see what file it points to and fix the permissions on that file.
+A permission denied error can occur because of many possible reasons, none of them having anything to do with |app|.
+
+ * You can get permission denied errors if you are using an SD card with write protect enabled.
+ * If you, or some program you used, changed the file permissions of the files in question to read only.
+ * If there is a filesystem error on the device which caused your operating system to mount the filesystem in read only mode or mark a particular file as read only pending recovery.
+ * If the files have their owner set to a user other than you.
+ * If your file is open in another program.
+ * If the file resides on a device, you may have reached the limit of 256 files in the root of the device. In this case you need to reformat the device/SD card referred to in the error message with a FAT32 filesystem, or delete some files from the SD card/device memory.
+
+You will need to fix the underlying cause of the permissions error before you resume using |app|. Read the error message carefully, see what file it points to and fix the permissions on that file.
Can I have the comment metadata show up on my reader?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -510,7 +519,7 @@ You have two choices:
How is |app| licensed?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode `_. You are free to use the results of conversions from |app| however you want. You cannot use code, libraries from |app| in your software without maing your software open source. For details, see `The GNU GPL v3 http://www.gnu.org/licenses/gpl.html`_.
+|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode `_. You are free to use the results of conversions from |app| however you want. You cannot use code or libraries from |app| in your software without making your software open source. For details, see `The GNU GPL v3 <http://www.gnu.org/licenses/gpl.html>`_.
How do I run calibre from my USB stick?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/src/calibre/manual/news.rst b/src/calibre/manual/news.rst
index d0838ccb0f..ed306a168e 100644
--- a/src/calibre/manual/news.rst
+++ b/src/calibre/manual/news.rst
@@ -137,7 +137,7 @@ to the recipe. Finally, lets replace some of the :term:`CSS` that we disabled ea
With these additions, our recipe has become "production quality", indeed it is very close to the actual recipe used by |app| for the *BBC*, shown below:
-.. literalinclude:: ../../../resources/recipes/bbc.recipe
+.. literalinclude:: ../../../recipes/bbc.recipe
This :term:`recipe` explores only the tip of the iceberg when it comes to the power of |app|. To explore more of the abilities of |app| we'll examine a more complex real life example in the next section.
diff --git a/src/calibre/manual/sub_groups.rst b/src/calibre/manual/sub_groups.rst
index c27b3581f8..e5a433dce9 100644
--- a/src/calibre/manual/sub_groups.rst
+++ b/src/calibre/manual/sub_groups.rst
@@ -105,8 +105,8 @@ After creating the saved search, you can use it as a restriction.
.. image:: images/sg_restrict2.jpg
:align: center
- Useful Template Functions
- -------------------------
+Useful Template Functions
+-------------------------
You might want to use the genre information in a template, such as with save to disk or send to device. The question might then be "How do I get the outermost genre name or names?" An |app| template function, subitems, is provided to make doing this easier.
@@ -114,4 +114,4 @@ After creating the saved search, you can use it as a restriction.
{#genre:subitems(0,1)||/}{title} - {authors}
-See :ref:`The |app| template language ` for more information templates and the subitem function.
\ No newline at end of file
+See :ref:`The |app| template language ` for more information about templates and the subitems function.
diff --git a/src/calibre/utils/Zeroconf.py b/src/calibre/utils/Zeroconf.py
index f4a7119d16..fbb9b4e71f 100755
--- a/src/calibre/utils/Zeroconf.py
+++ b/src/calibre/utils/Zeroconf.py
@@ -863,7 +863,7 @@ class Engine(threading.Thread):
for socket in rr:
try:
self.readers[socket].handle_read()
- except NonLocalNameException, err:
+ except NonLocalNameException as err:
print err
except UnicodeDecodeError:
if DEBUG:
diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py
index 740e67bee8..2e40275beb 100644
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@@ -316,7 +316,7 @@ class TemplateFormatter(string.Formatter):
self.locals = {}
try:
ans = self.vformat(fmt, [], kwargs).strip()
- except Exception, e:
+ except Exception as e:
if DEBUG:
traceback.print_exc()
ans = error_value + ' ' + e.message
diff --git a/src/calibre/utils/ipc/worker.py b/src/calibre/utils/ipc/worker.py
index e187235a9e..9594f64ae4 100644
--- a/src/calibre/utils/ipc/worker.py
+++ b/src/calibre/utils/ipc/worker.py
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import os, cPickle, sys
+import os, cPickle, sys, importlib
from multiprocessing.connection import Client
from threading import Thread
from Queue import Queue
@@ -75,7 +75,7 @@ class Progress(Thread):
def get_func(name):
module, func, notification = PARALLEL_FUNCS[name]
- module = __import__(module, fromlist=[1])
+ module = importlib.import_module(module)
func = getattr(module, func)
return func, notification
diff --git a/src/calibre/utils/lock.py b/src/calibre/utils/lock.py
index 5098c78f90..0b66be963b 100644
--- a/src/calibre/utils/lock.py
+++ b/src/calibre/utils/lock.py
@@ -32,7 +32,7 @@ class WindowsExclFile(object):
None, #No template file
)
break
- except pywintypes.error, err:
+ except pywintypes.error as err:
if getattr(err, 'args', [-1])[0] in (0x20, 0x21):
time.sleep(1)
continue
diff --git a/src/calibre/utils/pdftk.py b/src/calibre/utils/pdftk.py
index 1263b60306..f4fcb8a2e3 100644
--- a/src/calibre/utils/pdftk.py
+++ b/src/calibre/utils/pdftk.py
@@ -56,7 +56,7 @@ def set_metadata(stream, mi):
try:
p.wait()
break
- except OSError, e:
+ except OSError as e:
if e.errno == errno.EINTR:
continue
else:
diff --git a/src/calibre/utils/smtp.py b/src/calibre/utils/smtp.py
index 744021f911..81936a8f71 100644
--- a/src/calibre/utils/smtp.py
+++ b/src/calibre/utils/smtp.py
@@ -76,7 +76,7 @@ def sendmail_direct(from_, to, msg, timeout, localhost, verbose,
s.connect(host, 25)
s.sendmail(from_, [to], msg)
return s.quit()
- except Exception, e:
+ except Exception as e:
last_error, last_traceback = e, traceback.format_exc()
if last_error is not None:
print last_traceback
diff --git a/src/calibre/web/feeds/feedparser.py b/src/calibre/web/feeds/feedparser.py
index ead9207b70..99c3e09666 100755
--- a/src/calibre/web/feeds/feedparser.py
+++ b/src/calibre/web/feeds/feedparser.py
@@ -6,12 +6,11 @@ Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
Visit http://feedparser.org/ for the latest version
Visit http://feedparser.org/docs/ for the latest documentation
-Required: Python 2.1 or later
-Recommended: Python 2.3 or later
+Required: Python 2.4 or later
Recommended: CJKCodecs and iconv_codec
"""
-__version__ = "4.2-pre-" + "$Revision: 316 $"[11:14] + "-svn"
+__version__ = "5.0.1"
__license__ = """Copyright (c) 2002-2008, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -42,14 +41,14 @@ __contributors__ = ["Jason Diamond ",
"Kevin Marks ",
"Sam Ruby ",
"Ade Oshineye ",
- "Martin Pool "]
+ "Martin Pool ",
+ "Kurt McKee "]
_debug = 0
# HTTP "User-Agent" header to send to servers when downloading feeds.
# If you are embedding feedparser in a larger application, you should
# change this to your application name and URL.
-USER_AGENT = 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4' # Changed by Kovid
-
+USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11' # Changed by Kovid
# HTTP "Accept" header to send to servers when downloading feeds. If you don't
# want to send an Accept header, set this to None.
ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"
@@ -76,12 +75,73 @@ RESOLVE_RELATIVE_URIS = 1
# HTML content, set this to 1.
SANITIZE_HTML = 1
-# ---------- required modules (should come with any Python distribution) ----------
-import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2
+# ---------- Python 3 modules (make it work if possible) ----------
try:
- from cStringIO import StringIO as _StringIO
+ import rfc822
+except ImportError:
+ from email import _parseaddr as rfc822
+
+try:
+ # Python 3.1 introduces bytes.maketrans and simultaneously
+ # deprecates string.maketrans; use bytes.maketrans if possible
+ _maketrans = bytes.maketrans
+except (NameError, AttributeError):
+ import string
+ _maketrans = string.maketrans
+
+# base64 support for Atom feeds that contain embedded binary data
+try:
+ import base64, binascii
+ # Python 3.1 deprecates decodestring in favor of decodebytes
+ _base64decode = getattr(base64, 'decodebytes', base64.decodestring)
except:
- from StringIO import StringIO as _StringIO
+ base64 = binascii = None
+
+def _s2bytes(s):
+ # Convert a UTF-8 str to bytes if the interpreter is Python 3
+ try:
+ return bytes(s, 'utf8')
+ except (NameError, TypeError):
+ # In Python 2.5 and below, bytes doesn't exist (NameError)
+ # In Python 2.6 and above, bytes and str are the same (TypeError)
+ return s
+
+def _l2bytes(l):
+ # Convert a list of ints to bytes if the interpreter is Python 3
+ try:
+ if bytes is not str:
+ # In Python 2.6 and above, this call won't raise an exception
+ # but it will return bytes([65]) as '[65]' instead of 'A'
+ return bytes(l)
+ raise NameError
+ except NameError:
+ return ''.join(map(chr, l))
+
+# If you want feedparser to allow all URL schemes, set this to ()
+# List culled from Python's urlparse documentation at:
+# http://docs.python.org/library/urlparse.html
+# as well as from "URI scheme" at Wikipedia:
+# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
+# Many more will likely need to be added!
+ACCEPTABLE_URI_SCHEMES = (
+ 'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'mailto',
+ 'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp',
+ 'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais',
+ # Additional common-but-unofficial schemes
+ 'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs',
+ 'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg',
+)
+#ACCEPTABLE_URI_SCHEMES = ()
+
+# ---------- required modules (should come with any Python distribution) ----------
+import sgmllib, re, sys, copy, urlparse, time, types, cgi, urllib, urllib2, datetime
+try:
+ from io import BytesIO as _StringIO
+except ImportError:
+ try:
+ from cStringIO import StringIO as _StringIO
+ except:
+ from StringIO import StringIO as _StringIO
# ---------- optional modules (feedparser will work without these, but with reduced functionality) ----------
@@ -114,12 +174,6 @@ except:
data = data.replace(char, entity)
return data
-# base64 support for Atom feeds that contain embedded binary data
-try:
- import base64, binascii
-except:
- base64 = binascii = None
-
# cjkcodecs and iconv_codec provide support for more character encodings.
# Both are available from http://cjkpython.i18n.org/
try:
@@ -172,17 +226,27 @@ class UndeclaredNamespace(Exception): pass
sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
sgmllib.special = re.compile(']|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''')
+ class EndBracketRegEx:
+ def __init__(self):
+ # Overriding the built-in sgmllib.endbracket regex allows the
+ # parser to find angle brackets embedded in element attributes.
+ self.endbracket = re.compile('''([^'"<>]|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''')
def search(self,string,index=0):
- self.match = self.endbracket.match(string,index)
- if self.match: return self
- def start(self,n):
+ match = self.endbracket.match(string,index)
+ if match is not None:
+ # Returning a new object in the calling thread's context
+ # resolves a thread-safety issue.
+ return EndBracketMatch(match)
+ return None
+ class EndBracketMatch:
+ def __init__(self, match):
+ self.match = match
+ def start(self, n):
return self.match.end(n)
- sgmllib.endbracket = EndBracketMatch()
+ sgmllib.endbracket = EndBracketRegEx()
SUPPORTED_VERSIONS = {'': 'unknown',
'rss090': 'RSS 0.90',
@@ -220,7 +284,7 @@ class FeedParserDict(UserDict):
'guid': 'id',
'date': 'updated',
'date_parsed': 'updated_parsed',
- 'description': ['subtitle', 'summary'],
+ 'description': ['summary', 'subtitle'],
'url': ['href'],
'modified': 'updated',
'modified_parsed': 'updated_parsed',
@@ -245,9 +309,9 @@ class FeedParserDict(UserDict):
realkey = self.keymap.get(key, key)
if type(realkey) == types.ListType:
for k in realkey:
- if UserDict.has_key(self, k):
+ if UserDict.__contains__(self, k):
return UserDict.__getitem__(self, k)
- if UserDict.has_key(self, key):
+ if UserDict.__contains__(self, key):
return UserDict.__getitem__(self, key)
return UserDict.__getitem__(self, realkey)
@@ -272,9 +336,12 @@ class FeedParserDict(UserDict):
def has_key(self, key):
try:
- return hasattr(self, key) or UserDict.has_key(self, key)
+ return hasattr(self, key) or UserDict.__contains__(self, key)
except AttributeError:
return False
+ # This alias prevents the 2to3 tool from changing the semantics of the
+ # __contains__ function below and exhausting the maximum recursion depth
+ __has_key = has_key
def __getattr__(self, key):
try:
@@ -294,7 +361,7 @@ class FeedParserDict(UserDict):
return self.__setitem__(key, value)
def __contains__(self, key):
- return self.has_key(key)
+ return self.__has_key(key)
def zopeCompatibilityHack():
global FeedParserDict
@@ -327,9 +394,8 @@ def _ebcdic_to_ascii(s):
92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249,
48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255
)
- import string
- _ebcdic_to_ascii_map = string.maketrans( \
- ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+ _ebcdic_to_ascii_map = _maketrans( \
+ _l2bytes(range(256)), _l2bytes(emap))
return s.translate(_ebcdic_to_ascii_map)
_cp1252 = {
@@ -483,6 +549,10 @@ class _FeedParserMixin:
# normalize attrs
attrs = [(k.lower(), v) for k, v in attrs]
attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
+ # the sgml parser doesn't handle entities in attributes, but
+ # strict xml parsers do -- account for this difference
+ if isinstance(self, _LooseFeedParser):
+ attrs = [(k, v.replace('&', '&')) for k, v in attrs]
# track xml:base and xml:lang
attrsD = dict(attrs)
@@ -492,7 +562,12 @@ class _FeedParserMixin:
baseuri = unicode(baseuri, self.encoding)
except:
baseuri = unicode(baseuri, 'iso-8859-1')
- self.baseuri = _urljoin(self.baseuri, baseuri)
+ # ensure that self.baseuri is always an absolute URI that
+ # uses a whitelisted URI scheme (e.g. not `javascript:`)
+ if self.baseuri:
+ self.baseuri = _makeSafeAbsoluteURI(self.baseuri, baseuri) or self.baseuri
+ else:
+ self.baseuri = _urljoin(self.baseuri, baseuri)
lang = attrsD.get('xml:lang', attrsD.get('lang'))
if lang == '':
# xml:lang could be explicitly set to '', we need to capture that
@@ -671,7 +746,7 @@ class _FeedParserMixin:
def mapContentType(self, contentType):
contentType = contentType.lower()
- if contentType == 'text':
+ if contentType == 'text' or contentType == 'plain':
contentType = 'text/plain'
elif contentType == 'html':
contentType = 'text/html'
@@ -735,6 +810,11 @@ class _FeedParserMixin:
else:
pieces = pieces[1:-1]
+ # Ensure each piece is a str for Python 3
+ for (i, v) in enumerate(pieces):
+ if not isinstance(v, basestring):
+ pieces[i] = v.decode('utf-8')
+
output = ''.join(pieces)
if stripWhitespace:
output = output.strip()
@@ -743,11 +823,15 @@ class _FeedParserMixin:
# decode base64 content
if base64 and self.contentparams.get('base64', 0):
try:
- output = base64.decodestring(output)
+ output = _base64decode(output)
except binascii.Error:
pass
except binascii.Incomplete:
pass
+ except TypeError:
+ # In Python 3, base64 takes and outputs bytes, not str
+ # This may not be the most correct way to accomplish this
+ output = _base64decode(output.encode('utf-8')).decode('utf-8')
# resolve relative URIs
if (element in self.can_be_relative_uri) and output:
@@ -805,7 +889,7 @@ class _FeedParserMixin:
# address common error where people take data that is already
# utf-8, presume that it is iso-8859-1, and re-encode it.
- if self.encoding=='utf-8' and type(output) == type(u''):
+ if self.encoding in ('utf-8', 'utf-8_INVALID_PYTHON_3') and type(output) == type(u''):
try:
output = unicode(output.encode('iso-8859-1'), 'utf-8')
except:
@@ -830,9 +914,14 @@ class _FeedParserMixin:
contentparams['value'] = output
self.entries[-1][element].append(contentparams)
elif element == 'link':
- self.entries[-1][element] = output
- if output:
- self.entries[-1]['links'][-1]['href'] = output
+ if not self.inimage:
+ # query variables in urls in link elements are improperly
+ # converted from `?a=1&b=2` to `?a=1&b;=2` as if they're
+ # unhandled character references. fix this special case.
+ output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output)
+ self.entries[-1][element] = output
+ if output:
+ self.entries[-1]['links'][-1]['href'] = output
else:
if element == 'description':
element = 'summary'
@@ -847,6 +936,9 @@ class _FeedParserMixin:
element = 'subtitle'
context[element] = output
if element == 'link':
+ # fix query variables; see above for the explanation
+ output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output)
+ context[element] = output
context['links'][-1]['href'] = output
elif self.incontent:
contentparams = copy.deepcopy(self.contentparams)
@@ -874,21 +966,21 @@ class _FeedParserMixin:
# text, but this is routinely ignored. This is an attempt to detect
# the most common cases. As false positives often result in silent
# data loss, this function errs on the conservative side.
- def lookslikehtml(self, str):
+ def lookslikehtml(self, s):
if self.version.startswith('atom'): return
if self.contentparams.get('type','text/html') != 'text/plain': return
# must have a close tag or a entity reference to qualify
- if not (re.search(r'(\w+)>',str) or re.search("?\w+;",str)): return
+ if not (re.search(r'(\w+)>',s) or re.search("?\w+;",s)): return
# all tags must be in a restricted subset of valid HTML tags
if filter(lambda t: t.lower() not in _HTMLSanitizer.acceptable_elements,
- re.findall(r'?(\w+)',str)): return
+ re.findall(r'?(\w+)',s)): return
# all entities must have been defined as valid HTML entities
from htmlentitydefs import entitydefs
if filter(lambda e: e not in entitydefs.keys(),
- re.findall(r'&(\w+);',str)): return
+ re.findall(r'&(\w+);',s)): return
return 1
@@ -929,9 +1021,12 @@ class _FeedParserMixin:
attrsD['href'] = href
return attrsD
- def _save(self, key, value):
+ def _save(self, key, value, overwrite=False):
context = self._getContext()
- context.setdefault(key, value)
+ if overwrite:
+ context[key] = value
+ else:
+ context.setdefault(key, value)
def _start_rss(self, attrsD):
versionmap = {'0.91': 'rss091u',
@@ -988,7 +1083,8 @@ class _FeedParserMixin:
def _start_image(self, attrsD):
context = self._getContext()
- context.setdefault('image', FeedParserDict())
+ if not self.inentry:
+ context.setdefault('image', FeedParserDict())
self.inimage = 1
self.hasTitle = 0
self.push('image', 0)
@@ -1013,6 +1109,10 @@ class _FeedParserMixin:
def _start_author(self, attrsD):
self.inauthor = 1
self.push('author', 1)
+ # Append a new FeedParserDict when expecting an author
+ context = self._getContext()
+ context.setdefault('authors', [])
+ context['authors'].append(FeedParserDict())
_start_managingeditor = _start_author
_start_dc_author = _start_author
_start_dc_creator = _start_author
@@ -1147,6 +1247,8 @@ class _FeedParserMixin:
context.setdefault(prefix + '_detail', FeedParserDict())
context[prefix + '_detail'][key] = value
self._sync_author_detail()
+ context.setdefault('authors', [FeedParserDict()])
+ context['authors'][-1][key] = value
def _save_contributor(self, key, value):
context = self._getContext()
@@ -1252,7 +1354,7 @@ class _FeedParserMixin:
def _end_published(self):
value = self.pop('published')
- self._save('published_parsed', _parse_date(value))
+ self._save('published_parsed', _parse_date(value), overwrite=True)
_end_dcterms_issued = _end_published
_end_issued = _end_published
@@ -1262,15 +1364,17 @@ class _FeedParserMixin:
_start_dcterms_modified = _start_updated
_start_pubdate = _start_updated
_start_dc_date = _start_updated
+ _start_lastbuilddate = _start_updated
def _end_updated(self):
value = self.pop('updated')
parsed_value = _parse_date(value)
- self._save('updated_parsed', parsed_value)
+ self._save('updated_parsed', parsed_value, overwrite=True)
_end_modified = _end_updated
_end_dcterms_modified = _end_updated
_end_pubdate = _end_updated
_end_dc_date = _end_updated
+ _end_lastbuilddate = _end_updated
def _start_created(self, attrsD):
self.push('created', 1)
@@ -1278,14 +1382,14 @@ class _FeedParserMixin:
def _end_created(self):
value = self.pop('created')
- self._save('created_parsed', _parse_date(value))
+ self._save('created_parsed', _parse_date(value), overwrite=True)
_end_dcterms_created = _end_created
def _start_expirationdate(self, attrsD):
self.push('expired', 1)
def _end_expirationdate(self):
- self._save('expired_parsed', _parse_date(self.pop('expired')))
+ self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True)
def _start_cc_license(self, attrsD):
context = self._getContext()
@@ -1334,6 +1438,10 @@ class _FeedParserMixin:
_start_dc_subject = _start_category
_start_keywords = _start_category
+ def _start_media_category(self, attrsD):
+ attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema')
+ self._start_category(attrsD)
+
def _end_itunes_keywords(self):
for term in self.pop('itunes_keywords').split():
self._addTag(term, 'http://www.itunes.com/', None)
@@ -1354,6 +1462,7 @@ class _FeedParserMixin:
_end_dc_subject = _end_category
_end_keywords = _end_category
_end_itunes_category = _end_category
+ _end_media_category = _end_category
def _start_cloud(self, attrsD):
self._getContext()['cloud'] = FeedParserDict(attrsD)
@@ -1368,11 +1477,10 @@ class _FeedParserMixin:
attrsD = self._itsAnHrefDamnIt(attrsD)
if attrsD.has_key('href'):
attrsD['href'] = self.resolveURI(attrsD['href'])
- if attrsD.get('rel')=='enclosure' and not context.get('id'):
- context['id'] = attrsD.get('href')
expectingText = self.infeed or self.inentry or self.insource
context.setdefault('links', [])
- context['links'].append(FeedParserDict(attrsD))
+ if not (self.inentry and self.inimage):
+ context['links'].append(FeedParserDict(attrsD))
if attrsD.has_key('href'):
expectingText = 0
if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
@@ -1498,9 +1606,6 @@ class _FeedParserMixin:
context = self._getContext()
attrsD['rel']='enclosure'
context.setdefault('links', []).append(FeedParserDict(attrsD))
- href = attrsD.get('href')
- if href and not context.get('id'):
- context['id'] = href
def _start_source(self, attrsD):
if 'url' in attrsD:
@@ -1537,10 +1642,10 @@ class _FeedParserMixin:
_start_fullitem = _start_content_encoded
def _end_content(self):
- copyToDescription = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types)
+ copyToSummary = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types)
value = self.popContent('content')
- if copyToDescription:
- self._save('description', value)
+ if copyToSummary:
+ self._save('summary', value)
_end_body = _end_content
_end_xhtml_body = _end_content
@@ -1550,7 +1655,8 @@ class _FeedParserMixin:
def _start_itunes_image(self, attrsD):
self.push('itunes_image', 0)
- self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')})
+ if attrsD.get('href'):
+ self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')})
_start_itunes_link = _start_itunes_image
def _end_itunes_block(self):
@@ -1559,7 +1665,10 @@ class _FeedParserMixin:
def _end_itunes_explicit(self):
value = self.pop('itunes_explicit', 0)
- self._getContext()['itunes_explicit'] = (value == 'yes') and 1 or 0
+ # Convert 'yes' -> True, 'clean' to False, and any other value to None
+ # False and None both evaluate as False, so the difference can be ignored
+ # by applications that only need to know if the content is explicit.
+ self._getContext()['itunes_explicit'] = (None, False, True)[(value == 'yes' and 2) or value == 'clean' or 0]
def _start_media_content(self, attrsD):
context = self._getContext()
@@ -1588,6 +1697,17 @@ class _FeedParserMixin:
context = self._getContext()
context['media_player']['content'] = value
+ def _start_newlocation(self, attrsD):
+ self.push('newlocation', 1)
+
+ def _end_newlocation(self):
+ url = self.pop('newlocation')
+ context = self._getContext()
+ # don't set newlocation if the context isn't right
+ if context is not self.feeddata:
+ return
+ context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip())
+
if _XML_AVAILABLE:
class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler):
def __init__(self, baseuri, baselang, encoding):
@@ -1689,9 +1809,9 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
'source', 'track', 'wbr'
]
- def __init__(self, encoding, type):
+ def __init__(self, encoding, _type):
self.encoding = encoding
- self.type = type
+ self._type = _type
if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding)
sgmllib.SGMLParser.__init__(self)
@@ -1708,7 +1828,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
def parse_starttag(self,i):
j=sgmllib.SGMLParser.parse_starttag(self, i)
- if self.type == 'application/xhtml+xml':
+ if self._type == 'application/xhtml+xml':
if j>2 and self.rawdata[j-2:j]=='/>':
self.unknown_endtag(self.lasttag)
return j
@@ -1719,8 +1839,14 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
data = data.replace('&#39;', "'")
data = data.replace('&#34;', '"')
- if self.encoding and type(data) == type(u''):
- data = data.encode(self.encoding)
+ try:
+ bytes
+ if bytes is str:
+ raise NameError
+ self.encoding = self.encoding + '_INVALID_PYTHON_3'
+ except NameError:
+ if self.encoding and type(data) == type(u''):
+ data = data.encode(self.encoding)
sgmllib.SGMLParser.feed(self, data)
sgmllib.SGMLParser.close(self)
@@ -1749,7 +1875,11 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
value = unicode(value, self.encoding)
except:
value = unicode(value, 'iso-8859-1')
- uattrs.append((unicode(key, self.encoding), value))
+ try:
+ # Currently, in Python 3 the key is already a str, and cannot be decoded again
+ uattrs.append((unicode(key, self.encoding), value))
+ except TypeError:
+ uattrs.append((key, value))
strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs])
if self.encoding:
try:
@@ -1840,6 +1970,14 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
'''Return processed HTML as a single string'''
return ''.join([str(p) for p in self.pieces])
+ def parse_declaration(self, i):
+ try:
+ return sgmllib.SGMLParser.parse_declaration(self, i)
+ except sgmllib.SGMLParseError:
+ # escape the doctype declaration and continue parsing
+ self.handle_data('&lt;')
+ return i+1
+
class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor):
def __init__(self, baseuri, baselang, encoding, entities):
sgmllib.SGMLParser.__init__(self)
@@ -2019,10 +2157,10 @@ class _MicroformatsParser:
arLines = []
def processSingleString(sProperty):
- sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1)
+ sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding)
if sValue:
arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue))
- return sValue or ''
+ return sValue or u''
def processSingleURI(sProperty):
sValue = self.getPropertyValue(elmCard, sProperty, self.URI)
@@ -2071,8 +2209,8 @@ class _MicroformatsParser:
sAgentValue = sAgentValue.replace(';', '\\;')
if sAgentValue:
arLines.append(self.vcardFold('AGENT:' + sAgentValue))
- elmAgent['class'] = ''
- elmAgent.contents = []
+ # Completely remove the agent element from the parse tree
+ elmAgent.extract()
else:
sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1);
if sAgentValue:
@@ -2219,8 +2357,8 @@ class _MicroformatsParser:
processSingleURI('key')
if arLines:
- arLines = ['BEGIN:vCard','VERSION:3.0'] + arLines + ['END:vCard']
- sVCards += '\n'.join(arLines) + '\n'
+ arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard']
+ sVCards += u'\n'.join(arLines) + u'\n'
return sVCards.strip()
@@ -2277,7 +2415,12 @@ class _MicroformatsParser:
def _parseMicroformats(htmlSource, baseURI, encoding):
if not BeautifulSoup: return
if _debug: sys.stderr.write('entering _parseMicroformats\n')
- p = _MicroformatsParser(htmlSource, baseURI, encoding)
+ try:
+ p = _MicroformatsParser(htmlSource, baseURI, encoding)
+ except UnicodeEncodeError:
+ # sgmllib throws this exception when performing lookups of tags
+ # with non-ASCII characters in them.
+ return
p.vcard = p.findVCards(p.document)
p.findTags()
p.findEnclosures()
@@ -2311,12 +2454,12 @@ class _RelativeURIResolver(_BaseHTMLProcessor):
('q', 'cite'),
('script', 'src')]
- def __init__(self, baseuri, encoding, type):
- _BaseHTMLProcessor.__init__(self, encoding, type)
+ def __init__(self, baseuri, encoding, _type):
+ _BaseHTMLProcessor.__init__(self, encoding, _type)
self.baseuri = baseuri
def resolveURI(self, uri):
- return _urljoin(self.baseuri, uri.strip())
+ return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip()))
def unknown_starttag(self, tag, attrs):
if _debug:
@@ -2325,27 +2468,44 @@ class _RelativeURIResolver(_BaseHTMLProcessor):
attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs]
_BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
-def _resolveRelativeURIs(htmlSource, baseURI, encoding, type):
+def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type):
if _debug:
sys.stderr.write('entering _resolveRelativeURIs\n')
- p = _RelativeURIResolver(baseURI, encoding, type)
+ p = _RelativeURIResolver(baseURI, encoding, _type)
p.feed(htmlSource)
return p.output()
+def _makeSafeAbsoluteURI(base, rel=None):
+ # bail if ACCEPTABLE_URI_SCHEMES is empty
+ if not ACCEPTABLE_URI_SCHEMES:
+ return _urljoin(base, rel or u'')
+ if not base:
+ return rel or u''
+ if not rel:
+ scheme = urlparse.urlparse(base)[0]
+ if not scheme or scheme in ACCEPTABLE_URI_SCHEMES:
+ return base
+ return u''
+ uri = _urljoin(base, rel)
+ if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES:
+ return u''
+ return uri
+
class _HTMLSanitizer(_BaseHTMLProcessor):
- acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'article',
- 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas',
- 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command',
- 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir',
- 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', 'figure', 'footer',
- 'font', 'form', 'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i',
- 'img', 'input', 'ins', 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map',
- 'menu', 'meter', 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup',
- 'option', 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
- 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', 'sub',
- 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead',
- 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript']
+ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
+ 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
+ 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
+ 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
+ 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
+ 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
+ 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
+ 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
+ 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
+ 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
+ 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
+ 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
+ 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript']
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
@@ -2469,7 +2629,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
self.unacceptablestack += 1
# add implicit namespaces to html5 inline svg/mathml
- if self.type.endswith('html'):
+ if self._type.endswith('html'):
if not dict(attrs).get('xmlns'):
if tag=='svg':
attrs.append( ('xmlns','http://www.w3.org/2000/svg') )
@@ -2514,6 +2674,9 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
for key, value in self.normalize_attrs(attrs):
if key in acceptable_attributes:
key=keymap.get(key,key)
+ # make sure the uri uses an acceptable uri scheme
+ if key == u'href':
+ value = _makeSafeAbsoluteURI(value)
clean_attrs.append((key,value))
elif key=='style':
clean_value = self.sanitize_style(value)
@@ -2569,9 +2732,22 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
return ' '.join(clean)
+ def parse_comment(self, i, report=1):
+ ret = _BaseHTMLProcessor.parse_comment(self, i, report)
+ if ret >= 0:
+ return ret
+ # if ret == -1, this may be a malicious attempt to circumvent
+ # sanitization, or a page-destroying unclosed comment
+ match = re.compile(r'--[^>]*>').search(self.rawdata, i+4)
+ if match:
+ return match.end()
+ # unclosed comment; deliberately fail to handle_data()
+ return len(self.rawdata)
-def _sanitizeHTML(htmlSource, encoding, type):
- p = _HTMLSanitizer(encoding, type)
+
+def _sanitizeHTML(htmlSource, encoding, _type):
+ p = _HTMLSanitizer(encoding, _type)
+ htmlSource = htmlSource.replace('= '2.3.3'
assert base64 != None
- user, passw = base64.decodestring(req.headers['Authorization'].split(' ')[1]).split(':')
+ user, passw = _base64decode(req.headers['Authorization'].split(' ')[1]).split(':')
realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0]
self.add_password(realm, host, user, passw)
retry = self.http_error_auth_reqed('www-authenticate', host, req, headers)
@@ -2663,7 +2839,7 @@ class _FeedURLHandler(urllib2.HTTPDigestAuthHandler, urllib2.HTTPRedirectHandler
except:
return self.http_error_default(req, fp, code, msg, headers)
-def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, extra_headers):
+def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers):
"""URL, filename, or string --> stream
This function lets you define parsers that take any input source
@@ -2691,7 +2867,7 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
If handlers is supplied, it is a list of handlers used to build a
urllib2 opener.
- if extra_headers is supplied it is a dictionary of HTTP request headers
+ if request_headers is supplied it is a dictionary of HTTP request headers
that will override the values generated by FeedParser.
"""
@@ -2701,7 +2877,12 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
if url_file_stream_or_string == '-':
return sys.stdin
- if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp'):
+ if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'):
+ # Deal with the feed URI scheme
+ if url_file_stream_or_string.startswith('feed:http'):
+ url_file_stream_or_string = url_file_stream_or_string[5:]
+ elif url_file_stream_or_string.startswith('feed:'):
+ url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:]
if not agent:
agent = USER_AGENT
# test for inline user:password for basic auth
@@ -2713,20 +2894,20 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
user_passwd, realhost = urllib.splituser(realhost)
if user_passwd:
url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest)
- auth = base64.encodestring(user_passwd).strip()
+ auth = base64.standard_b64encode(user_passwd).strip()
# iri support
try:
if isinstance(url_file_stream_or_string,unicode):
- url_file_stream_or_string = url_file_stream_or_string.encode('idna')
+ url_file_stream_or_string = url_file_stream_or_string.encode('idna').decode('utf-8')
else:
- url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna')
+ url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna').decode('utf-8')
except:
pass
# try to open with urllib2 (to use optional headers)
- request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, extra_headers)
- opener = apply(urllib2.build_opener, tuple([_FeedURLHandler()] + handlers))
+ request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, request_headers)
+ opener = apply(urllib2.build_opener, tuple(handlers + [_FeedURLHandler()]))
opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent
try:
return opener.open(request)
@@ -2735,20 +2916,22 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
# try to open with native open function (if url_file_stream_or_string is a filename)
try:
- return open(url_file_stream_or_string)
+ return open(url_file_stream_or_string, 'rb')
except:
pass
# treat url_file_stream_or_string as string
return _StringIO(str(url_file_stream_or_string))
-def _build_urllib2_request(url, agent, etag, modified, referrer, auth, extra_headers):
+def _build_urllib2_request(url, agent, etag, modified, referrer, auth, request_headers):
request = urllib2.Request(url)
request.add_header('User-Agent', agent)
if etag:
request.add_header('If-None-Match', etag)
if type(modified) == type(''):
modified = _parse_date(modified)
+ elif isinstance(modified, datetime.datetime):
+ modified = modified.utctimetuple()
if modified:
# format into an RFC 1123-compliant timestamp. We can't use
# time.strftime() since the %a and %b directives can be affected
@@ -2773,7 +2956,7 @@ def _build_urllib2_request(url, agent, etag, modified, referrer, auth, extra_hea
request.add_header('Accept', ACCEPT_HEADER)
# use this for whatever -- cookies, special headers, etc
# [('Cookie','Something'),('x-special-header','Another Value')]
- for header_name, header_value in extra_headers.items():
+ for header_name, header_value in request_headers.items():
request.add_header(header_name, header_value)
request.add_header('A-IM', 'feed') # RFC 3229 support
return request
@@ -2811,9 +2994,15 @@ _iso8601_re = [
+ r'(\.(?P<fracsecond>\d+))?'
+ r'(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?'
for tmpl in _iso8601_tmpl]
-del tmpl
+try:
+ del tmpl
+except NameError:
+ pass
_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]
-del regex
+try:
+ del regex
+except NameError:
+ pass
def _parse_date_iso8601(dateString):
'''Parse a variety of ISO-8601-compatible formats like 20040105'''
m = None
@@ -2887,7 +3076,7 @@ def _parse_date_iso8601(dateString):
# Python's time.mktime() is a wrapper around the ANSI C mktime(3c)
# which is guaranteed to normalize d/m/y/h/m/s.
# Many implementations have bugs, but we'll pretend they don't.
- return time.localtime(time.mktime(tm))
+ return time.localtime(time.mktime(tuple(tm)))
registerDateHandler(_parse_date_iso8601)
# 8-bit date handling routines written by ytrewq1.
@@ -3128,12 +3317,12 @@ def _parse_date_w3dtf(dateString):
__date_re = ('(?P<year>\d\d\d\d)'
'(?:(?P<dsep>-|)'
- '(?:(?P<julian>\d\d\d)'
- '|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
+ '(?:(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?'
+ '|(?P<julian>\d\d\d)))?')
__tzd_re = '(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
__tzd_rx = re.compile(__tzd_re)
__time_re = ('(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
- '(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?'
+ '(?:(?P=tsep)(?P<seconds>\d\d)(?:[.,]\d+)?)?'
+ __tzd_re)
__datetime_re = '%s(?:T%s)?' % (__date_re, __time_re)
__datetime_rx = re.compile(__datetime_re)
@@ -3157,6 +3346,10 @@ def _parse_date_rfc822(dateString):
else:
data.append('')
dateString = " ".join(data)
+ # Account for the Etc/GMT timezone by stripping 'Etc/'
+ elif len(data) == 5 and data[4].lower().startswith('etc/'):
+ data[4] = data[4][4:]
+ dateString = " ".join(data)
if len(data) < 5:
dateString += ' 00:00:00 GMT'
tm = rfc822.parsedate_tz(dateString)
@@ -3194,7 +3387,7 @@ def _parse_date(dateString):
raise ValueError
map(int, date9tuple)
return date9tuple
- except Exception, e:
+ except Exception as e:
if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e)))
pass
return None
@@ -3261,59 +3454,59 @@ def _getCharacterEncoding(http_headers, xml_data):
sniffed_xml_encoding = ''
xml_encoding = ''
true_encoding = ''
- http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type'))
+ http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type', http_headers.get('Content-type')))
# Must sniff for non-ASCII-compatible character encodings before
# searching for XML declaration. This heuristic is defined in
# section F of the XML specification:
# http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
try:
- if xml_data[:4] == '\x4c\x6f\xa7\x94':
+ if xml_data[:4] == _l2bytes([0x4c, 0x6f, 0xa7, 0x94]):
# EBCDIC
xml_data = _ebcdic_to_ascii(xml_data)
- elif xml_data[:4] == '\x00\x3c\x00\x3f':
+ elif xml_data[:4] == _l2bytes([0x00, 0x3c, 0x00, 0x3f]):
# UTF-16BE
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'):
+ elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xfe, 0xff])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])):
# UTF-16BE with BOM
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
- elif xml_data[:4] == '\x3c\x00\x3f\x00':
+ elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x3f, 0x00]):
# UTF-16LE
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and (xml_data[2:4] != '\x00\x00'):
+ elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xff, 0xfe])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])):
# UTF-16LE with BOM
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
- elif xml_data[:4] == '\x00\x00\x00\x3c':
+ elif xml_data[:4] == _l2bytes([0x00, 0x00, 0x00, 0x3c]):
# UTF-32BE
sniffed_xml_encoding = 'utf-32be'
xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
- elif xml_data[:4] == '\x3c\x00\x00\x00':
+ elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x00, 0x00]):
# UTF-32LE
sniffed_xml_encoding = 'utf-32le'
xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
- elif xml_data[:4] == '\x00\x00\xfe\xff':
+ elif xml_data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]):
# UTF-32BE with BOM
sniffed_xml_encoding = 'utf-32be'
xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
- elif xml_data[:4] == '\xff\xfe\x00\x00':
+ elif xml_data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]):
# UTF-32LE with BOM
sniffed_xml_encoding = 'utf-32le'
xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
- elif xml_data[:3] == '\xef\xbb\xbf':
+ elif xml_data[:3] == _l2bytes([0xef, 0xbb, 0xbf]):
# UTF-8 with BOM
sniffed_xml_encoding = 'utf-8'
xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
else:
# ASCII-compatible
pass
- xml_encoding_match = re.compile('^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data)
+ xml_encoding_match = re.compile(_s2bytes('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')).match(xml_data)
except:
xml_encoding_match = None
if xml_encoding_match:
- xml_encoding = xml_encoding_match.groups()[0].lower()
+ xml_encoding = xml_encoding_match.groups()[0].decode('utf-8').lower()
if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')):
xml_encoding = sniffed_xml_encoding
acceptable_content_type = 0
@@ -3329,7 +3522,7 @@ def _getCharacterEncoding(http_headers, xml_data):
true_encoding = http_encoding or 'us-ascii'
elif http_content_type.startswith('text/'):
true_encoding = http_encoding or 'us-ascii'
- elif http_headers and (not http_headers.has_key('content-type')):
+ elif http_headers and (not (http_headers.has_key('content-type') or http_headers.has_key('Content-type'))):
true_encoding = xml_encoding or 'iso-8859-1'
else:
true_encoding = xml_encoding or 'utf-8'
@@ -3347,35 +3540,35 @@ def _toUTF8(data, encoding):
'''
if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding)
# strip Byte Order Mark (if present)
- if (len(data) >= 4) and (data[:2] == '\xfe\xff') and (data[2:4] != '\x00\x00'):
+ if (len(data) >= 4) and (data[:2] == _l2bytes([0xfe, 0xff])) and (data[2:4] != _l2bytes([0x00, 0x00])):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-16be':
sys.stderr.write('trying utf-16be instead\n')
encoding = 'utf-16be'
data = data[2:]
- elif (len(data) >= 4) and (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'):
+ elif (len(data) >= 4) and (data[:2] == _l2bytes([0xff, 0xfe])) and (data[2:4] != _l2bytes([0x00, 0x00])):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-16le':
sys.stderr.write('trying utf-16le instead\n')
encoding = 'utf-16le'
data = data[2:]
- elif data[:3] == '\xef\xbb\xbf':
+ elif data[:3] == _l2bytes([0xef, 0xbb, 0xbf]):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-8':
sys.stderr.write('trying utf-8 instead\n')
encoding = 'utf-8'
data = data[3:]
- elif data[:4] == '\x00\x00\xfe\xff':
+ elif data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-32be':
sys.stderr.write('trying utf-32be instead\n')
encoding = 'utf-32be'
data = data[4:]
- elif data[:4] == '\xff\xfe\x00\x00':
+ elif data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-32le':
@@ -3398,36 +3591,36 @@ def _stripDoctype(data):
rss_version may be 'rss091n' or None
stripped_data is the same XML document, minus the DOCTYPE
'''
- start = re.search('<\w',data)
+ start = re.search(_s2bytes('<\w'), data)
start = start and start.start() or -1
head,data = data[:start+1], data[start+1:]
- entity_pattern = re.compile(r'^\s*<!ENTITY([^>]*?)>', re.MULTILINE)
+ entity_pattern = re.compile(_s2bytes(r'^\s*<!ENTITY([^>]*?)>'), re.MULTILINE)
entity_results=entity_pattern.findall(head)
- head = entity_pattern.sub('', head)
- doctype_pattern = re.compile(r'^\s*<!DOCTYPE([^>]*?)>', re.MULTILINE)
+ head = entity_pattern.sub(_s2bytes(''), head)
+ doctype_pattern = re.compile(_s2bytes(r'^\s*<!DOCTYPE([^>]*?)>'), re.MULTILINE)
doctype_results = doctype_pattern.findall(head)
- doctype = doctype_results and doctype_results[0] or ''
- if doctype.lower().count('netscape'):
+ doctype = doctype_results and doctype_results[0] or _s2bytes('')
+ if doctype.lower().count(_s2bytes('netscape')):
version = 'rss091n'
else:
version = None
# only allow in 'safe' inline entity definitions
- replacement=''
+ replacement=_s2bytes('')
if len(doctype_results)==1 and entity_results:
- safe_pattern=re.compile('\s+(\w+)\s+"(&#\w+;|[^&"]*)"')
+ safe_pattern=re.compile(_s2bytes('\s+(\w+)\s+"(&#\w+;|[^&"]*)"'))
safe_entities=filter(lambda e: safe_pattern.match(e),entity_results)
if safe_entities:
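- replacement='<!DOCTYPE feed [\n  <!ENTITY %s>\n]>' % '>\n  <!ENTITY '.join(safe_entities)
+ replacement=_s2bytes('<!DOCTYPE feed [\n  <!ENTITY') + _s2bytes('>\n  <!ENTITY ').join(safe_entities) + _s2bytes('>\n]>')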
data = doctype_pattern.sub(replacement, head) + data
- return version, data, dict(replacement and safe_pattern.findall(replacement))
+ return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)])
-def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], extra_headers={}):
+def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}):
'''Parse a feed from a URL, file, stream, or string.
- extra_headers, if given, is a dict from http header name to value to add
+ request_headers, if given, is a dict from http header name to value to add
to the request; this overrides internally generated values.
'''
result = FeedParserDict()
@@ -3435,23 +3628,31 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['entries'] = []
if _XML_AVAILABLE:
result['bozo'] = 0
- if type(handlers) == types.InstanceType:
+ if not isinstance(handlers, list):
handlers = [handlers]
try:
- f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, extra_headers)
+ f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers)
data = f.read()
- except Exception, e:
+ except Exception as e:
result['bozo'] = 1
result['bozo_exception'] = e
data = None
f = None
+ if hasattr(f, 'headers'):
+ result['headers'] = dict(f.headers)
+ # overwrite existing headers using response_headers
+ if 'headers' in result:
+ result['headers'].update(response_headers)
+ elif response_headers:
+ result['headers'] = copy.deepcopy(response_headers)
+
# if feed is gzip-compressed, decompress it
- if f and data and hasattr(f, 'headers'):
- if gzip and f.headers.get('content-encoding', '') == 'gzip':
+ if f and data and 'headers' in result:
+ if gzip and result['headers'].get('content-encoding') == 'gzip':
try:
data = gzip.GzipFile(fileobj=_StringIO(data)).read()
- except Exception, e:
+ except Exception as e:
# Some feeds claim to be gzipped but they're not, so
# we get garbage. Ideally, we should re-request the
# feed without the 'Accept-encoding: gzip' header,
@@ -3459,30 +3660,29 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['bozo'] = 1
result['bozo_exception'] = e
data = ''
- elif zlib and f.headers.get('content-encoding', '') == 'deflate':
+ elif zlib and result['headers'].get('content-encoding') == 'deflate':
try:
data = zlib.decompress(data, -zlib.MAX_WBITS)
- except Exception, e:
+ except Exception as e:
result['bozo'] = 1
result['bozo_exception'] = e
data = ''
# save HTTP headers
- if hasattr(f, 'info'):
- info = f.info()
- etag = info.getheader('ETag')
- if etag:
- result['etag'] = etag
- last_modified = info.getheader('Last-Modified')
- if last_modified:
- result['modified'] = _parse_date(last_modified)
+ if 'headers' in result:
+ if 'etag' in result['headers'] or 'ETag' in result['headers']:
+ etag = result['headers'].get('etag', result['headers'].get('ETag'))
+ if etag:
+ result['etag'] = etag
+ if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']:
+ modified = result['headers'].get('last-modified', result['headers'].get('Last-Modified'))
+ if modified:
+ result['modified'] = _parse_date(modified)
if hasattr(f, 'url'):
result['href'] = f.url
result['status'] = 200
if hasattr(f, 'status'):
result['status'] = f.status
- if hasattr(f, 'headers'):
- result['headers'] = f.headers.dict
if hasattr(f, 'close'):
f.close()
@@ -3495,8 +3695,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['encoding'], http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type = \
_getCharacterEncoding(http_headers, data)
if http_headers and (not acceptable_content_type):
- if http_headers.has_key('content-type'):
- bozo_message = '%s is not an XML media type' % http_headers['content-type']
+ if http_headers.has_key('content-type') or http_headers.has_key('Content-type'):
+ bozo_message = '%s is not an XML media type' % http_headers.get('content-type', http_headers.get('Content-type'))
else:
bozo_message = 'no Content-type specified'
result['bozo'] = 1
@@ -3505,8 +3705,12 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
if data is not None:
result['version'], data, entities = _stripDoctype(data)
- baseuri = http_headers.get('content-location', result.get('href'))
- baselang = http_headers.get('content-language', None)
+ # ensure that baseuri is an absolute uri using an acceptable URI scheme
+ contentloc = http_headers.get('content-location', http_headers.get('Content-Location', ''))
+ href = result.get('href', '')
+ baseuri = _makeSafeAbsoluteURI(href, contentloc) or _makeSafeAbsoluteURI(contentloc) or href
+
+ baselang = http_headers.get('content-language', http_headers.get('Content-Language', None))
# if server sent 304, we're done
if result.get('status', 0) == 304:
@@ -3582,7 +3786,7 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
elif proposed_encoding != result['encoding']:
result['bozo'] = 1
result['bozo_exception'] = CharacterEncodingOverride( \
- 'documented declared as %s, but parsed as %s' % \
+ 'document declared as %s, but parsed as %s' % \
(result['encoding'], proposed_encoding))
result['encoding'] = proposed_encoding
@@ -3603,7 +3807,7 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
saxparser._ns_stack.append({'http://www.w3.org/XML/1998/namespace':'xml'})
try:
saxparser.parse(source)
- except Exception, e:
+ except Exception as e:
if _debug:
import traceback
traceback.print_stack()
@@ -3613,8 +3817,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['bozo_exception'] = feedparser.exc or e
use_strict_parser = 0
if not use_strict_parser:
- feedparser = _LooseFeedParser(baseuri, baselang, known_encoding and 'utf-8' or '', entities)
- feedparser.feed(data)
+ feedparser = _LooseFeedParser(baseuri, baselang, 'utf-8', entities)
+ feedparser.feed(data.decode('utf-8', 'replace'))
result['feed'] = feedparser.feeddata
result['entries'] = feedparser.entries
result['version'] = result['version'] or feedparser.version
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 6215132e4b..f2aeb4e4bd 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -14,7 +14,7 @@ from contextlib import nested, closing
from calibre import browser, __appname__, iswindows, \
- strftime, preferred_encoding
+ strftime, preferred_encoding, as_unicode
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre import entity_to_unicode
@@ -986,8 +986,8 @@ class BasicNewsRecipe(Recipe):
self.cover_path = None
try:
cu = self.get_cover_url()
- except Exception, err:
- self.log.error(_('Could not download cover: %s')%str(err))
+ except Exception as err:
+ self.log.error(_('Could not download cover: %s')%as_unicode(err))
self.log.debug(traceback.format_exc())
else:
if not cu:
@@ -1318,11 +1318,11 @@ class BasicNewsRecipe(Recipe):
oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed,
get_article_url=self.get_article_url))
- except Exception, err:
+ except Exception as err:
feed = Feed()
msg = 'Failed feed: %s'%(title if title else url)
feed.populate_from_preparsed_feed(msg, [])
- feed.description = repr(err)
+ feed.description = as_unicode(err)
parsed_feeds.append(feed)
self.log.exception(msg)
@@ -1468,7 +1468,7 @@ class CalibrePeriodical(BasicNewsRecipe):
'http://news.calibre-ebook.com/subscribed_files/%s/0/temp.downloaded_recipe'
% self.calibre_periodicals_slug
).read()
- except Exception, e:
+ except Exception as e:
if hasattr(e, 'getcode') and e.getcode() == 403:
raise DownloadDenied(
_('You do not have permission to download this issue.'
diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py
index f2e22c8f5e..64a2c32fb3 100644
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -210,7 +210,7 @@ class RecursiveFetcher(object):
with closing(open_func(url, timeout=self.timeout)) as f:
data = response(f.read()+f.read())
data.newurl = f.geturl()
- except urllib2.URLError, err:
+ except urllib2.URLError as err:
if hasattr(err, 'code') and responses.has_key(err.code):
raise FetchError, responses[err.code]
if getattr(err, 'reason', [0])[0] == 104 or \