diff --git a/src/calibre/ebooks/oeb/polish/check/opf.py b/src/calibre/ebooks/oeb/polish/check/opf.py
index a4dcf77b72..0df56fbb00 100644
--- a/src/calibre/ebooks/oeb/polish/check/opf.py
+++ b/src/calibre/ebooks/oeb/polish/check/opf.py
@@ -47,6 +47,27 @@ class NonLinearItems(BaseError):
container.dirty(container.opf_name)
return True
+class DuplicateHref(BaseError):
+
+ has_multiple_locations = True
+
+ INDIVIDUAL_FIX = _(
+ 'Remove all but the first duplicate item')
+
+ def __init__(self, name, eid, locs):
+ BaseError.__init__(self, _('Duplicate item in manifest: %s') % eid, name)
+ self.HELP = _(
+ 'The item {0} is present more than once in the manifest in {1}. This is'
+ ' not allowed.').format(eid, name)
+ self.all_locations = [(name, lnum, None) for lnum in sorted(locs)]
+ self.duplicate_href = eid
+
+ def __call__(self, container):
+ items = [e for e in container.opf_xpath('/opf:package/opf:manifest/opf:item[@href]') if e.get('href') == self.duplicate_href]
+ [container.remove_from_xml(e) for e in items[1:]]
+ container.dirty(self.name)
+ return True
+
def check_opf(container):
errors = []
@@ -69,9 +90,19 @@ def check_opf(container):
if nl_items:
errors.append(NonLinearItems(container.opf_name, nl_items))
- # Check unique identifier, version, tag with name before content for
- # cover and content pointing to proper manifest item. Duplicate items in
- # spine. Duplicate hrefs in manifest. hrefs in manifest that point to
- # missing resources.
+ seen, dups = {}, {}
+ for item in container.opf_xpath('/opf:package/opf:manifest/opf:item[@href]'):
+ href = item.get('href')
+ if href in seen:
+ if href not in dups:
+ dups[href] = [seen[href]]
+ dups[href].append(item.sourceline)
+ else:
+ seen[href] = item.sourceline
+ errors.extend(DuplicateHref(container.opf_name, eid, locs) for eid, locs in dups.iteritems())
+ # Check unique identifier, tag with name before content for
+ # cover and content pointing to proper manifest item. Duplicate items in
+ # spine. hrefs in manifest that point to
+ # missing resources.
return errors