diff --git a/src/calibre/ebooks/metadata/opf3.py b/src/calibre/ebooks/metadata/opf3.py
index 5714fa5f08..2e67f89f92 100644
--- a/src/calibre/ebooks/metadata/opf3.py
+++ b/src/calibre/ebooks/metadata/opf3.py
@@ -14,21 +14,11 @@ from calibre.ebooks.metadata import check_isbn, authors_to_string, string_to_aut
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf, ensure_unique, normalize_languages
from calibre.ebooks.oeb.base import OPF2_NSMAP, OPF, DC
+from calibre.utils.date import parse_date as parse_date_, fix_only_date, is_date_undefined, isoformat
+from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.localization import canonicalize_lang
# Utils {{{
-# http://www.idpf.org/epub/vocab/package/pfx/
-reserved_prefixes = {
- 'dcterms': 'http://purl.org/dc/terms/',
- 'epubsc': 'http://idpf.org/epub/vocab/sc/#',
- 'marc': 'http://id.loc.gov/vocabulary/',
- 'media': 'http://www.idpf.org/epub/vocab/overlays/#',
- 'onix': 'http://www.editeur.org/ONIX/book/codelists/current.html#',
- 'rendition':'http://www.idpf.org/vocab/rendition/#',
- 'schema': 'http://schema.org/',
- 'xsd': 'http://www.w3.org/2001/XMLSchema#',
-}
-
_xpath_cache = {}
_re_cache = {}
@@ -122,6 +112,20 @@ def simple_text(f):
# Prefixes {{{
+# Prefix -> namespace URI table, taken from the reserved prefix list published
+# at http://www.idpf.org/epub/vocab/package/pfx/
+# ensure_prefix() falls back to these URIs when it is called without an
+# explicit value for the prefix.
+reserved_prefixes = {
+ 'dcterms': 'http://purl.org/dc/terms/',
+ 'epubsc': 'http://idpf.org/epub/vocab/sc/#',
+ 'marc': 'http://id.loc.gov/vocabulary/',
+ 'media': 'http://www.idpf.org/epub/vocab/overlays/#',
+ 'onix': 'http://www.editeur.org/ONIX/book/codelists/current.html#',
+ 'rendition':'http://www.idpf.org/vocab/rendition/#',
+ 'schema': 'http://schema.org/',
+ 'xsd': 'http://www.w3.org/2001/XMLSchema#',
+}
+
+# URI the ``calibre`` prefix is registered with (see set_timestamp()). It is
+# also used to build the fully expanded ``<URI>:timestamp`` property name that
+# calibre specific <meta> elements are matched against.
+CALIBRE_PREFIX = 'https://calibre-ebook.com'
+
def parse_prefixes(x):
return {m.group(1):m.group(2) for m in re.finditer(r'(\S+): \s*(\S+)', x)}
@@ -131,7 +135,7 @@ def read_prefixes(root):
return ans
def expand_prefix(raw, prefixes):
- return regex(r'(\S+)\s*:\s*(\S+)').sub(lambda m:(prefixes.get(m.group(1), m.group(1)) + ':' + m.group(2)), raw)
+ return regex(r'(\S+)\s*:\s*(\S+)').sub(lambda m:(prefixes.get(m.group(1), m.group(1)) + ':' + m.group(2)), raw or '')
def ensure_prefix(root, prefixes, prefix, value=None):
prefixes[prefix] = value or reserved_prefixes[prefix]
@@ -437,6 +441,91 @@ def set_book_producers(root, prefixes, refines, producers):
metadata.append(m)
# }}}
+# Dates {{{
+
+def parse_date(raw, is_w3cdtf=False):
+    '''
+    Parse a date string read from an OPF file.
+
+    :param raw: The date text; surrounding whitespace is ignored.
+    :param is_w3cdtf: If True the text is parsed with parse_iso8601(),
+        otherwise with the general purpose calibre.utils.date.parse_date().
+        Both are called with ``assume_utc=True``.
+    :return: The parsed datetime. When the text has no time part (it contains
+        neither a ``T`` nor a space and, for the general purpose parser, the
+        result is exactly 00:00:00) the value is passed through
+        fix_only_date() first.
+    '''
+    text = raw.strip()
+    has_time_separator = 'T' in text or ' ' in text
+    if is_w3cdtf:
+        parsed = parse_iso8601(text, assume_utc=True)
+        date_only = not has_time_separator
+    else:
+        parsed = parse_date_(text, assume_utc=True)
+        # The general parser accepts many formats, so additionally require
+        # that no time of day was actually recovered from the text.
+        date_only = not has_time_separator and (parsed.hour, parsed.minute, parsed.second) == (0, 0, 0)
+    return fix_only_date(parsed) if date_only else parsed
+
+def read_pubdate(root, prefixes, refines):
+    '''
+    Return the publication date stored in the OPF metadata.
+
+    The <dc:date> children of <metadata> are tried in document order and the
+    first one whose text parse_date() accepts wins. Empty or unparseable
+    elements are skipped.
+
+    :param root: Root element of the OPF document.
+    :param prefixes: Unused by this reader.
+    :param refines: Unused by this reader.
+    :return: The parsed datetime, or None if no usable <dc:date> exists.
+    '''
+    for node in XPath('./opf:metadata/dc:date')(root):
+        text = (node.text or '').strip()
+        if not text:
+            continue
+        try:
+            return parse_date(text)
+        except Exception:
+            # Best effort: fall through to the next candidate element.
+            pass
+    return None
+
+def set_pubdate(root, prefixes, refines, val):
+    '''
+    Replace the publication date in the OPF metadata.
+
+    Every existing <dc:date> child of <metadata> is removed via
+    remove_element(). Unless ``val`` is an undefined date, a single new
+    <dc:date> holding ``isoformat(val)`` is then appended to <metadata>.
+
+    :param root: Root element of the OPF document; modified in place.
+    :param prefixes: Unused by this writer.
+    :param refines: Passed through to remove_element().
+    :param val: The new publication date.
+    '''
+    for stale in XPath('./opf:metadata/dc:date')(root):
+        remove_element(stale, refines)
+    if is_date_undefined(val):
+        return
+    text = isoformat(val)
+    metadata = XPath('./opf:metadata')(root)[0]
+    elem = metadata.makeelement(DC('date'))
+    elem.text = text
+    metadata.append(elem)
+
+def read_timestamp(root, prefixes, refines):
+    '''
+    Return the calibre timestamp stored in the OPF metadata.
+
+    Two representations are tried, in this order:
+
+    1. <meta property="..."> elements whose prefix-expanded, lower-cased
+       property equals ``<CALIBRE_PREFIX>:timestamp``. The element text is
+       parsed as W3CDTF only when its expanded ``scheme`` attribute is the
+       dcterms W3CDTF scheme.
+    2. <meta name="calibre:timestamp" content="..."> elements, whose content
+       is always parsed as W3CDTF.
+
+    Candidates that are empty or fail to parse are skipped.
+
+    :param root: Root element of the OPF document.
+    :param prefixes: Prefix map used to expand the property and scheme values.
+    :param refines: Unused by this reader.
+    :return: The parsed datetime, or None if nothing usable was found.
+    '''
+    wanted_property = '%s:timestamp' % CALIBRE_PREFIX
+    w3cdtf_scheme = '%s:w3cdtf' % reserved_prefixes['dcterms']
+    for node in XPath('./opf:metadata/opf:meta[@property]')(root):
+        text = (node.text or '').strip()
+        if not text:
+            continue
+        if expand_prefix(node.get('property'), prefixes).lower() != wanted_property:
+            continue
+        scheme = expand_prefix(node.get('scheme'), prefixes).lower()
+        try:
+            return parse_date(text, is_w3cdtf=scheme == w3cdtf_scheme)
+        except Exception:
+            # Best effort: try the next candidate element.
+            pass
+    for node in XPath('./opf:metadata/opf:meta[@name="calibre:timestamp"]')(root):
+        content = node.get('content')
+        if not content:
+            continue
+        try:
+            return parse_date(content, is_w3cdtf=True)
+        except Exception:
+            pass
+    return None
+
+def set_timestamp(root, prefixes, refines, val):
+    '''
+    Replace the calibre timestamp in the OPF metadata.
+
+    The ``calibre`` and ``dcterms`` prefixes are registered with
+    ensure_prefix() first. Every <meta> that carries the timestamp is then
+    removed, whether it uses the ``property`` form or the
+    ``name="calibre:timestamp"`` form. Unless ``val`` is an undefined date, a
+    new <meta property="calibre:timestamp" scheme="dcterms:W3CDTF"> holding
+    ``isoformat(val)`` is appended to <metadata>.
+
+    :param root: Root element of the OPF document; modified in place.
+    :param prefixes: Prefix map; updated by ensure_prefix().
+    :param refines: Passed through to remove_element().
+    :param val: The new timestamp.
+    '''
+    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
+    ensure_prefix(root, prefixes, 'dcterms')
+    wanted_property = '%s:timestamp' % CALIBRE_PREFIX
+    for node in XPath('./opf:metadata/opf:meta')(root):
+        has_property_form = expand_prefix(node.get('property'), prefixes).lower() == wanted_property
+        if has_property_form or node.get('name') == 'calibre:timestamp':
+            remove_element(node, refines)
+    if is_date_undefined(val):
+        return
+    text = isoformat(val)
+    metadata = XPath('./opf:metadata')(root)[0]
+    elem = metadata.makeelement(OPF('meta'), attrib={'property':'calibre:timestamp', 'scheme':'dcterms:W3CDTF'})
+    elem.text = text
+    metadata.append(elem)
+
+
+def read_last_modified(root, prefixes, refines):
+    '''
+    Return the last modified date stored in the OPF metadata.
+
+    Looks at <meta property="..."> elements whose prefix-expanded, lower-cased
+    property equals ``<dcterms URI>:modified``. The element text is parsed as
+    W3CDTF only when its expanded ``scheme`` attribute is the dcterms W3CDTF
+    scheme. Empty or unparseable candidates are skipped.
+
+    :param root: Root element of the OPF document.
+    :param prefixes: Prefix map used to expand the property and scheme values.
+    :param refines: Unused by this reader.
+    :return: The parsed datetime, or None if nothing usable was found.
+    '''
+    dcterms = reserved_prefixes['dcterms']
+    wanted_property = '%s:modified' % dcterms
+    w3cdtf_scheme = '%s:w3cdtf' % dcterms
+    for node in XPath('./opf:metadata/opf:meta[@property]')(root):
+        text = (node.text or '').strip()
+        if not text:
+            continue
+        if expand_prefix(node.get('property'), prefixes).lower() != wanted_property:
+            continue
+        scheme = expand_prefix(node.get('scheme'), prefixes).lower()
+        try:
+            return parse_date(text, is_w3cdtf=scheme == w3cdtf_scheme)
+        except Exception:
+            # Best effort: try the next candidate element.
+            pass
+    return None
+# }}}
+
def read_metadata(root):
ans = Metadata(_('Unknown'), [_('Unknown')])
prefixes, refines = read_prefixes(root), read_refines(root)
@@ -459,6 +548,15 @@ def read_metadata(root):
bkp = read_book_producers(root, prefixes, refines)
if bkp:
ans.book_producer = bkp[0]
+ pd = read_pubdate(root, prefixes, refines)
+ if not is_date_undefined(pd):
+ ans.pubdate = pd
+ ts = read_timestamp(root, prefixes, refines)
+ if not is_date_undefined(ts):
+ ans.timestamp = ts
+ lm = read_last_modified(root, prefixes, refines)
+ if not is_date_undefined(lm):
+ ans.last_modified = lm
return ans
def get_metadata(stream):
@@ -475,6 +573,8 @@ def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False,
for i, aut in enumerate(mi.authors):
authors.append(Author(aut, aus[i] if i < len(aus) else None))
set_authors(root, prefixes, refines, authors)
+ set_pubdate(root, prefixes, refines, mi.pubdate)
+ set_timestamp(root, prefixes, refines, mi.timestamp)
pretty_print_opf(root)
diff --git a/src/calibre/ebooks/metadata/opf3_test.py b/src/calibre/ebooks/metadata/opf3_test.py
index d0cadc74c7..46426ae848 100644
--- a/src/calibre/ebooks/metadata/opf3_test.py
+++ b/src/calibre/ebooks/metadata/opf3_test.py
@@ -14,10 +14,11 @@ from calibre.ebooks.metadata.opf3 import (
read_metadata, set_identifiers, XPath, set_application_id, read_title,
read_refines, set_title, read_title_sort, read_languages, set_languages,
read_authors, Author, set_authors, ensure_prefix, read_prefixes,
- read_book_producers, set_book_producers
+ read_book_producers, set_book_producers, read_timestamp, set_timestamp,
+ read_pubdate, set_pubdate, CALIBRE_PREFIX, read_last_modified
)
-TEMPLATE = '''{metadata}''' # noqa
+TEMPLATE = '''{metadata}''' % CALIBRE_PREFIX # noqa
default_refines = defaultdict(list)
class TestOPF3(unittest.TestCase):
@@ -35,7 +36,7 @@ class TestOPF3(unittest.TestCase):
('xxx:onix', 'xxx:onix'),
('xxx', 'xxx'),
):
- self.ae(expand_prefix(raw, reserved_prefixes), expanded)
+ self.ae(expand_prefix(raw, reserved_prefixes.copy()), expanded)
root = self.get_opf()
ensure_prefix(root, read_prefixes(root), 'calibre', 'https://calibre-ebook.com')
ensure_prefix(root, read_prefixes(root), 'marc', reserved_prefixes['marc'])
@@ -46,7 +47,7 @@ class TestOPF3(unittest.TestCase):
def idt(val, scheme=None, iid=''):
return '{val}'.format(scheme=('opf:scheme="%s"'%scheme if scheme else ''), val=val, id=iid)
def ri(root):
- return dict(read_identifiers(root, reserved_prefixes, default_refines))
+ return dict(read_identifiers(root, read_prefixes(root), default_refines))
for m, result in (
(idt('abc', 'ISBN'), {}),
@@ -67,11 +68,11 @@ class TestOPF3(unittest.TestCase):
self.ae(mi.application_id, 'y')
root = self.get_opf(metadata=idt('i:1', iid='uid') + idt('r:1') + idt('o:1'))
- set_identifiers(root, reserved_prefixes, default_refines, {'i':'2', 'o':'2'})
+ set_identifiers(root, read_prefixes(root), default_refines, {'i':'2', 'o':'2'})
self.ae({'i':['2', '1'], 'r':['1'], 'o':['2']}, ri(root))
self.ae(1, len(XPath('//dc:identifier[@id="uid"]')(root)))
root = self.get_opf(metadata=idt('i:1', iid='uid') + idt('r:1') + idt('o:1'))
- set_identifiers(root, reserved_prefixes, default_refines, {'i':'2', 'o':'2'}, force_identifiers=True)
+ set_identifiers(root, read_prefixes(root), default_refines, {'i':'2', 'o':'2'}, force_identifiers=True)
self.ae({'i':['2', '1'], 'o':['2']}, ri(root))
root = self.get_opf(metadata=idt('i:1', iid='uid') + idt('r:1') + idt('o:1'))
set_application_id(root, default_refines, 'y')
@@ -81,26 +82,26 @@ class TestOPF3(unittest.TestCase):
def test_title(self): # {{{
def rt(root):
- return read_title(root, reserved_prefixes, read_refines(root))
+ return read_title(root, read_prefixes(root), read_refines(root))
def st(root, title, title_sort=None):
- set_title(root, reserved_prefixes, read_refines(root), title, title_sort)
+ set_title(root, read_prefixes(root), read_refines(root), title, title_sort)
return rt(root)
root = self.get_opf('''xxx''')
self.ae(rt(root), 'xxx')
self.ae(st(root, 'abc', 'cba'), 'abc')
- self.ae(read_title_sort(root, reserved_prefixes, read_refines(root)), 'cba')
+ self.ae(read_title_sort(root, read_prefixes(root), read_refines(root)), 'cba')
root = self.get_opf('''yyyx xx
main''')
self.ae(rt(root), 'x xx')
- self.ae(read_title_sort(root, reserved_prefixes, read_refines(root)), 'sorted')
+ self.ae(read_title_sort(root, read_prefixes(root), read_refines(root)), 'sorted')
self.ae(st(root, 'abc'), 'abc')
# }}}
def test_languages(self): # {{{
def rl(root):
- return read_languages(root, reserved_prefixes, read_refines(root))
+ return read_languages(root, read_prefixes(root), read_refines(root))
def st(root, languages):
- set_languages(root, reserved_prefixes, read_refines(root), languages)
+ set_languages(root, read_prefixes(root), read_refines(root), languages)
return rl(root)
root = self.get_opf('''en-USfr''')
self.ae(['eng', 'fra'], rl(root))
@@ -111,9 +112,9 @@ class TestOPF3(unittest.TestCase):
def test_authors(self): # {{{
def rl(root):
- return read_authors(root, reserved_prefixes, read_refines(root))
+ return read_authors(root, read_prefixes(root), read_refines(root))
def st(root, authors):
- set_authors(root, reserved_prefixes, read_refines(root), authors)
+ set_authors(root, read_prefixes(root), read_refines(root), authors)
return rl(root)
root = self.get_opf('''a b''')
self.ae([Author('a b', None)], rl(root))
@@ -128,14 +129,14 @@ class TestOPF3(unittest.TestCase):
self.ae([Author('a b', 'b, a'), Author('c d', 'd, c')], rl(root))
authors = [Author('x y', 'y, x'), Author('u i', None)]
self.ae(authors, st(root, authors))
- self.assertIsNone(root.get('prefix'))
+ self.ae(root.get('prefix'), 'calibre: %s' % CALIBRE_PREFIX)
# }}}
def test_book_producer(self): # {{{
def rl(root):
- return read_book_producers(root, reserved_prefixes, read_refines(root))
+ return read_book_producers(root, read_prefixes(root), read_refines(root))
def st(root, producers):
- set_book_producers(root, reserved_prefixes, read_refines(root), producers)
+ set_book_producers(root, read_prefixes(root), read_refines(root), producers)
return rl(root)
for scheme in ('scheme="marc:relators"', ''):
root = self.get_opf('''a bc d'''
@@ -146,6 +147,30 @@ class TestOPF3(unittest.TestCase):
self.ae('12'.split(), st(root, '12'.split()))
# }}}
+ def test_dates(self): # {{{
+ # Exercises the pubdate, timestamp and last-modified readers/writers.
+ # NOTE(review): the OPF snippets passed to get_opf() below contain no XML
+ # markup in this view; presumably it was lost in extraction -- verify
+ # against the original test file before relying on them.
+ from calibre.utils.date import utcnow
+ def rl(root):
+ # Read back the (pubdate, timestamp) pair from root.
+ return read_pubdate(root, read_prefixes(root), read_refines(root)), read_timestamp(root, read_prefixes(root), read_refines(root))
+ def st(root, pd, ts):
+ # Write pubdate and timestamp, then return what the readers now see.
+ set_pubdate(root, read_prefixes(root), read_refines(root), pd)
+ set_timestamp(root, read_prefixes(root), read_refines(root), ts)
+ return rl(root)
+ def ae(root, y1=None, y2=None):
+ # Compare only the years of the (pubdate, timestamp) pair against
+ # (y1, y2); an expected value of None means the reader must return None.
+ x1, x2 = rl(root)
+ for x, y in ((x1, y1), (x2, y2)):
+ if y is None:
+ self.assertIsNone(x)
+ else:
+ self.ae(y, getattr(x, 'year', None))
+ root = self.get_opf('''1999-3-22001''')
+ ae(root, 1999, 2001)
+ # Writing the current time and reading it back must round-trip exactly.
+ n = utcnow()
+ self.ae(st(root, n, n), (n, n))
+ root = self.get_opf('''1999-3-2''')
+ ae(root, 1999, 2001)
+ root = self.get_opf('''2003''')
+ self.ae(read_last_modified(root, read_prefixes(root), read_refines(root)).year, 2003)
+ # }}}
# Run tests {{{
diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py
index 546c154d98..0f378e85ef 100644
--- a/src/calibre/utils/date.py
+++ b/src/calibre/utils/date.py
@@ -107,6 +107,14 @@ def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
return dt.astimezone(_utc_tz if as_utc else _local_tz)
+def fix_only_date(val):
+    '''
+    Nudge a date-only value away from the edges of its month.
+
+    If ``val`` falls on the last day of its month it is moved back by one day,
+    and if it falls on the first day it is moved forward to the second.
+    NOTE(review): presumably this keeps the month (and year) stable when the
+    value is later converted between timezones -- confirm with callers.
+
+    :param val: A date or datetime object.
+    :return: ``val`` itself, or a copy with only the day changed.
+    '''
+    n = val + timedelta(days=1)
+    # Compare with != rather than >: the day after 31 December is in month 1,
+    # which is smaller than 12, so a > test would miss the end of the year.
+    if n.month != val.month:
+        val = val.replace(day=val.day - 1)
+    if val.day == 1:
+        val = val.replace(day=2)
+    return val
+
def parse_only_date(raw, assume_utc=True, as_utc=True):
'''
Parse a date string that contains no time information in a manner that
@@ -116,14 +124,7 @@ def parse_only_date(raw, assume_utc=True, as_utc=True):
f = utcnow if assume_utc else now
default = f().replace(hour=0, minute=0, second=0, microsecond=0,
day=15)
- ans = parse_date(raw, default=default, assume_utc=assume_utc, as_utc=as_utc)
- n = ans + timedelta(days=1)
- if n.month > ans.month:
- ans = ans.replace(day=ans.day-1)
- if ans.day == 1:
- ans = ans.replace(day=2)
- return ans
-
+ return fix_only_date(parse_date(raw, default=default, assume_utc=assume_utc, as_utc=as_utc))
def strptime(val, fmt, assume_utc=False, as_utc=True):
dt = datetime.strptime(val, fmt)