Use cp1252 instead of utf-8 for ereader metadata as per spec

This commit is contained in:
Kovid Goyal 2019-06-22 05:09:33 +05:30
parent cc7eaf016a
commit 2c1a1813ce
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -49,12 +49,12 @@ def get_metadata(stream, extract_cover=True):
try: try:
mdata = pheader.section_data(hr.metadata_offset) mdata = pheader.section_data(hr.metadata_offset)
mdata = mdata.decode('utf-8').split('\x00') mdata = mdata.decode('cp1252', 'replace').split('\x00')
mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0]) mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])] mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3]) mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4]) mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4])
except: except Exception:
pass pass
if extract_cover: if extract_cover:
@@ -96,8 +96,8 @@ def set_metadata(stream, mi):
# Merge the metadata into the file # Merge the metadata into the file
file_mi = get_metadata(stream, False) file_mi = get_metadata(stream, False)
file_mi.smart_update(mi) file_mi.smart_update(mi)
sections[hr.metadata_offset] = ('%s\x00%s\x00%s\x00%s\x00%s\x00' % \ sections[hr.metadata_offset] = ('%s\x00%s\x00%s\x00%s\x00%s\x00' % (
(file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)).encode('utf-8') file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)).encode('cp1252', 'replace')
# Rebuild the PDB wrapper because the offsets have changed due to the # Rebuild the PDB wrapper because the offsets have changed due to the
# new metadata. # new metadata.