From 11068e0e0946532187b022d193166249ab67ce43 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 09:49:16 -0600
Subject: [PATCH 01/10] Convert all unicode characters to ASCII when creating
file paths on the device
---
installer/linux/freeze.py | 1 +
src/calibre/devices/usbms/device.py | 2 +-
src/calibre/utils/filenames.py | 96 ++---------------------------
3 files changed, 7 insertions(+), 92 deletions(-)
diff --git a/installer/linux/freeze.py b/installer/linux/freeze.py
index 23c4ea7d73..352211379b 100644
--- a/installer/linux/freeze.py
+++ b/installer/linux/freeze.py
@@ -49,6 +49,7 @@ def freeze():
'/usr/lib/libMagickCore.so',
'/usr/lib/libgcrypt.so.11',
'/usr/lib/libgpg-error.so.0',
+ '/usr/lib/libphonon.so.4',
]
binary_includes += [os.path.join(QTDIR, 'lib%s.so.4'%x) for x in QTDLLS]
diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py
index 8bbfd58043..007f132b24 100644
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@@ -15,7 +15,7 @@ from calibre.devices.interface import DevicePlugin
from calibre.devices.errors import DeviceError
from calibre.devices.usbms.deviceconfig import DeviceConfig
from calibre import iswindows, islinux, isosx, __appname__
-from calibre import sanitize_file_name as sanitize
+from calibre.utils.filenames import ascii_filename as sanitize
class Device(DeviceConfig, DevicePlugin):
'''
diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py
index ad5d3f9f05..f243237775 100644
--- a/src/calibre/utils/filenames.py
+++ b/src/calibre/utils/filenames.py
@@ -1,97 +1,11 @@
-# -*- coding: utf-8 -*-
'''
-Make strings safe for use as ASCII filenames, while trying to preserve as much
+Make strings safe for use as ASCII filenames, while trying to preserve as much
meaning as possible.
'''
-import re, string
-
-MAP = {
- u"‘" : "'",
- u"’" : "'",
- u"«" : '"',
- u"»" : '"',
- u"…" : "...",
- u"№" : "#",
- u"Щ" : "Shh",
- u"Ё" : "Jo",
- u"Ж" : "Zh",
- u"Ц" : "C",
- u"Ч" : "Ch",
- u"Ш" : "Sh",
- u"Ы" : "Y",
- u"Ю" : "Ju",
- u"Я" : "Ja",
- u"Б" : "B",
- u"Г" : "G",
- u"Д" : "D",
- u"И" : "I",
- u"Й" : "J",
- u"К" : "K",
- u"Л" : "L",
- u"П" : "P",
- u"Ф" : "F",
- u"Э" : "E",
- u"Ъ" : "`",
- u"Ь" : "'",
- u"щ" : "shh",
- u"ё" : "jo",
- u"ж" : "zh",
- u"ц" : "c",
- u"ч" : "ch",
- u"ш" : "sh",
- u"ы" : "y",
- u"ю" : "ju",
- u"я" : "ja",
- u"б" : "b",
- u"в" : "v",
- u"г" : "g",
- u"д" : "d",
- u"з" : "z",
- u"и" : "i",
- u"й" : "j",
- u"к" : "k",
- u"л" : "l",
- u"м" : "m",
- u"н" : "n",
- u"о" : "o",
- u"п" : "p",
- u"т" : "t",
- u"ф" : "f",
- u"э" : "e",
- u"ъ" : "`",
- u"ь" : "'",
- u"А" : "A",
- u"В" : "V",
- u"Е" : "Je",
- u"З" : "Z",
- u"М" : "M",
- u"Н" : "N",
- u"О" : "O",
- u"Р" : "R",
- u"С" : "S",
- u"Т" : "T",
- u"У" : "U",
- u"Х" : "Kh",
- u"Є" : "Je",
- u"Ї" : "Ji",
- u"а" : "a",
- u"е" : "je",
- u"р" : "r",
- u"с" : "s",
- u"у" : "u",
- u"х" : "kh",
- u"є" : "je",
-} #: Translation table
-
-for c in string.whitespace:
- MAP[c] = ' '
-PAT = re.compile('['+u''.join(MAP.keys())+']')
+from calibre.ebooks.unidecode.unidecoder import Unidecoder
+from calibre import sanitize_file_name
+udc = Unidecoder()
def ascii_filename(orig):
- orig = PAT.sub(lambda m:MAP[m.group()], orig)
- buf = []
- for i in range(len(orig)):
- val = ord(orig[i])
- buf.append('_' if val < 33 or val > 126 else orig[i])
- return (''.join(buf)).encode('ascii')
+ return sanitize_file_name(udc.decode(orig).replace('?', '_'))
From 6cf006db0592f08e9967ad4fa42a9f467ed70947 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 10:26:51 -0600
Subject: [PATCH 02/10] Intelligently convert (almost) all filenames to ASCII.
This should make for more readable file names as opposed to the previous
practice of simply replacing unicode characters with underscores.
---
src/calibre/devices/cybookg3/driver.py | 4 ++--
src/calibre/devices/jetbook/driver.py | 2 +-
src/calibre/ebooks/mobi/reader.py | 5 ++--
src/calibre/ebooks/unidecode/unidecoder.py | 8 +++++--
src/calibre/gui2/device.py | 8 +++----
src/calibre/gui2/main.py | 5 ++--
src/calibre/library/database2.py | 28 +++++++++++-----------
src/calibre/web/fetch/simple.py | 8 +++----
8 files changed, 37 insertions(+), 31 deletions(-)
diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index 9976a0f02e..a8ce905b00 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -8,7 +8,7 @@ import os
import shutil
from itertools import cycle
-from calibre import sanitize_file_name as sanitize
+from calibre.utils.filenames import ascii_filename as sanitize
from calibre.devices.usbms.driver import USBMS
import calibre.devices.cybookg3.t2b as t2b
@@ -98,7 +98,7 @@ class CYBOOKG3(USBMS):
self.report_progress(i / float(len(files)), _('Transferring books to device...'))
self.report_progress(1.0, _('Transferring books to device...'))
-
+
return zip(paths, cycle([on_card]))
def delete_books(self, paths, end_session=True):
diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py
index 9e91f4cfa9..c6668364a7 100644
--- a/src/calibre/devices/jetbook/driver.py
+++ b/src/calibre/devices/jetbook/driver.py
@@ -8,7 +8,7 @@ import os, re, sys, shutil
from itertools import cycle
from calibre.devices.usbms.driver import USBMS
-from calibre import sanitize_file_name as sanitize
+from calibre.utils.filenames import ascii_filename as sanitize
from calibre.ebooks.metadata import string_to_authors
class JETBOOK(USBMS):
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index 3d46668ee9..eed42bce46 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -21,7 +21,8 @@ except ImportError:
from lxml import html, etree
-from calibre import entity_to_unicode, sanitize_file_name
+from calibre import entity_to_unicode
+from calibre.utils.filenames import ascii_filename
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS
@@ -374,7 +375,7 @@ class MobiReader(object):
fname = self.name.encode('ascii', 'replace')
fname = re.sub(r'[\x08\x15\0]+', '', fname)
htmlfile = os.path.join(output_dir,
- sanitize_file_name(fname) + '.html')
+ ascii_filename(fname) + '.html')
try:
for ref in guide.xpath('descendant::reference'):
if ref.attrib.has_key('href'):
diff --git a/src/calibre/ebooks/unidecode/unidecoder.py b/src/calibre/ebooks/unidecode/unidecoder.py
index 8da60d29e9..d31239a1dc 100644
--- a/src/calibre/ebooks/unidecode/unidecoder.py
+++ b/src/calibre/ebooks/unidecode/unidecoder.py
@@ -57,6 +57,7 @@ it under the same terms as Perl itself.
import re
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
+from calibre.constants import preferred_encoding
class Unidecoder(object):
@@ -70,7 +71,10 @@ class Unidecoder(object):
try:
text = unicode(text)
except:
- text = text.decode('utf-8', 'ignore')
+ try:
+ text = text.decode(preferred_encoding)
+ except:
+ text = text.decode('utf-8', 'replace')
# Replace characters larger than 127 with their ASCII equivelent.
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
text)
@@ -80,7 +84,7 @@ class Unidecoder(object):
Returns the replacement character or ? if none can be found.
'''
try:
- # Splite the unicode character xABCD into parts 0xAB and 0xCD.
+ # Split the unicode character xABCD into parts 0xAB and 0xCD.
# 0xAB represents the group within CODEPOINTS to query and 0xCD
# represents the position in the list of characters for the group.
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 19228afa92..ffbcb2e9e2 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -21,7 +21,7 @@ from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \
pixmap_to_data, warning_dialog, \
question_dialog
from calibre.ebooks.metadata import authors_to_string
-from calibre import sanitize_file_name, preferred_encoding
+from calibre import preferred_encoding
from calibre.utils.filenames import ascii_filename
from calibre.devices.errors import FreeSpaceError
from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \
@@ -542,7 +542,7 @@ class DeviceGUI(object):
'\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \
_('in the %s format.') %
os.path.splitext(f)[1][1:].upper())
- prefix = sanitize_file_name(t+' - '+a)
+ prefix = ascii_filename(t+' - '+a)
if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace')
attachment_names.append(prefix + os.path.splitext(f)[1])
@@ -693,7 +693,7 @@ class DeviceGUI(object):
rows_are_ids=True)
names = []
for mi in metadata:
- prefix = sanitize_file_name(mi['title'])
+ prefix = ascii_filename(mi['title'])
if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace')
prefix = ascii_filename(prefix)
@@ -758,7 +758,7 @@ class DeviceGUI(object):
a = mi['authors']
if not a:
a = _('Unknown')
- prefix = sanitize_file_name(t+' - '+a)
+ prefix = ascii_filename(t+' - '+a)
if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace')
prefix = ascii_filename(prefix)
diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py
index b4b8494c64..5874e57599 100644
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -14,8 +14,9 @@ from PyQt4.Qt import Qt, SIGNAL, QObject, QCoreApplication, QUrl, QTimer, \
QMessageBox, QStackedLayout
from PyQt4.QtSvg import QSvgRenderer
-from calibre import __version__, __appname__, sanitize_file_name, \
+from calibre import __version__, __appname__, \
iswindows, isosx, prints, patheq
+from calibre.utils.filenames import ascii_filename
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server
@@ -852,7 +853,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
def _files_added(self, paths=[], names=[], infos=[], on_card=None):
if paths:
self.upload_books(paths,
- list(map(sanitize_file_name, names)),
+ list(map(ascii_filename, names)),
infos, on_card=on_card)
self.status_bar.showMessage(
_('Uploading books to device.'), 2000)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index d7344b5681..c04e27aa75 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -34,7 +34,7 @@ from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_e
from calibre.ptempfile import PersistentTemporaryFile
from calibre.customize.ui import run_plugins_on_import
-from calibre import sanitize_file_name
+from calibre.utils.filenames import ascii_filename
from calibre.ebooks import BOOK_EXTENSIONS
if iswindows:
@@ -652,8 +652,8 @@ class LibraryDatabase2(LibraryDatabase):
authors = self.authors(id, index_is_id=True)
if not authors:
authors = _('Unknown')
- author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
- title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
+ author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
+ title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
path = author + '/' + title + ' (%d)'%id
return path
@@ -664,8 +664,8 @@ class LibraryDatabase2(LibraryDatabase):
authors = self.authors(id, index_is_id=True)
if not authors:
authors = _('Unknown')
- author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
- title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
+ author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
+ title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
name = title + ' - ' + author
while name.endswith('.'):
name = name[:-1]
@@ -1520,12 +1520,12 @@ class LibraryDatabase2(LibraryDatabase):
x['cover'] = os.path.join(path, 'cover.jpg')
if not self.has_cover(x['id'], index_is_id=True):
x['cover'] = None
- path += os.sep + self.construct_file_name(record[FIELD_MAP['id']]) + '.%s'
formats = self.formats(record[FIELD_MAP['id']], index_is_id=True)
if formats:
for fmt in formats.split(','):
- x['formats'].append(path%fmt.lower())
- x['fmt_'+fmt.lower()] = path%fmt.lower()
+ path = self.format_abspath(x['id'], fmt, index_is_id=True)
+ x['formats'].append(path)
+ x['fmt_'+fmt.lower()] = path
x['available_formats'] = [i.upper() for i in formats.split(',')]
return data
@@ -1602,12 +1602,12 @@ books_series_link feeds
by_author[au] = []
by_author[au].append(index)
for au in by_author.keys():
- apath = os.path.join(dir, sanitize_file_name(au))
+ apath = os.path.join(dir, ascii_filename(au))
if not single_dir and not os.path.exists(apath):
os.mkdir(apath)
for idx in by_author[au]:
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
- tpath = os.path.join(apath, sanitize_file_name(title))
+ tpath = os.path.join(apath, ascii_filename(title))
id = idx if index_is_id else self.id(idx)
id = str(id)
if not single_dir and not os.path.exists(tpath):
@@ -1621,10 +1621,10 @@ books_series_link feeds
mi.authors = [_('Unknown')]
cdata = self.cover(int(id), index_is_id=True)
if cdata is not None:
- cname = sanitize_file_name(name)+'.jpg'
+ cname = ascii_filename(name)+'.jpg'
open(os.path.join(base, cname), 'wb').write(cdata)
mi.cover = cname
- with open(os.path.join(base, sanitize_file_name(name)+'.opf'),
+ with open(os.path.join(base, ascii_filename(name)+'.opf'),
'wb') as f:
f.write(metadata_to_opf(mi))
@@ -1636,7 +1636,7 @@ books_series_link feeds
if not data:
continue
fname = name +'.'+fmt.lower()
- fname = sanitize_file_name(fname)
+ fname = ascii_filename(fname)
f = open(os.path.join(base, fname), 'w+b')
f.write(data)
f.flush()
@@ -1671,7 +1671,7 @@ books_series_link feeds
if not au:
au = _('Unknown')
fname = '%s - %s.%s'%(title, au, format.lower())
- fname = sanitize_file_name(fname)
+ fname = ascii_filename(fname)
if not os.path.exists(dir):
os.makedirs(dir)
f = open(os.path.join(dir, fname), 'w+b')
diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py
index f956c4ee10..238ab343a9 100644
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -14,8 +14,8 @@ from httplib import responses
from PIL import Image
from cStringIO import StringIO
-from calibre import browser, sanitize_file_name, \
- relpath, unicode_path
+from calibre import browser, relpath, unicode_path
+from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
@@ -313,7 +313,7 @@ class RecursiveFetcher(object):
self.log.exception('Could not fetch image %s'% iurl)
continue
c += 1
- fname = sanitize_file_name('img'+str(c)+ext)
+ fname = ascii_filename('img'+str(c)+ext)
if isinstance(fname, unicode):
fname = fname.encode('ascii', 'replace')
imgpath = os.path.join(diskpath, fname+'.jpg')
@@ -416,7 +416,7 @@ class RecursiveFetcher(object):
if not isinstance(_fname, unicode):
_fname.decode('latin1', 'replace')
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
- _fname = sanitize_file_name(_fname)
+ _fname = ascii_filename(_fname)
_fname = os.path.splitext(_fname)[0]+'.xhtml'
res = os.path.join(linkdiskpath, _fname)
self.downloaded_paths.append(res)
From 8a4754f1d08f7c07051508053f5c122792a86eef Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 10:55:14 -0600
Subject: [PATCH 03/10] Fix #2934 (Crash when aborting cover download)
---
src/calibre/gui2/dialogs/metadata_single.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 10a81be9f5..d349298609 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -526,3 +526,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
QDialog.accept(self)
if callable(self.accepted_callback):
self.accepted_callback(self.id)
+
+ def reject(self, *args):  # stop any cover fetch still in progress before rejecting the dialog (fix for #2934)
+ cf = getattr(self, 'cover_fetcher', None)  # the attribute may never have been set; fall back to None
+ if cf is not None and hasattr(cf, 'terminate'):  # only act on objects that expose terminate()
+ cf.terminate()
+ cf.wait()  # NOTE(review): presumably blocks until the fetcher has stopped (QThread-like API) -- confirm
+ # Hand over to the stock QDialog.reject, forwarding whatever arguments were received.
+ QDialog.reject(self, *args)
From 2436866707270c60260004df77492bce4520bb31 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 11:08:25 -0600
Subject: [PATCH 04/10] Fix #2938 (comic2lrf gives EOFerror)
---
src/calibre/ebooks/comic/input.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py
index 2eed12de0c..c039b06676 100755
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@@ -253,7 +253,6 @@ def process_pages(pages, opts, update, tdir):
for job in jobs:
if job.failed:
- raw_input()
raise Exception(_('Failed to process comic: \n\n%s')%
job.log_file.read())
pages, failures_ = job.result
From fa603d130c3cac184871262ef26b55cc40333752 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 11:29:28 -0600
Subject: [PATCH 05/10] Fix #2930 (No book is selected after deleting a book)
---
src/calibre/gui2/main.py | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py
index 5874e57599..3da3a33c69 100644
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -889,7 +889,17 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
'removed from your computer. Are you sure?')
+'
', 'library_delete_books', self):
return
+ ci = view.currentIndex()
+ row = None
+ if ci.isValid():
+ row = ci.row()
view.model().delete_books(rows)
+ if row is not None:
+ ci = view.model().index(row, 0)
+ if ci.isValid():
+ view.setCurrentIndex(ci)
+ sm = view.selectionModel()
+ sm.select(ci, sm.Select)
else:
if self.stack.currentIndex() == 1:
view = self.memory_view
From 0b4d97a8a474667c09631582fe742f718f906aca Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 11:44:01 -0600
Subject: [PATCH 06/10] Fix file selection dialogs not choosing the correct
file extension filter by default
---
src/calibre/gui2/__init__.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index f5dcdcfebe..12f6fb2025 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -342,6 +342,8 @@ class FileDialog(QObject):
ftext += '%s (%s);;'%(text, ' '.join(extensions))
if add_all_files_filter or not ftext:
ftext += 'All files (*)'
+ if ftext.endswith(';;'):
+ ftext = ftext[:-2]
self.dialog_name = name if name else 'dialog_' + title
self.selected_files = None
From 2c5d95113912ab6fac4e845a92337c7187bbeabc Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 11:54:18 -0600
Subject: [PATCH 07/10] IGN:Remove note about libphonon from download page
---
src/calibre/trac/plugins/templates/linux.html | 4 ----
1 file changed, 4 deletions(-)
diff --git a/src/calibre/trac/plugins/templates/linux.html b/src/calibre/trac/plugins/templates/linux.html
index 58c3e8ad6a..b948dccc74 100644
--- a/src/calibre/trac/plugins/templates/linux.html
+++ b/src/calibre/trac/plugins/templates/linux.html
@@ -82,10 +82,6 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal.
Note
- - On some linux distributions, you have to install the
- libphonon (may be called libphonon4) package for calibre
- to work.
-
-
When running the command line utilities,
they will segfault after completion. This can
From a80d7050495754c4893ce5e86f6e9b3860498bea Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 12:09:21 -0600
Subject: [PATCH 08/10] lrs2lrf: Handle missing style labels gracefully
---
src/calibre/ebooks/lrf/lrs/convert_from.py | 82 +++++++++++-----------
src/calibre/utils/filenames.py | 14 +++-
2 files changed, 54 insertions(+), 42 deletions(-)
diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py
index 86a97aa70b..fd0dd91be0 100644
--- a/src/calibre/ebooks/lrf/lrs/convert_from.py
+++ b/src/calibre/ebooks/lrf/lrs/convert_from.py
@@ -18,38 +18,38 @@ from calibre.ebooks.lrf.pylrs.pylrs import Book, PageStyle, TextStyle, \
from calibre.ebooks.chardet import xml_to_unicode
class LrsParser(object):
-
- SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space',
- 'PutObj', 'RuledLine',
+
+ SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space',
+ 'PutObj', 'RuledLine',
'Plot', 'SetDefault', 'BookSetting', 'RegistFont',
'PageStyle', 'TextStyle', 'BlockStyle', 'JumpTo',
'ImageStream', 'Image']]
-
+
def __init__(self, stream, logger):
self.logger = logger
src = stream.read()
self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0],
- convertEntities=BeautifulStoneSoup.XML_ENTITIES,
+ convertEntities=BeautifulStoneSoup.XML_ENTITIES,
selfClosingTags=self.SELF_CLOSING_TAGS)
self.objects = {}
for obj in self.soup.findAll(objid=True):
self.objects[obj['objid']] = obj
-
+
self.parsed_objects = {}
self.first_pass()
self.second_pass()
self.third_pass()
self.fourth_pass()
self.fifth_pass()
-
+
def fifth_pass(self):
for tag in self.soup.findAll(['canvas', 'header', 'footer']):
canvas = self.parsed_objects[tag.get('objid')]
for po in tag.findAll('putobj'):
canvas.put_object(self.parsed_objects[po.get('refobj')],
po.get('x1'), po.get('y1'))
-
-
+
+
@classmethod
def attrs_to_dict(cls, tag, exclude=('objid',)):
result = {}
@@ -58,7 +58,7 @@ class LrsParser(object):
continue
result[str(key)] = val
return result
-
+
def text_tag_to_element(self, tag):
map = {
'span' : Span,
@@ -77,7 +77,7 @@ class LrsParser(object):
settings = self.attrs_to_dict(tag)
settings.pop('spanstyle', '')
return map[tag.name](**settings)
-
+
def process_text_element(self, tag, elem):
for item in tag.contents:
if isinstance(item, NavigableString):
@@ -86,8 +86,8 @@ class LrsParser(object):
subelem = self.text_tag_to_element(item)
elem.append(subelem)
self.process_text_element(item, subelem)
-
-
+
+
def process_paragraph(self, tag):
p = Paragraph()
contents = [i for i in tag.contents]
@@ -104,7 +104,7 @@ class LrsParser(object):
p.append(elem)
self.process_text_element(item, elem)
return p
-
+
def process_text_block(self, tag):
tb = self.parsed_objects[tag.get('objid')]
for item in tag.contents:
@@ -119,25 +119,25 @@ class LrsParser(object):
elem = self.text_tag_to_element(item)
self.process_text_element(item, elem)
p.append(elem)
-
+
def fourth_pass(self):
for tag in self.soup.findAll('page'):
page = self.parsed_objects[tag.get('objid')]
self.book.append(page)
- for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
+ for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
'ruledline', 'simpletextblock']):
if block_tag.name == 'ruledline':
page.append(RuledLine(**self.attrs_to_dict(block_tag)))
else:
page.append(self.parsed_objects[block_tag.get('objid')])
-
+
for tag in self.soup.find('objects').findAll('button'):
jt = tag.find('jumpto')
tb = self.parsed_objects[jt.get('refobj')]
jb = JumpButton(tb)
self.book.append(jb)
self.parsed_objects[tag.get('objid')] = jb
-
+
for tag in self.soup.findAll(['textblock', 'simpletextblock']):
self.process_text_block(tag)
toc = self.soup.find('toc')
@@ -145,11 +145,11 @@ class LrsParser(object):
for tag in toc.findAll('toclabel'):
label = self.tag_to_string(tag)
self.book.addTocEntry(label, self.parsed_objects[tag.get('refobj')])
-
-
+
+
def third_pass(self):
map = {
- 'page' : (Page, ['pagestyle', 'evenfooterid',
+ 'page' : (Page, ['pagestyle', 'evenfooterid',
'oddfooterid', 'evenheaderid', 'oddheaderid']),
'textblock' : (TextBlock, ['textstyle', 'blockstyle']),
'simpletextblock' : (TextBlock, ['textstyle', 'blockstyle']),
@@ -167,7 +167,7 @@ class LrsParser(object):
settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel'])
for a in ('pagestyle', 'blockstyle', 'textstyle'):
label = tag.get(a, False)
- if label:
+ if label and label in self._style_labels:
_obj = self.parsed_objects[label] if \
self.parsed_objects.has_key(label) else \
self._style_labels[label]
@@ -181,9 +181,9 @@ class LrsParser(object):
if tag.has_key('canvaswidth'):
args += [tag.get('canvaswidth'), tag.get('canvasheight')]
self.parsed_objects[id] = map[tag.name][0](*args, **settings)
-
-
-
+
+
+
def second_pass(self):
map = {
'pagestyle' : (PageStyle, ['stylelabel', 'evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid']),
@@ -207,8 +207,8 @@ class LrsParser(object):
self._style_labels[x] = self.parsed_objects[id]
if tag.name == 'registfont':
self.book.append(self.parsed_objects[id])
-
-
+
+
@classmethod
def tag_to_string(cls, tag):
'''
@@ -226,20 +226,20 @@ class LrsParser(object):
res = cls.tag_to_string(item)
if res:
strings.append(res)
- return u''.join(strings)
-
+ return u''.join(strings)
+
def first_pass(self):
info = self.soup.find('bbebxylog').find('bookinformation').find('info')
bookinfo = info.find('bookinfo')
docinfo = info.find('docinfo')
-
+
def me(base, tagname):
tag = base.find(tagname.lower())
if tag is None:
return ('', '', '')
tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '')
return tag
-
+
title = me(bookinfo, 'Title')
author = me(bookinfo, 'Author')
publisher = me(bookinfo, 'Publisher')
@@ -250,12 +250,12 @@ class LrsParser(object):
creator = me(docinfo, 'Creator')[0]
producer = me(docinfo, 'Producer')[0]
bookid = me(bookinfo, 'BookID')[0]
-
+
sd = self.soup.find('setdefault')
sd = StyleDefault(**self.attrs_to_dict(sd, ['page_tree_id', 'rubyalignandadjust']))
bs = self.soup.find('booksetting')
bs = BookSetting(**self.attrs_to_dict(bs, []))
-
+
settings = {}
thumbnail = self.soup.find('cthumbnail')
if thumbnail is not None:
@@ -264,23 +264,23 @@ class LrsParser(object):
settings['thumbnail'] = f
else:
print _('Could not read from thumbnail file:'), f
-
+
self.book = Book(title=title, author=author, publisher=publisher,
category=category, classification=classification,
freetext=freetext, language=language, creator=creator,
producer=producer, bookid=bookid, setdefault=sd,
booksetting=bs, **settings)
-
+
for hdr in self.soup.findAll(['header', 'footer']):
elem = Header if hdr.name == 'header' else Footer
- self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr))
-
+ self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr))
+
def render(self, file, to_lrs=False):
if to_lrs:
self.book.renderLrs(file, 'utf-8')
else:
self.book.renderLrf(file)
-
+
def option_parser():
parser = OptionParser(usage=_('%prog [options] file.lrs\nCompile an LRS file into an LRF file.'))
@@ -299,7 +299,7 @@ def main(args=sys.argv, logger=None):
level = logging.DEBUG if opts.verbose else logging.INFO
logger = logging.getLogger('lrs2lrf')
setup_cli_handlers(logger, level)
-
+
if len(args) != 2:
parser.print_help()
return 1
@@ -310,7 +310,7 @@ def main(args=sys.argv, logger=None):
if opts.verbose:
import warnings
warnings.defaultaction = 'error'
-
+
logger.info('Parsing LRS file...')
converter = LrsParser(open(args[1], 'rb'), logger)
logger.info('Writing to output file...')
@@ -320,4 +320,4 @@ def main(args=sys.argv, logger=None):
if __name__ == '__main__':
- sys.exit(main())
\ No newline at end of file
+ sys.exit(main())
diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py
index f243237775..9146cb017d 100644
--- a/src/calibre/utils/filenames.py
+++ b/src/calibre/utils/filenames.py
@@ -5,7 +5,19 @@ meaning as possible.
from calibre.ebooks.unidecode.unidecoder import Unidecoder
from calibre import sanitize_file_name
+from calibre.constants import preferred_encoding
udc = Unidecoder()
+def ascii_text(orig):
+    '''Return an ASCII approximation of orig (a unicode or byte string).'''
+    try:
+        return udc.decode(orig)
+    except Exception:
+        # Best-effort fallback: only byte strings are decoded; unicode is encoded as is.
+        if not isinstance(orig, unicode):
+            orig = orig.decode(preferred_encoding, 'replace')
+        return orig.encode('ascii', 'replace')
+
+
def ascii_filename(orig):
- return sanitize_file_name(udc.decode(orig).replace('?', '_'))
+ return sanitize_file_name(ascii_text(orig).replace('?', '_'))  # '?' is what the transliteration / ASCII 'replace' fallback emits for unmappable characters
From 522e75cf6db501e655f21c55792afedf77bd8b54 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 12:23:29 -0600
Subject: [PATCH 09/10] Fix insert metadata inserting tags around Series
and Tags fields
---
src/calibre/ebooks/oeb/transforms/jacket.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py
index add9f85e08..6d36ef44fa 100644
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@@ -65,9 +65,9 @@ class Jacket(object):
if not comments.strip():
comments = ''
comments = comments.replace('\r\n', '\n').replace('\n\n', '
')
- series = 'Series: ' + mi.series if mi.series else ''
+ series = 'Series: ' + escape(mi.series if mi.series else '')
if series and mi.series_index is not None:
- series += ' [%s]'%mi.format_series_index()
+ series += escape(' [%s]'%mi.format_series_index())
tags = mi.tags
if not tags:
try:
@@ -75,7 +75,7 @@ class Jacket(object):
except:
tags = []
if tags:
- tags = 'Tags: ' + self.opts.dest.tags_to_string(tags)
+ tags = 'Tags: ' + escape(self.opts.dest.tags_to_string(tags))
else:
tags = ''
try:
@@ -84,8 +84,8 @@ class Jacket(object):
title = _('Unknown')
html = self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
title=escape(title), comments=escape(comments),
- jacket=escape(_('Book Jacket')), series=escape(series),
- tags=escape(tags))
+ jacket=escape(_('Book Jacket')), series=series,
+ tags=tags)
id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml')
root = etree.fromstring(html)
item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root)
From 64114c0847b453a824672a27153cb2556afaaa5a Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 26 Jul 2009 12:24:56 -0600
Subject: [PATCH 10/10] IGN:...
---
src/calibre/ebooks/lrf/lrs/convert_from.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py
index fd0dd91be0..e0ce88c2b9 100644
--- a/src/calibre/ebooks/lrf/lrs/convert_from.py
+++ b/src/calibre/ebooks/lrf/lrs/convert_from.py
@@ -167,7 +167,8 @@ class LrsParser(object):
settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel'])
for a in ('pagestyle', 'blockstyle', 'textstyle'):
label = tag.get(a, False)
- if label and label in self._style_labels:
+ if label and \
+ (label in self._style_labels or label in self.parsed_objects):
_obj = self.parsed_objects[label] if \
self.parsed_objects.has_key(label) else \
self._style_labels[label]