From 11068e0e0946532187b022d193166249ab67ce43 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 09:49:16 -0600 Subject: [PATCH 01/10] Convert all unicode characters to ASCII when creating file paths on the device --- installer/linux/freeze.py | 1 + src/calibre/devices/usbms/device.py | 2 +- src/calibre/utils/filenames.py | 96 ++--------------------------- 3 files changed, 7 insertions(+), 92 deletions(-) diff --git a/installer/linux/freeze.py b/installer/linux/freeze.py index 23c4ea7d73..352211379b 100644 --- a/installer/linux/freeze.py +++ b/installer/linux/freeze.py @@ -49,6 +49,7 @@ def freeze(): '/usr/lib/libMagickCore.so', '/usr/lib/libgcrypt.so.11', '/usr/lib/libgpg-error.so.0', + '/usr/lib/libphonon.so.4', ] binary_includes += [os.path.join(QTDIR, 'lib%s.so.4'%x) for x in QTDLLS] diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 8bbfd58043..007f132b24 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -15,7 +15,7 @@ from calibre.devices.interface import DevicePlugin from calibre.devices.errors import DeviceError from calibre.devices.usbms.deviceconfig import DeviceConfig from calibre import iswindows, islinux, isosx, __appname__ -from calibre import sanitize_file_name as sanitize +from calibre.utils.filenames import ascii_filename as sanitize class Device(DeviceConfig, DevicePlugin): ''' diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py index ad5d3f9f05..f243237775 100644 --- a/src/calibre/utils/filenames.py +++ b/src/calibre/utils/filenames.py @@ -1,97 +1,11 @@ -# -*- coding: utf-8 -*- ''' -Make strings safe for use as ASCII filenames, while trying to preserve as much +Make strings safe for use as ASCII filenames, while trying to preserve as much meaning as possible. 
''' -import re, string - -MAP = { - u"‘" : "'", - u"’" : "'", - u"«" : '"', - u"»" : '"', - u"…" : "...", - u"№" : "#", - u"Щ" : "Shh", - u"Ё" : "Jo", - u"Ж" : "Zh", - u"Ц" : "C", - u"Ч" : "Ch", - u"Ш" : "Sh", - u"Ы" : "Y", - u"Ю" : "Ju", - u"Я" : "Ja", - u"Б" : "B", - u"Г" : "G", - u"Д" : "D", - u"И" : "I", - u"Й" : "J", - u"К" : "K", - u"Л" : "L", - u"П" : "P", - u"Ф" : "F", - u"Э" : "E", - u"Ъ" : "`", - u"Ь" : "'", - u"щ" : "shh", - u"ё" : "jo", - u"ж" : "zh", - u"ц" : "c", - u"ч" : "ch", - u"ш" : "sh", - u"ы" : "y", - u"ю" : "ju", - u"я" : "ja", - u"б" : "b", - u"в" : "v", - u"г" : "g", - u"д" : "d", - u"з" : "z", - u"и" : "i", - u"й" : "j", - u"к" : "k", - u"л" : "l", - u"м" : "m", - u"н" : "n", - u"о" : "o", - u"п" : "p", - u"т" : "t", - u"ф" : "f", - u"э" : "e", - u"ъ" : "`", - u"ь" : "'", - u"А" : "A", - u"В" : "V", - u"Е" : "Je", - u"З" : "Z", - u"М" : "M", - u"Н" : "N", - u"О" : "O", - u"Р" : "R", - u"С" : "S", - u"Т" : "T", - u"У" : "U", - u"Х" : "Kh", - u"Є" : "Je", - u"Ї" : "Ji", - u"а" : "a", - u"е" : "je", - u"р" : "r", - u"с" : "s", - u"у" : "u", - u"х" : "kh", - u"є" : "je", -} #: Translation table - -for c in string.whitespace: - MAP[c] = ' ' -PAT = re.compile('['+u''.join(MAP.keys())+']') +from calibre.ebooks.unidecode.unidecoder import Unidecoder +from calibre import sanitize_file_name +udc = Unidecoder() def ascii_filename(orig): - orig = PAT.sub(lambda m:MAP[m.group()], orig) - buf = [] - for i in range(len(orig)): - val = ord(orig[i]) - buf.append('_' if val < 33 or val > 126 else orig[i]) - return (''.join(buf)).encode('ascii') + return sanitize_file_name(udc.decode(orig).replace('?', '_')) From 6cf006db0592f08e9967ad4fa42a9f467ed70947 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 10:26:51 -0600 Subject: [PATCH 02/10] Intelligently convert (almost) all filenames to ASCII. This should make for more readable file names as opposed to the previous practice of simply replacing unicode characters with underscores. 
--- src/calibre/devices/cybookg3/driver.py | 4 ++-- src/calibre/devices/jetbook/driver.py | 2 +- src/calibre/ebooks/mobi/reader.py | 5 ++-- src/calibre/ebooks/unidecode/unidecoder.py | 8 +++++-- src/calibre/gui2/device.py | 8 +++---- src/calibre/gui2/main.py | 5 ++-- src/calibre/library/database2.py | 28 +++++++++++----------- src/calibre/web/fetch/simple.py | 8 +++---- 8 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py index 9976a0f02e..a8ce905b00 100644 --- a/src/calibre/devices/cybookg3/driver.py +++ b/src/calibre/devices/cybookg3/driver.py @@ -8,7 +8,7 @@ import os import shutil from itertools import cycle -from calibre import sanitize_file_name as sanitize +from calibre.utils.filenames import ascii_filename as sanitize from calibre.devices.usbms.driver import USBMS import calibre.devices.cybookg3.t2b as t2b @@ -98,7 +98,7 @@ class CYBOOKG3(USBMS): self.report_progress(i / float(len(files)), _('Transferring books to device...')) self.report_progress(1.0, _('Transferring books to device...')) - + return zip(paths, cycle([on_card])) def delete_books(self, paths, end_session=True): diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py index 9e91f4cfa9..c6668364a7 100644 --- a/src/calibre/devices/jetbook/driver.py +++ b/src/calibre/devices/jetbook/driver.py @@ -8,7 +8,7 @@ import os, re, sys, shutil from itertools import cycle from calibre.devices.usbms.driver import USBMS -from calibre import sanitize_file_name as sanitize +from calibre.utils.filenames import ascii_filename as sanitize from calibre.ebooks.metadata import string_to_authors class JETBOOK(USBMS): diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 3d46668ee9..eed42bce46 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -21,7 +21,8 @@ except ImportError: from lxml import html, etree -from calibre 
import entity_to_unicode, sanitize_file_name +from calibre import entity_to_unicode +from calibre.utils.filenames import ascii_filename from calibre.ptempfile import TemporaryDirectory from calibre.ebooks import DRMError from calibre.ebooks.chardet import ENCODING_PATS @@ -374,7 +375,7 @@ class MobiReader(object): fname = self.name.encode('ascii', 'replace') fname = re.sub(r'[\x08\x15\0]+', '', fname) htmlfile = os.path.join(output_dir, - sanitize_file_name(fname) + '.html') + ascii_filename(fname) + '.html') try: for ref in guide.xpath('descendant::reference'): if ref.attrib.has_key('href'): diff --git a/src/calibre/ebooks/unidecode/unidecoder.py b/src/calibre/ebooks/unidecode/unidecoder.py index 8da60d29e9..d31239a1dc 100644 --- a/src/calibre/ebooks/unidecode/unidecoder.py +++ b/src/calibre/ebooks/unidecode/unidecoder.py @@ -57,6 +57,7 @@ it under the same terms as Perl itself. import re from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS +from calibre.constants import preferred_encoding class Unidecoder(object): @@ -70,7 +71,10 @@ class Unidecoder(object): try: text = unicode(text) except: - text = text.decode('utf-8', 'ignore') + try: + text = text.decode(preferred_encoding) + except: + text = text.decode('utf-8', 'replace') # Replace characters larger than 127 with their ASCII equivelent. return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()), text) @@ -80,7 +84,7 @@ class Unidecoder(object): Returns the replacement character or ? if none can be found. ''' try: - # Splite the unicode character xABCD into parts 0xAB and 0xCD. + # Split the unicode character xABCD into parts 0xAB and 0xCD. # 0xAB represents the group within CODEPOINTS to query and 0xCD # represents the position in the list of characters for the group. 
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point( diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 19228afa92..ffbcb2e9e2 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -21,7 +21,7 @@ from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \ pixmap_to_data, warning_dialog, \ question_dialog from calibre.ebooks.metadata import authors_to_string -from calibre import sanitize_file_name, preferred_encoding +from calibre import preferred_encoding from calibre.utils.filenames import ascii_filename from calibre.devices.errors import FreeSpaceError from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \ @@ -542,7 +542,7 @@ class DeviceGUI(object): '\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \ _('in the %s format.') % os.path.splitext(f)[1][1:].upper()) - prefix = sanitize_file_name(t+' - '+a) + prefix = ascii_filename(t+' - '+a) if not isinstance(prefix, unicode): prefix = prefix.decode(preferred_encoding, 'replace') attachment_names.append(prefix + os.path.splitext(f)[1]) @@ -693,7 +693,7 @@ class DeviceGUI(object): rows_are_ids=True) names = [] for mi in metadata: - prefix = sanitize_file_name(mi['title']) + prefix = ascii_filename(mi['title']) if not isinstance(prefix, unicode): prefix = prefix.decode(preferred_encoding, 'replace') prefix = ascii_filename(prefix) @@ -758,7 +758,7 @@ class DeviceGUI(object): a = mi['authors'] if not a: a = _('Unknown') - prefix = sanitize_file_name(t+' - '+a) + prefix = ascii_filename(t+' - '+a) if not isinstance(prefix, unicode): prefix = prefix.decode(preferred_encoding, 'replace') prefix = ascii_filename(prefix) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index b4b8494c64..5874e57599 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -14,8 +14,9 @@ from PyQt4.Qt import Qt, SIGNAL, QObject, QCoreApplication, QUrl, QTimer, \ QMessageBox, QStackedLayout from PyQt4.QtSvg import 
QSvgRenderer -from calibre import __version__, __appname__, sanitize_file_name, \ +from calibre import __version__, __appname__, \ iswindows, isosx, prints, patheq +from calibre.utils.filenames import ascii_filename from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import prefs, dynamic from calibre.utils.ipc.server import Server @@ -852,7 +853,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): def _files_added(self, paths=[], names=[], infos=[], on_card=None): if paths: self.upload_books(paths, - list(map(sanitize_file_name, names)), + list(map(ascii_filename, names)), infos, on_card=on_card) self.status_bar.showMessage( _('Uploading books to device.'), 2000) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index d7344b5681..c04e27aa75 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -34,7 +34,7 @@ from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_e from calibre.ptempfile import PersistentTemporaryFile from calibre.customize.ui import run_plugins_on_import -from calibre import sanitize_file_name +from calibre.utils.filenames import ascii_filename from calibre.ebooks import BOOK_EXTENSIONS if iswindows: @@ -652,8 +652,8 @@ class LibraryDatabase2(LibraryDatabase): authors = self.authors(id, index_is_id=True) if not authors: authors = _('Unknown') - author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') - title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') + author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') + title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') path = author + '/' + title + ' (%d)'%id return path @@ -664,8 +664,8 @@ class LibraryDatabase2(LibraryDatabase): authors = self.authors(id, 
index_is_id=True) if not authors: authors = _('Unknown') - author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') - title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') + author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') + title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') name = title + ' - ' + author while name.endswith('.'): name = name[:-1] @@ -1520,12 +1520,12 @@ class LibraryDatabase2(LibraryDatabase): x['cover'] = os.path.join(path, 'cover.jpg') if not self.has_cover(x['id'], index_is_id=True): x['cover'] = None - path += os.sep + self.construct_file_name(record[FIELD_MAP['id']]) + '.%s' formats = self.formats(record[FIELD_MAP['id']], index_is_id=True) if formats: for fmt in formats.split(','): - x['formats'].append(path%fmt.lower()) - x['fmt_'+fmt.lower()] = path%fmt.lower() + path = self.format_abspath(x['id'], fmt, index_is_id=True) + x['formats'].append(path) + x['fmt_'+fmt.lower()] = path x['available_formats'] = [i.upper() for i in formats.split(',')] return data @@ -1602,12 +1602,12 @@ books_series_link feeds by_author[au] = [] by_author[au].append(index) for au in by_author.keys(): - apath = os.path.join(dir, sanitize_file_name(au)) + apath = os.path.join(dir, ascii_filename(au)) if not single_dir and not os.path.exists(apath): os.mkdir(apath) for idx in by_author[au]: title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id)) - tpath = os.path.join(apath, sanitize_file_name(title)) + tpath = os.path.join(apath, ascii_filename(title)) id = idx if index_is_id else self.id(idx) id = str(id) if not single_dir and not os.path.exists(tpath): @@ -1621,10 +1621,10 @@ books_series_link feeds mi.authors = [_('Unknown')] cdata = self.cover(int(id), index_is_id=True) if cdata is not None: - cname = 
sanitize_file_name(name)+'.jpg' + cname = ascii_filename(name)+'.jpg' open(os.path.join(base, cname), 'wb').write(cdata) mi.cover = cname - with open(os.path.join(base, sanitize_file_name(name)+'.opf'), + with open(os.path.join(base, ascii_filename(name)+'.opf'), 'wb') as f: f.write(metadata_to_opf(mi)) @@ -1636,7 +1636,7 @@ books_series_link feeds if not data: continue fname = name +'.'+fmt.lower() - fname = sanitize_file_name(fname) + fname = ascii_filename(fname) f = open(os.path.join(base, fname), 'w+b') f.write(data) f.flush() @@ -1671,7 +1671,7 @@ books_series_link feeds if not au: au = _('Unknown') fname = '%s - %s.%s'%(title, au, format.lower()) - fname = sanitize_file_name(fname) + fname = ascii_filename(fname) if not os.path.exists(dir): os.makedirs(dir) f = open(os.path.join(dir, fname), 'w+b') diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index f956c4ee10..238ab343a9 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -14,8 +14,8 @@ from httplib import responses from PIL import Image from cStringIO import StringIO -from calibre import browser, sanitize_file_name, \ - relpath, unicode_path +from calibre import browser, relpath, unicode_path +from calibre.utils.filenames import ascii_filename from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.chardet import xml_to_unicode from calibre.utils.config import OptionParser @@ -313,7 +313,7 @@ class RecursiveFetcher(object): self.log.exception('Could not fetch image %s'% iurl) continue c += 1 - fname = sanitize_file_name('img'+str(c)+ext) + fname = ascii_filename('img'+str(c)+ext) if isinstance(fname, unicode): fname = fname.encode('ascii', 'replace') imgpath = os.path.join(diskpath, fname+'.jpg') @@ -416,7 +416,7 @@ class RecursiveFetcher(object): if not isinstance(_fname, unicode): _fname.decode('latin1', 'replace') _fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '') - _fname = 
sanitize_file_name(_fname) + _fname = ascii_filename(_fname) _fname = os.path.splitext(_fname)[0]+'.xhtml' res = os.path.join(linkdiskpath, _fname) self.downloaded_paths.append(res) From 8a4754f1d08f7c07051508053f5c122792a86eef Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 10:55:14 -0600 Subject: [PATCH 03/10] Fix #2934 (Crash when aborting cover download) --- src/calibre/gui2/dialogs/metadata_single.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 10a81be9f5..d349298609 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -526,3 +526,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): QDialog.accept(self) if callable(self.accepted_callback): self.accepted_callback(self.id) + + def reject(self, *args): + cf = getattr(self, 'cover_fetcher', None) + if cf is not None and hasattr(cf, 'terminate'): + cf.terminate() + cf.wait() + + QDialog.reject(self, *args) From 2436866707270c60260004df77492bce4520bb31 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 11:08:25 -0600 Subject: [PATCH 04/10] Fix #2938 (comic2lrf gives EOFerror) --- src/calibre/ebooks/comic/input.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py index 2eed12de0c..c039b06676 100755 --- a/src/calibre/ebooks/comic/input.py +++ b/src/calibre/ebooks/comic/input.py @@ -253,7 +253,6 @@ def process_pages(pages, opts, update, tdir): for job in jobs: if job.failed: - raw_input() raise Exception(_('Failed to process comic: \n\n%s')% job.log_file.read()) pages, failures_ = job.result From fa603d130c3cac184871262ef26b55cc40333752 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 11:29:28 -0600 Subject: [PATCH 05/10] Fix #2930 (No book is selected after deleting a book) --- src/calibre/gui2/main.py | 10 
++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 5874e57599..3da3a33c69 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -889,7 +889,17 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): 'removed from your computer. Are you sure?') +'

', 'library_delete_books', self): return + ci = view.currentIndex() + row = None + if ci.isValid(): + row = ci.row() view.model().delete_books(rows) + if row is not None: + ci = view.model().index(row, 0) + if ci.isValid(): + view.setCurrentIndex(ci) + sm = view.selectionModel() + sm.select(ci, sm.Select) else: if self.stack.currentIndex() == 1: view = self.memory_view From 0b4d97a8a474667c09631582fe742f718f906aca Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 11:44:01 -0600 Subject: [PATCH 06/10] Fix file selection dialogs not choosing the correct file extension filter by default --- src/calibre/gui2/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index f5dcdcfebe..12f6fb2025 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -342,6 +342,8 @@ class FileDialog(QObject): ftext += '%s (%s);;'%(text, ' '.join(extensions)) if add_all_files_filter or not ftext: ftext += 'All files (*)' + if ftext.endswith(';;'): + ftext = ftext[:-2] self.dialog_name = name if name else 'dialog_' + title self.selected_files = None From 2c5d95113912ab6fac4e845a92337c7187bbeabc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 11:54:18 -0600 Subject: [PATCH 07/10] IGN:Remove note about libphonon from download page --- src/calibre/trac/plugins/templates/linux.html | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/calibre/trac/plugins/templates/linux.html b/src/calibre/trac/plugins/templates/linux.html index 58c3e8ad6a..b948dccc74 100644 --- a/src/calibre/trac/plugins/templates/linux.html +++ b/src/calibre/trac/plugins/templates/linux.html @@ -82,10 +82,6 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal.

Note

    -
  • On some linux distributions, you have to install the - libphonon (may be called libphonon4) package for calibre - to work. -
  • When running the command line utilities, they will segfault after completion. This can From a80d7050495754c4893ce5e86f6e9b3860498bea Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 12:09:21 -0600 Subject: [PATCH 08/10] lrs2lrf: Handle missing style labels gracefully --- src/calibre/ebooks/lrf/lrs/convert_from.py | 82 +++++++++++----------- src/calibre/utils/filenames.py | 14 +++- 2 files changed, 54 insertions(+), 42 deletions(-) diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py index 86a97aa70b..fd0dd91be0 100644 --- a/src/calibre/ebooks/lrf/lrs/convert_from.py +++ b/src/calibre/ebooks/lrf/lrs/convert_from.py @@ -18,38 +18,38 @@ from calibre.ebooks.lrf.pylrs.pylrs import Book, PageStyle, TextStyle, \ from calibre.ebooks.chardet import xml_to_unicode class LrsParser(object): - - SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space', - 'PutObj', 'RuledLine', + + SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space', + 'PutObj', 'RuledLine', 'Plot', 'SetDefault', 'BookSetting', 'RegistFont', 'PageStyle', 'TextStyle', 'BlockStyle', 'JumpTo', 'ImageStream', 'Image']] - + def __init__(self, stream, logger): self.logger = logger src = stream.read() self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0], - convertEntities=BeautifulStoneSoup.XML_ENTITIES, + convertEntities=BeautifulStoneSoup.XML_ENTITIES, selfClosingTags=self.SELF_CLOSING_TAGS) self.objects = {} for obj in self.soup.findAll(objid=True): self.objects[obj['objid']] = obj - + self.parsed_objects = {} self.first_pass() self.second_pass() self.third_pass() self.fourth_pass() self.fifth_pass() - + def fifth_pass(self): for tag in self.soup.findAll(['canvas', 'header', 'footer']): canvas = self.parsed_objects[tag.get('objid')] for po in tag.findAll('putobj'): canvas.put_object(self.parsed_objects[po.get('refobj')], po.get('x1'), po.get('y1')) - - + + @classmethod def attrs_to_dict(cls, tag, 
exclude=('objid',)): result = {} @@ -58,7 +58,7 @@ class LrsParser(object): continue result[str(key)] = val return result - + def text_tag_to_element(self, tag): map = { 'span' : Span, @@ -77,7 +77,7 @@ class LrsParser(object): settings = self.attrs_to_dict(tag) settings.pop('spanstyle', '') return map[tag.name](**settings) - + def process_text_element(self, tag, elem): for item in tag.contents: if isinstance(item, NavigableString): @@ -86,8 +86,8 @@ class LrsParser(object): subelem = self.text_tag_to_element(item) elem.append(subelem) self.process_text_element(item, subelem) - - + + def process_paragraph(self, tag): p = Paragraph() contents = [i for i in tag.contents] @@ -104,7 +104,7 @@ class LrsParser(object): p.append(elem) self.process_text_element(item, elem) return p - + def process_text_block(self, tag): tb = self.parsed_objects[tag.get('objid')] for item in tag.contents: @@ -119,25 +119,25 @@ class LrsParser(object): elem = self.text_tag_to_element(item) self.process_text_element(item, elem) p.append(elem) - + def fourth_pass(self): for tag in self.soup.findAll('page'): page = self.parsed_objects[tag.get('objid')] self.book.append(page) - for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', + for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', 'ruledline', 'simpletextblock']): if block_tag.name == 'ruledline': page.append(RuledLine(**self.attrs_to_dict(block_tag))) else: page.append(self.parsed_objects[block_tag.get('objid')]) - + for tag in self.soup.find('objects').findAll('button'): jt = tag.find('jumpto') tb = self.parsed_objects[jt.get('refobj')] jb = JumpButton(tb) self.book.append(jb) self.parsed_objects[tag.get('objid')] = jb - + for tag in self.soup.findAll(['textblock', 'simpletextblock']): self.process_text_block(tag) toc = self.soup.find('toc') @@ -145,11 +145,11 @@ class LrsParser(object): for tag in toc.findAll('toclabel'): label = self.tag_to_string(tag) self.book.addTocEntry(label, 
self.parsed_objects[tag.get('refobj')]) - - + + def third_pass(self): map = { - 'page' : (Page, ['pagestyle', 'evenfooterid', + 'page' : (Page, ['pagestyle', 'evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid']), 'textblock' : (TextBlock, ['textstyle', 'blockstyle']), 'simpletextblock' : (TextBlock, ['textstyle', 'blockstyle']), @@ -167,7 +167,7 @@ class LrsParser(object): settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel']) for a in ('pagestyle', 'blockstyle', 'textstyle'): label = tag.get(a, False) - if label: + if label and label in self._style_labels: _obj = self.parsed_objects[label] if \ self.parsed_objects.has_key(label) else \ self._style_labels[label] @@ -181,9 +181,9 @@ class LrsParser(object): if tag.has_key('canvaswidth'): args += [tag.get('canvaswidth'), tag.get('canvasheight')] self.parsed_objects[id] = map[tag.name][0](*args, **settings) - - - + + + def second_pass(self): map = { 'pagestyle' : (PageStyle, ['stylelabel', 'evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid']), @@ -207,8 +207,8 @@ class LrsParser(object): self._style_labels[x] = self.parsed_objects[id] if tag.name == 'registfont': self.book.append(self.parsed_objects[id]) - - + + @classmethod def tag_to_string(cls, tag): ''' @@ -226,20 +226,20 @@ class LrsParser(object): res = cls.tag_to_string(item) if res: strings.append(res) - return u''.join(strings) - + return u''.join(strings) + def first_pass(self): info = self.soup.find('bbebxylog').find('bookinformation').find('info') bookinfo = info.find('bookinfo') docinfo = info.find('docinfo') - + def me(base, tagname): tag = base.find(tagname.lower()) if tag is None: return ('', '', '') tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '') return tag - + title = me(bookinfo, 'Title') author = me(bookinfo, 'Author') publisher = me(bookinfo, 'Publisher') @@ -250,12 +250,12 @@ class LrsParser(object): creator = me(docinfo, 'Creator')[0] producer = me(docinfo, 
'Producer')[0] bookid = me(bookinfo, 'BookID')[0] - + sd = self.soup.find('setdefault') sd = StyleDefault(**self.attrs_to_dict(sd, ['page_tree_id', 'rubyalignandadjust'])) bs = self.soup.find('booksetting') bs = BookSetting(**self.attrs_to_dict(bs, [])) - + settings = {} thumbnail = self.soup.find('cthumbnail') if thumbnail is not None: @@ -264,23 +264,23 @@ class LrsParser(object): settings['thumbnail'] = f else: print _('Could not read from thumbnail file:'), f - + self.book = Book(title=title, author=author, publisher=publisher, category=category, classification=classification, freetext=freetext, language=language, creator=creator, producer=producer, bookid=bookid, setdefault=sd, booksetting=bs, **settings) - + for hdr in self.soup.findAll(['header', 'footer']): elem = Header if hdr.name == 'header' else Footer - self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr)) - + self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr)) + def render(self, file, to_lrs=False): if to_lrs: self.book.renderLrs(file, 'utf-8') else: self.book.renderLrf(file) - + def option_parser(): parser = OptionParser(usage=_('%prog [options] file.lrs\nCompile an LRS file into an LRF file.')) @@ -299,7 +299,7 @@ def main(args=sys.argv, logger=None): level = logging.DEBUG if opts.verbose else logging.INFO logger = logging.getLogger('lrs2lrf') setup_cli_handlers(logger, level) - + if len(args) != 2: parser.print_help() return 1 @@ -310,7 +310,7 @@ def main(args=sys.argv, logger=None): if opts.verbose: import warnings warnings.defaultaction = 'error' - + logger.info('Parsing LRS file...') converter = LrsParser(open(args[1], 'rb'), logger) logger.info('Writing to output file...') @@ -320,4 +320,4 @@ def main(args=sys.argv, logger=None): if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file + sys.exit(main()) diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py index f243237775..9146cb017d 100644 --- 
a/src/calibre/utils/filenames.py +++ b/src/calibre/utils/filenames.py @@ -5,7 +5,19 @@ meaning as possible. from calibre.ebooks.unidecode.unidecoder import Unidecoder from calibre import sanitize_file_name +from calibre.constants import preferred_encoding udc = Unidecoder() +def ascii_text(orig): + try: + ascii = udc.decode(orig) + except: + if isinstance(orig, unicode): + ascii = orig.encode('ascii', 'replace') + ascii = orig.decode(preferred_encoding, + 'replace').encode('ascii', 'replace') + return ascii + + def ascii_filename(orig): - return sanitize_file_name(udc.decode(orig).replace('?', '_')) + return sanitize_file_name(ascii_text(orig).replace('?', '_')) From 522e75cf6db501e655f21c55792afedf77bd8b54 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 12:23:29 -0600 Subject: [PATCH 09/10] Fix insert metadata inserting tags around Series and Tags fields --- src/calibre/ebooks/oeb/transforms/jacket.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index add9f85e08..6d36ef44fa 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -65,9 +65,9 @@ class Jacket(object): if not comments.strip(): comments = '' comments = comments.replace('\r\n', '\n').replace('\n\n', '

    ') - series = 'Series: ' + mi.series if mi.series else '' + series = 'Series: ' + escape(mi.series if mi.series else '') if series and mi.series_index is not None: - series += ' [%s]'%mi.format_series_index() + series += escape(' [%s]'%mi.format_series_index()) tags = mi.tags if not tags: try: @@ -75,7 +75,7 @@ class Jacket(object): except: tags = [] if tags: - tags = 'Tags: ' + self.opts.dest.tags_to_string(tags) + tags = 'Tags: ' + escape(self.opts.dest.tags_to_string(tags)) else: tags = '' try: @@ -84,8 +84,8 @@ class Jacket(object): title = _('Unknown') html = self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'], title=escape(title), comments=escape(comments), - jacket=escape(_('Book Jacket')), series=escape(series), - tags=escape(tags)) + jacket=escape(_('Book Jacket')), series=series, + tags=tags) id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml') root = etree.fromstring(html) item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) From 64114c0847b453a824672a27153cb2556afaaa5a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 12:24:56 -0600 Subject: [PATCH 10/10] IGN:... --- src/calibre/ebooks/lrf/lrs/convert_from.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py index fd0dd91be0..e0ce88c2b9 100644 --- a/src/calibre/ebooks/lrf/lrs/convert_from.py +++ b/src/calibre/ebooks/lrf/lrs/convert_from.py @@ -167,7 +167,8 @@ class LrsParser(object): settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel']) for a in ('pagestyle', 'blockstyle', 'textstyle'): label = tag.get(a, False) - if label and label in self._style_labels: + if label and \ + (label in self._style_labels or label in self.parsed_objects): _obj = self.parsed_objects[label] if \ self.parsed_objects.has_key(label) else \ self._style_labels[label]