From 9184d8cd0a8e27ce5605630009aa48bd4d4e2f72 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 4 Feb 2011 16:58:51 -0700 Subject: [PATCH 1/6] ... --- src/calibre/gui2/dialogs/metadata_single.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index aec8c4fd60..3e711edd2d 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -951,8 +951,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): for w in getattr(self, 'custom_column_widgets', []): self.books_to_refresh |= w.commit(self.id) self.db.commit() - except IOError, err: - if err.errno == 13: # Permission denied + except (IOError, OSError) as err: + if getattr(err, 'errno', -1) == 13: # Permission denied fname = err.filename if err.filename else 'file' return error_dialog(self, _('Permission denied'), _('Could not open %s. Is it being used by another' From 746ca10ae58ea35bb9ffe47b0bb60f87145de0ef Mon Sep 17 00:00:00 2001 From: John Schember Date: Fri, 4 Feb 2011 19:10:53 -0500 Subject: [PATCH 2/6] Restructure TXT input processing to apply paragraph transformations when specified or detected with auto no matter the formatting type specified or detected. --- src/calibre/ebooks/txt/input.py | 49 +++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 2399e599ae..e1392ef732 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -57,6 +57,7 @@ class TXTInput(InputFormatPlugin): log.debug('Reading text from file...') txt = stream.read() + # Get the encoding of the document. if options.input_encoding: ienc = options.input_encoding @@ -70,13 +71,16 @@ class TXTInput(InputFormatPlugin): log.debug('No input encoding specified and could not auto detect using %s' % ienc) txt = txt.decode(ienc, 'replace') + # Replace entities txt = _ent_pat.sub(xml_entity_to_unicode, txt) # Normalize line endings txt = normalize_line_endings(txt) + # Detect formatting if options.formatting_type == 'auto': options.formatting_type = detect_formatting_type(txt) + log.debug('Auto detected formatting as %s' % options.formatting_type) if options.formatting_type == 'heuristic': setattr(options, 'enable_heuristics', True) @@ -105,41 +109,43 @@ class TXTInput(InputFormatPlugin): docanalysis = DocAnalysis('txt', txt) length = docanalysis.line_length(.5) + # Reformat paragraphs to block formatting based on the detected type. + # We don't check for block because the processor assumes block. + # single and print at transformed to block for processing. + if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted': + txt = separate_paragraphs_single_line(txt) + elif options.paragraph_type == 'print': + txt = separate_paragraphs_print_formatted(txt) + elif options.paragraph_type == 'unformatted': + from calibre.ebooks.conversion.utils import HeuristicProcessor + # unwrap lines based on punctuation + preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None)) + txt = preprocessor.punctuation_unwrap(length, txt, 'txt') + + # Process the text using the appropriate text processor. + html = '' if options.formatting_type == 'markdown': - log.debug('Running text though markdown conversion...') + log.debug('Running text through markdown conversion...') try: html = convert_markdown(txt, disable_toc=options.markdown_disable_toc) except RuntimeError: raise ValueError('This txt file has malformed markup, it cannot be' ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') elif options.formatting_type == 'textile': - log.debug('Running text though textile conversion...') + log.debug('Running text through textile conversion...') html = convert_textile(txt) else: - # Dehyphenate - dehyphenator = Dehyphenator(options.verbose, log=self.log) - txt = dehyphenator(txt,'txt', length) - - # We don't check for block because the processor assumes block. - # single and print at transformed to block for processing. - - if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted': - txt = separate_paragraphs_single_line(txt) - elif options.paragraph_type == 'print': - txt = separate_paragraphs_print_formatted(txt) - - if options.paragraph_type == 'unformatted': - from calibre.ebooks.conversion.utils import HeuristicProcessor - # get length - - # unwrap lines based on punctuation - preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None)) - txt = preprocessor.punctuation_unwrap(length, txt, 'txt') + log.debug('Running text through basic conversion...') + if options.formatting_type == 'heuristic': + # Dehyphenate + dehyphenator = Dehyphenator(options.verbose, log=self.log) + txt = dehyphenator(txt,'txt', length) flow_size = getattr(options, 'flow_size', 0) html = convert_basic(txt, epub_split_size_kb=flow_size) + # Run the HTMLized text through the html processing plugin. from calibre.customize.ui import plugin_for_input_format html_input = plugin_for_input_format('html') for opt in html_input.options: @@ -158,6 +164,7 @@ class TXTInput(InputFormatPlugin): htmlfile.write(html.encode('utf-8')) odi = options.debug_pipeline options.debug_pipeline = None + # Generate oeb from htl conversion. oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log, {}) options.debug_pipeline = odi From a046e8d0964de7ed3b9b7e1145d167b859a4b7eb Mon Sep 17 00:00:00 2001 From: John Schember Date: Fri, 4 Feb 2011 21:27:04 -0500 Subject: [PATCH 3/6] Tweak to append separator character to completed text. --- resources/default_tweaks.py | 7 +++++++ src/calibre/gui2/complete.py | 17 ++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 32aeba9122..f1abfbe7ea 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -30,6 +30,13 @@ defaults. series_index_auto_increment = 'next' +# Should the completion separator be append +# to the end of the completed text to +# automatically begin a new completion operation. +# Can be either True or False +completer_append_separator = False + + # The algorithm used to copy author to author_sort # Possible values are: # invert: use "fn ln" -> "ln, fn" (the original algorithm) diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py index f589b30679..a013065690 100644 --- a/src/calibre/gui2/complete.py +++ b/src/calibre/gui2/complete.py @@ -10,6 +10,7 @@ from PyQt4.Qt import QLineEdit, QListView, QAbstractListModel, Qt, QTimer, \ QApplication, QPoint, QItemDelegate, QStyleOptionViewItem, \ QStyle, QEvent, pyqtSignal +from calibre.utils.config import tweaks from calibre.utils.icu import sort_key, lower from calibre.gui2 import NONE from calibre.gui2.widgets import EnComboBox @@ -231,12 +232,18 @@ class MultiCompleteLineEdit(QLineEdit): cursor_pos = self.cursorPosition() before_text = unicode(self.text())[:cursor_pos] after_text = unicode(self.text())[cursor_pos:] - after_parts = after_text.split(self.sep) - if len(after_parts) < 3 and not after_parts[-1].strip(): - after_text = u'' prefix_len = len(before_text.split(self.sep)[-1].lstrip()) - return prefix_len, \ - before_text[:cursor_pos - prefix_len] + text + after_text + if tweaks['completer_append_separator']: + prefix_len = len(before_text.split(self.sep)[-1].lstrip()) + completed_text = before_text[:cursor_pos - prefix_len] + text + self.sep + ' ' + after_text + prefix_len = prefix_len - len(self.sep) - 1 + if prefix_len < 0: + prefix_len = 0 + else: + prefix_len = len(before_text.split(self.sep)[-1].lstrip()) + completed_text = before_text[:cursor_pos - prefix_len] + text + after_text + return prefix_len, completed_text + def completion_selected(self, text): prefix_len, ctext = self.get_completed_text(text) From 480fd141e00c92e030f13ab4d327da29519e7e54 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 5 Feb 2011 05:11:24 +0000 Subject: [PATCH 4/6] Fix #8765: Custom series like column weird behaviour --- src/calibre/gui2/library/models.py | 10 ++++++++++ src/calibre/library/custom_columns.py | 4 +++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 0b6991665b..2f8a747c39 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -791,6 +791,16 @@ class BooksModel(QAbstractTableModel): # {{{ val = qt_to_dt(val, as_utc=False) elif typ == 'series': val = unicode(value.toString()).strip() + if val: + pat = re.compile(r'\[([.0-9]+)\]') + match = pat.search(val) + if match is not None: + s_index = float(match.group(1)) + val = pat.sub('', val).strip() + elif val: + if tweaks['series_index_auto_increment'] != 'const': + s_index = self.db.get_next_cc_series_num_for(val, + label=label, num=None) elif typ == 'composite': tmpl = unicode(value.toString()).strip() disp = cc['display'] diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py index 467a3f309e..cb735dc529 100644 --- a/src/calibre/library/custom_columns.py +++ b/src/calibre/library/custom_columns.py @@ -484,7 +484,9 @@ class CustomColumns(object): if not existing: existing = [] for x in set(set_val) - set(existing): - if x is None: + # normalized types are text and ratings, so we can do this check + # to see if we need to re-add the value + if not x: continue case_change = False existing = list(self.all_custom(num=data['num'])) From ee420551cb7206b683f359b879ed40f69c505cc2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 5 Feb 2011 09:46:52 -0700 Subject: [PATCH 5/6] FIx regression that broke the convenience Email to xxx entry in the connect share menu. Fixes #8775 (0.7.44 - Problem with sending via email function - seems not to work as 0.7.43) --- src/calibre/gui2/actions/device.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/gui2/actions/device.py b/src/calibre/gui2/actions/device.py index b32568f8fd..429bc641d0 100644 --- a/src/calibre/gui2/actions/device.py +++ b/src/calibre/gui2/actions/device.py @@ -94,6 +94,7 @@ class ShareConnMenu(QMenu): # {{{ I('mail.png'), _('Email to') + ' ' +account) self.addAction(ac) self.email_actions.append(ac) + ac.a_s.connect(sync_menu.action_triggered) action1.a_s.connect(sync_menu.action_triggered) action2.a_s.connect(sync_menu.action_triggered) ac = self.addMenu(self.email_to_and_delete_menu) From b5fd4a07d893e87c1e8c140aafb372c8e5624303 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 5 Feb 2011 10:00:33 -0700 Subject: [PATCH 6/6] RTF Input: More encoding token splitting fixes. --- src/calibre/ebooks/rtf2xml/ParseRtf.py | 12 +++---- src/calibre/ebooks/rtf2xml/colors.py | 2 +- src/calibre/ebooks/rtf2xml/process_tokens.py | 14 +++++---- src/calibre/ebooks/rtf2xml/tokenize.py | 33 ++++++-------------- 4 files changed, 25 insertions(+), 36 deletions(-) diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py index 831183f0dd..0fc1c431db 100755 --- a/src/calibre/ebooks/rtf2xml/ParseRtf.py +++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py @@ -226,7 +226,7 @@ class ParseRtf: try: return_value = process_tokens_obj.process_tokens() except InvalidRtfException, msg: - #Check to see if the file is correctly encoded + # Check to see if the file is correctly encoded encode_obj = default_encoding.DefaultEncoding( in_file = self.__temp_file, run_level = self.__run_level, @@ -237,14 +237,14 @@ class ParseRtf: check_encoding_obj = check_encoding.CheckEncoding( bug_handler = RtfInvalidCodeException, ) - enc = 'cp' + encode_obj.get_codepage() - if enc == 'cp10000': - enc = 'mac_roman' - msg = 'Exception in token processing' + enc = encode_obj.get_codepage() + if enc != 'mac_roman': + enc = 'cp' + enc + msg = '%s\nException in token processing' % str(msg) if check_encoding_obj.check_encoding(self.__file, enc): file_name = self.__file if isinstance(self.__file, str) \ else self.__file.encode('utf-8') - msg = 'File %s does not appear to be correctly encoded.\n' % file_name + msg +='\nFile %s does not appear to be correctly encoded.\n' % file_name try: os.remove(self.__temp_file) except OSError: diff --git a/src/calibre/ebooks/rtf2xml/colors.py b/src/calibre/ebooks/rtf2xml/colors.py index eba03547c8..e85b59571c 100755 --- a/src/calibre/ebooks/rtf2xml/colors.py +++ b/src/calibre/ebooks/rtf2xml/colors.py @@ -210,7 +210,7 @@ class Colors: hex_num = self.__color_dict.get(num) if hex_num is None: hex_num = '0' - if self.__run_level > 5: + if self.__run_level > 3: msg = 'no value in self.__color_dict' \ 'for key %s at line %d\n' % (num, self.__line) raise self.__bug_handler, msg diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py index c6cf124425..65162d0d37 100755 --- a/src/calibre/ebooks/rtf2xml/process_tokens.py +++ b/src/calibre/ebooks/rtf2xml/process_tokens.py @@ -786,21 +786,23 @@ class ProcessTokens: token = line.replace("\n","") line_count += 1 if line_count == 1 and token != '\\{': - msg = 'Invalid RTF: document doesn\'t start with {\n' + msg = '\nInvalid RTF: document doesn\'t start with {\n' raise self.__exception_handler, msg elif line_count == 2 and token[0:4] != '\\rtf': - msg = 'Invalid RTF: document doesn\'t start with \\rtf \n' + msg = '\nInvalid RTF: document doesn\'t start with \\rtf \n' raise self.__exception_handler, msg the_index = token.find('\\ ') if token is not None and the_index > -1: - msg = 'Invalid RTF: token "\\ " not valid.\n' + msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\ + % line_count raise self.__exception_handler, msg elif token[:1] == "\\": try: token.decode('us-ascii') except UnicodeError, msg: - msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg) + msg = '\nInvalid RTF: Tokens not ascii encoded.\n%s\nError at line %d'\ + % (str(msg), line_count) raise self.__exception_handler, msg line = self.process_cw(token) if line is not None: @@ -816,7 +818,7 @@ class ProcessTokens: write_obj.write('tx\n\g<2>", input_file) input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file) input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file) #remove \n in bin data @@ -139,17 +140,17 @@ class Tokenize: "\\_": "\\_ ", "\\:": "\\: ", "\\-": "\\- ", - # turn into a generic token to eliminate special - # cases and make processing easier + #turn into a generic token to eliminate special + #cases and make processing easier "\\{": "\\ob ", - # turn into a generic token to eliminate special - # cases and make processing easier + #turn into a generic token to eliminate special + #cases and make processing easier "\\}": "\\cb ", - # put a backslash in front of to eliminate special cases and - # make processing easier + #put a backslash in front of to eliminate special cases and + #make processing easier "{": "\\{", - # put a backslash in front of to eliminate special cases and - # make processing easier + #put a backslash in front of to eliminate special cases and + #make processing easier "}": "\\}", } self.__replace_spchar = MReplace(SIMPLE_RPL) @@ -165,21 +166,9 @@ class Tokenize: #remove \n from endline char self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)") #this is for old RTF - self.__par_exp = re.compile(r'\\\n+') + self.__par_exp = re.compile(r'(\\\n+|\\ )') #handle cw using a digit as argument and without space as delimiter self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)") - #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}") - #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})") - #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)") - #self.__remove_line = re.compile(r'\n+') - ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)") - - def __correct_spliting(self, token): - match_obj = re.search(self.__cwdigit_exp, token) - if match_obj is None: - return token - else: - return '%s\n%s' % (match_obj.group(1), match_obj.group(2)) def tokenize(self): """Main class for handling other methods. Reads the file \ @@ -196,8 +185,6 @@ class Tokenize: tokens = map(self.__unicode_process, tokens) #remove empty items created by removing \uc tokens = filter(lambda x: len(x) > 0, tokens) - #handles bothersome cases - tokens = map(self.__correct_spliting, tokens) #write with open(self.__write_to, 'wb') as write_obj: