Merge from trunk

Author: Charles Haley
Date:   2011-01-16 10:40:42 +00:00
Commit: 04869f7f5d
15 changed files with 270 additions and 193 deletions

View File

@@ -1,5 +1,5 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+#from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from urllib import quote
 class SportsIllustratedRecipe(BasicNewsRecipe) :
@@ -91,7 +91,7 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
 # expire : no idea what value to use
 # All this comes from the Javascript function that redirects to the print version. It's called PT() and is defined in the file 48.js
-def preprocess_html(self, soup):
+'''def preprocess_html(self, soup):
 header = soup.find('div', attrs = {'class' : 'siv_artheader'})
 homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
 body = homeMadeSoup.body
@@ -115,4 +115,5 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
 body.append(para)
 return homeMadeSoup
+'''

View File

@@ -33,6 +33,6 @@ class SNE(USBMS):
 STORAGE_CARD_VOLUME_LABEL = 'SNE Storage Card'
 EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Books'
-SUPPORTS_SUB_DIRS = True
+SUPPORTS_SUB_DIRS = False

View File

@@ -286,7 +286,6 @@ class RTFInput(InputFormatPlugin):
 try:
 xml = self.generate_xml(stream.name)
 except RtfInvalidCodeException, e:
-raise
 raise ValueError(_('This RTF file has a feature calibre does not '
 'support. Convert it to HTML first and then try it.\n%s')%e)

View File

@@ -226,10 +226,6 @@ class ParseRtf:
 try:
 return_value = process_tokens_obj.process_tokens()
 except InvalidRtfException, msg:
-try:
-os.remove(self.__temp_file)
-except OSError:
-pass
 #Check to see if the file is correctly encoded
 encode_obj = default_encoding.DefaultEncoding(
 in_file = self.__temp_file,
@@ -241,14 +237,17 @@ class ParseRtf:
 check_encoding_obj = check_encoding.CheckEncoding(
 bug_handler = RtfInvalidCodeException,
 )
-enc = encode_obj.get_codepage()
-if enc != 'mac_roman':
-enc = 'cp' + enc
+enc = 'cp' + encode_obj.get_codepage()
+msg = 'Exception in token processing'
 if check_encoding_obj.check_encoding(self.__file, enc):
 file_name = self.__file if isinstance(self.__file, str) \
 else self.__file.encode('utf-8')
 msg = 'File %s does not appear to be correctly encoded.\n' % file_name
-raise InvalidRtfException, msg
+try:
+os.remove(self.__temp_file)
+except OSError:
+pass
+raise InvalidRtfException, msg
 delete_info_obj = delete_info.DeleteInfo(
 in_file = self.__temp_file,
 copy = self.__copy,
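
Note: the hunk above reworks the failure path so the code page is always prefixed with 'cp', the source file is checked for an encoding mismatch to pick a better error message, and the temporary file is removed before the exception is re-raised. A minimal sketch of that flow, using an illustrative decodes_with() helper rather than calibre's CheckEncoding class:

import codecs, os

def decodes_with(path, enc):
    # True if the file decodes cleanly with the given codec
    try:
        with codecs.open(path, 'r', enc) as f:
            f.read()
        return True
    except UnicodeDecodeError:
        return False

def fail_invalid_rtf(src_path, temp_file, codepage='1252'):
    msg = 'Exception in token processing'
    if not decodes_with(src_path, 'cp' + codepage):
        msg = 'File %s does not appear to be correctly encoded.\n' % src_path
    try:
        os.remove(temp_file)   # never leave the intermediate file behind
    except OSError:
        pass
    raise ValueError(msg)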

View File

@@ -74,9 +74,6 @@ class DefaultEncoding:
 if not self.__datafetched:
 self._encoding()
 self.__datafetched = True
-if self.__platform == 'Macintosh':
-code_page = self.__code_page
-else:
 code_page = 'ansicpg' + self.__code_page
 return self.__platform, code_page, self.__default_num
@@ -94,49 +91,60 @@ class DefaultEncoding:
 def _encoding(self):
 with open(self.__file, 'r') as read_obj:
+cpfound = False
 if not self.__fetchraw:
 for line in read_obj:
 self.__token_info = line[:16]
 if self.__token_info == 'mi<mk<rtfhed-end':
 break
-if self.__token_info == 'cw<ri<ansi-codpg':
-#cw<ri<ansi-codpg<nu<10000
-self.__code_page = line[20:-1] if int(line[20:-1]) \
-else '1252'
 if self.__token_info == 'cw<ri<macintosh_':
 self.__platform = 'Macintosh'
-self.__code_page = 'mac_roman'
 elif self.__token_info == 'cw<ri<pc________':
 self.__platform = 'IBMPC'
-self.__code_page = '437'
 elif self.__token_info == 'cw<ri<pca_______':
 self.__platform = 'OS/2'
-self.__code_page = '850'
+if self.__token_info == 'cw<ri<ansi-codpg' \
+and int(line[20:-1]):
+self.__code_page = line[20:-1]
 if self.__token_info == 'cw<ri<deflt-font':
 self.__default_num = line[20:-1]
+cpfound = True
 #cw<ri<deflt-font<nu<0
+if self.__platform != 'Windows' and \
+not cpfound:
+if self.__platform == 'Macintosh':
+self.__code_page = '10000'
+elif self.__platform == 'IBMPC':
+self.__code_page = '437'
+elif self.__platform == 'OS/2':
+self.__code_page = '850'
 else:
 fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
 fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
 for line in read_obj:
+if fenc.search(line):
+enc = fenc.search(line).group(1)
 if fenccp.search(line):
 cp = fenccp.search(line).group(1)
 if not int(cp):
 self.__code_page = cp
+cpfound = True
 break
-if fenc.search(line):
-enc = fenc.search(line).group(1)
+if self.__platform != 'Windows' and \
+not cpfound:
 if enc == 'mac':
-self.__code_page = 'mac_roman'
+self.__code_page = '10000'
 elif enc == 'pc':
 self.__code_page = '437'
 elif enc == 'pca':
 self.__code_page = '850'
-# if __name__ == '__main__':
-# encode_obj = DefaultEncoding(
-# in_file = sys.argv[1],
-# bug_handler = Exception,
-# check_raw = True,
-# )
-# print encode_obj.get_codepage()
+if __name__ == '__main__':
+import sys
+encode_obj = DefaultEncoding(
+in_file = sys.argv[1],
+bug_handler = Exception,
+check_raw = True,
+)
+print encode_obj.get_codepage()
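
Note: the hunk above replaces hard-coded per-platform code pages with a fallback that is used only when no usable \ansicpg value was found. A standalone sketch of that fallback (function name and defaults are illustrative, not calibre's API):

def guess_codepage(platform, ansicpg=None):
    if ansicpg:                        # an explicit \ansicpgNNNN value wins
        return ansicpg
    fallback = {
        'Macintosh': '10000',          # \mac  -> Mac Roman
        'IBMPC': '437',                # \pc   -> OEM US
        'OS/2': '850',                 # \pca  -> OEM Latin-1
    }
    return fallback.get(platform, '1252')   # plain \ansi default

print(guess_codepage('Macintosh'))        # 10000
print(guess_codepage('Windows', '1251'))  # 1251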

View File

@@ -20,7 +20,7 @@ import sys, os, tempfile
 from calibre.ebooks.rtf2xml import copy
 class DeleteInfo:
-"""Delelet unecessary destination groups"""
+"""Delete unecessary destination groups"""
 def __init__(self,
 in_file ,
 bug_handler,
@@ -31,17 +31,14 @@ class DeleteInfo:
 self.__bug_handler = bug_handler
 self.__copy = copy
 self.__write_to = tempfile.mktemp()
+self.__run_level = run_level
+self.__initiate_allow()
 self.__bracket_count= 0
 self.__ob_count = 0
 self.__cb_count = 0
-# self.__after_asterisk = False
-# self.__delete = 0
-self.__initiate_allow()
 self.__ob = 0
 self.__write_cb = False
-self.__run_level = run_level
 self.__found_delete = False
-# self.__list = False
 def __initiate_allow(self):
 """
@@ -57,6 +54,8 @@ class DeleteInfo:
 'cw<an<annotation',
 'cw<cm<comment___',
 'cw<it<lovr-table',
+# info table
+'cw<di<company___',
 # 'cw<ls<list______',
 )
 self.__not_allowable = (
@@ -116,7 +115,6 @@
 """
 # Test for {\*}, in which case don't enter
 # delete state
-# self.__after_asterisk = False # only enter this function once
 self.__found_delete = True
 if self.__token_info == 'cb<nu<clos-brack':
 if self.__delete_count == self.__cb_count:
@@ -128,7 +126,7 @@
 # not sure what happens here!
 # believe I have a '{\*}
 if self.__run_level > 3:
-msg = 'flag problem\n'
+msg = 'Flag problem\n'
 raise self.__bug_handler, msg
 return True
 elif self.__token_info in self.__allowable :
@@ -173,8 +171,8 @@
 Return True for all control words.
 Return False otherwise.
 """
-if self.__delete_count == self.__cb_count and self.__token_info ==\
-'cb<nu<clos-brack':
+if self.__delete_count == self.__cb_count and \
+self.__token_info == 'cb<nu<clos-brack':
 self.__state = 'default'
 if self.__write_cb:
 self.__write_cb = False
@@ -186,32 +184,24 @@
 return False
 def delete_info(self):
-"""Main method for handling other methods. Read one line in at
+"""Main method for handling other methods. Read one line at
 a time, and determine whether to print the line based on the state."""
 with open(self.__file, 'r') as read_obj:
 with open(self.__write_to, 'w') as self.__write_obj:
 for line in read_obj:
 #ob<nu<open-brack<0001
-to_print = True
 self.__token_info = line[:16]
 if self.__token_info == 'ob<nu<open-brack':
 self.__ob_count = line[-5:-1]
 if self.__token_info == 'cb<nu<clos-brack':
 self.__cb_count = line[-5:-1]
+# Get action to perform
 action = self.__state_dict.get(self.__state)
 if not action:
-sys.stderr.write(_('No action in dictionary state is "%s" \n')
+sys.stderr.write('No action in dictionary state is "%s" \n'
 % self.__state)
-to_print = action(line)
-# if self.__after_asterisk:
-# to_print = self.__asterisk_func(line)
-# elif self.__list:
-# self.__in_list_func(line)
-# elif self.__delete:
-# to_print = self.__delete_func(line)
-# else:
-# to_print = self.__default_func(line)
-if to_print:
+# Print if allowed by action
+if action(line):
 self.__write_obj.write(line)
 copy_obj = copy.Copy(bug_handler = self.__bug_handler)
 if self.__copy:
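
Note: the delete_info() rewrite above drops the to_print flag, so every state handler now returns True when the current line should be written. A toy version of that dispatch loop, with made-up states and handlers:

import sys

class TinyStateMachine(object):
    def __init__(self):
        self.state = 'default'
        self.state_dict = {'default': self.default_func, 'delete': self.delete_func}

    def default_func(self, line):
        if line.startswith('ob<'):      # pretend an open bracket starts a group to delete
            self.state = 'delete'
            return False
        return True

    def delete_func(self, line):
        if line.startswith('cb<'):      # pretend a close bracket ends the deleted group
            self.state = 'default'
        return False

    def run(self, lines):
        for line in lines:
            action = self.state_dict.get(self.state)
            if not action:
                sys.stderr.write('No action in dictionary state is "%s" \n' % self.state)
                continue
            if action(line):            # write the line only if the handler allows it
                sys.stdout.write(line)

TinyStateMachine().run(['tx<kept\n', 'ob<group\n', 'tx<dropped\n', 'cb<group\n', 'tx<kept too\n'])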

View File

@@ -15,8 +15,10 @@
 # #
 # #
 #########################################################################
-import sys, os, tempfile
+import sys, os, tempfile, re
 from calibre.ebooks.rtf2xml import copy
 class Info:
 """
 Make tags for document-information
@@ -42,12 +44,14 @@ class Info:
 self.__copy = copy
 self.__run_level = run_level
 self.__write_to = tempfile.mktemp()
 def __initiate_values(self):
 """
 Initiate all values.
 """
 self.__text_string = ''
 self.__state = 'before_info_table'
+self.rmspace = re.compile(r'\s+')
 self.__state_dict = {
 'before_info_table': self.__before_info_table_func,
 'after_info_table': self.__after_info_table_func,
@@ -58,27 +62,49 @@ class Info:
 self.__info_table_dict = {
 'cw<di<title_____' : (self.__found_tag_with_text_func, 'title'),
 'cw<di<author____' : (self.__found_tag_with_text_func, 'author'),
+'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'),
+'cw<di<manager___' : (self.__found_tag_with_text_func, 'manager'),
+'cw<di<company___' : (self.__found_tag_with_text_func, 'company'),
 'cw<di<keywords__' : (self.__found_tag_with_text_func, 'keywords'),
+'cw<di<category__' : (self.__found_tag_with_text_func, 'category'),
 'cw<di<doc-notes_' : (self.__found_tag_with_text_func, 'doc-notes'),
 'cw<di<subject___' : (self.__found_tag_with_text_func, 'subject'),
-'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'),
+'cw<di<linkbase__' : (self.__found_tag_with_text_func, 'hyperlink-base'),
 'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'),
 'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
-'cw<di<edit-time_' : (self.__single_field_func, 'editing-time'),
+'cw<di<edit-time_' : (self.__found_tag_with_tokens_func, 'editing-time'),
+'cw<di<print-time' : (self.__found_tag_with_tokens_func, 'printing-time'),
+'cw<di<backuptime' : (self.__found_tag_with_tokens_func, 'backup-time'),
 'cw<di<num-of-wor' : (self.__single_field_func, 'number-of-words'),
 'cw<di<num-of-chr' : (self.__single_field_func, 'number-of-characters'),
+'cw<di<numofchrws' : (self.__single_field_func, 'number-of-characters-without-space'),
 'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'),
+'cw<di<version___' : (self.__single_field_func, 'version'),
+'cw<di<intern-ver' : (self.__single_field_func, 'internal-version-number'),
+'cw<di<internalID' : (self.__single_field_func, 'internal-id-number'),
 }
 self.__token_dict = {
 'year______' : 'year',
 'month_____' : 'month',
 'day_______' : 'day',
 'minute____' : 'minute',
+'second____' : 'second',
 'revis-time' : 'revision-time',
+'create-tim' : 'creation-time',
+'edit-time_' : 'editing-time',
+'print-time' : 'printing-time',
+'backuptime' : 'backup-time',
 'num-of-wor' : 'number-of-words',
 'num-of-chr' : 'number-of-characters',
+'numofchrws' : 'number-of-characters-without-space',
 'num-of-pag' : 'number-of-pages',
+'version___' : 'version',
+'intern-ver' : 'internal-version-number',
+'internalID' : 'internal-id-number',
 }
 def __before_info_table_func(self, line):
 """
 Required:
@@ -92,6 +118,7 @@ class Info:
 if self.__token_info == 'mi<mk<doc-in-beg':
 self.__state = 'in_info_table'
 self.__write_obj.write(line)
 def __in_info_table_func(self, line):
 """
 Requires:
@@ -112,6 +139,7 @@ class Info:
 action(line, tag)
 else:
 self.__write_obj.write(line)
 def __found_tag_with_text_func(self, line, tag):
 """
 Requires:
@@ -126,6 +154,7 @@ class Info:
 """
 self.__tag = tag
 self.__state = 'collect_text'
 def __collect_text_func(self, line):
 """
 Requires:
@@ -139,14 +168,17 @@ class Info:
 """
 if self.__token_info == 'mi<mk<docinf-end':
 self.__state = 'in_info_table'
-self.__write_obj.write(
-'mi<tg<open______<%s\n'
-'tx<nu<__________<%s\n'
-'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
-)
+#Don't print empty tags
+if len(self.rmspace.sub('',self.__text_string)):
+self.__write_obj.write(
+'mi<tg<open______<%s\n'
+'tx<nu<__________<%s\n'
+'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
+)
 self.__text_string = ''
 elif line[0:2] == 'tx':
 self.__text_string += line[17:-1]
 def __found_tag_with_tokens_func(self, line, tag):
 """
 Requires:
@@ -163,6 +195,7 @@ class Info:
 self.__state = 'collect_tokens'
 self.__text_string = 'mi<tg<empty-att_<%s' % tag
 #mi<tg<empty-att_<page-definition<margin>33\n
 def __collect_tokens_func(self, line):
 """
 Requires:
@@ -194,18 +227,19 @@ class Info:
 att = line[6:16]
 value = line[20:-1]
 att_changed = self.__token_dict.get(att)
-if att_changed == None:
+if att_changed is None:
 if self.__run_level > 3:
-msg = 'no dictionary match for %s\n' % att
+msg = 'No dictionary match for %s\n' % att
 raise self.__bug_handler, msg
 else:
 self.__text_string += '<%s>%s' % (att_changed, value)
 def __single_field_func(self, line, tag):
 value = line[20:-1]
 self.__write_obj.write(
-'mi<tg<empty-att_<%s'
-'<%s>%s\n' % (tag, tag, value)
+'mi<tg<empty-att_<%s<%s>%s\n' % (tag, tag, value)
 )
 def __after_info_table_func(self, line):
 """
 Requires:
@@ -217,6 +251,7 @@ class Info:
 the file.
 """
 self.__write_obj.write(line)
 def fix_info(self):
 """
 Requires:
@@ -234,20 +269,15 @@ class Info:
 information table, simply write the line to the output file.
 """
 self.__initiate_values()
-read_obj = open(self.__file, 'r')
-self.__write_obj = open(self.__write_to, 'w')
-line_to_read = 1
-while line_to_read:
-line_to_read = read_obj.readline()
-line = line_to_read
-self.__token_info = line[:16]
-action = self.__state_dict.get(self.__state)
-if action == None:
-sys.stderr.write('no no matching state in module styles.py\n')
-sys.stderr.write(self.__state + '\n')
-action(line)
-read_obj.close()
-self.__write_obj.close()
+with open(self.__file, 'r') as read_obj:
+with open(self.__write_to, 'wb') as self.__write_obj:
+for line in read_obj:
+self.__token_info = line[:16]
+action = self.__state_dict.get(self.__state)
+if action is None:
+sys.stderr.write('No matching state in module styles.py\n')
+sys.stderr.write(self.__state + '\n')
+action(line)
 copy_obj = copy.Copy(bug_handler = self.__bug_handler)
 if self.__copy:
 copy_obj.copy_file(self.__write_to, "info.data")
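
Note: the __collect_text_func() change above only writes a document-information tag when the collected text contains something other than whitespace, using the new rmspace regex. The same guard in isolation (names are illustrative):

import re

rmspace = re.compile(r'\s+')

def emit_info_tag(tag, text):
    # Don't print empty tags: skip when nothing is left after stripping whitespace
    if len(rmspace.sub('', text)):
        return ('mi<tg<open______<%s\n'
                'tx<nu<__________<%s\n'
                'mi<tg<close_____<%s\n' % (tag, text, tag))
    return ''

print(repr(emit_info_tag('title', '  \n ')))    # '' -> tag suppressed
print(repr(emit_info_tag('title', 'A Title')))  # open/text/close triplet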

View File

@@ -70,7 +70,7 @@ class ProcessTokens:
 ';' : ('mc', ';', self.ms_sub_func),
 # this must be wrong
 '-' : ('mc', '-', self.ms_sub_func),
-'line' : ('mi', 'hardline-break', self.hardline_func), #calibre
+'line' : ('mi', 'hardline-break', self.direct_conv_func), #calibre
 # misc => ml
 '*' : ('ml', 'asterisk__', self.default_func),
 ':' : ('ml', 'colon_____', self.default_func),
@@ -78,7 +78,6 @@
 'backslash' : ('nu', '\\', self.text_func),
 'ob' : ('nu', '{', self.text_func),
 'cb' : ('nu', '}', self.text_func),
-#'line' : ('nu', ' ', self.text_func), calibre
 # paragraph formatting => pf
 'page' : ('pf', 'page-break', self.default_func),
 'par' : ('pf', 'par-end___', self.default_func),
@@ -231,11 +230,15 @@
 'trhdr' : ('tb', 'row-header', self.default_func),
 # preamble => pr
 # document information => di
+# TODO integrate \userprops
 'info' : ('di', 'doc-info__', self.default_func),
+'title' : ('di', 'title_____', self.default_func),
 'author' : ('di', 'author____', self.default_func),
 'operator' : ('di', 'operator__', self.default_func),
-'title' : ('di', 'title_____', self.default_func),
+'manager' : ('di', 'manager___', self.default_func),
+'company' : ('di', 'company___', self.default_func),
 'keywords' : ('di', 'keywords__', self.default_func),
+'category' : ('di', 'category__', self.default_func),
 'doccomm' : ('di', 'doc-notes_', self.default_func),
 'comment' : ('di', 'doc-notes_', self.default_func),
 'subject' : ('di', 'subject___', self.default_func),
@@ -244,11 +247,19 @@
 'mo' : ('di', 'month_____', self.default_func),
 'dy' : ('di', 'day_______', self.default_func),
 'min' : ('di', 'minute____', self.default_func),
+'sec' : ('di', 'second____', self.default_func),
 'revtim' : ('di', 'revis-time', self.default_func),
+'edmins' : ('di', 'edit-time_', self.default_func),
+'printim' : ('di', 'print-time', self.default_func),
+'buptim' : ('di', 'backuptime', self.default_func),
 'nofwords' : ('di', 'num-of-wor', self.default_func),
 'nofchars' : ('di', 'num-of-chr', self.default_func),
+'nofcharsws' : ('di', 'numofchrws', self.default_func),
 'nofpages' : ('di', 'num-of-pag', self.default_func),
-'edmins' : ('di', 'edit-time_', self.default_func),
+'version' : ('di', 'version___', self.default_func),
+'vern' : ('di', 'intern-ver', self.default_func),
+'hlinkbase' : ('di', 'linkbase__', self.default_func),
+'id' : ('di', 'internalID', self.default_func),
 # headers and footers => hf
 'headerf' : ('hf', 'head-first', self.default_func),
 'headerl' : ('hf', 'head-left_', self.default_func),
@@ -605,7 +616,7 @@
 def ms_sub_func(self, pre, token, num):
 return 'tx<mc<__________<%s\n' % token
-def hardline_func(self, pre, token, num):
+def direct_conv_func(self, pre, token, num):
 return 'mi<tg<empty_____<%s\n' % token
 def default_func(self, pre, token, num):

View File

@@ -27,11 +27,13 @@ class Tokenize:
 bug_handler,
 copy = None,
 run_level = 1,
-):
+# out_file = None,
+):
 self.__file = in_file
 self.__bug_handler = bug_handler
 self.__copy = copy
 self.__write_to = tempfile.mktemp()
+# self.__out_file = out_file
 self.__compile_expressions()
 #variables
 self.__uc_char = 0
@@ -113,6 +115,8 @@
 def __sub_reg_split(self,input_file):
 input_file = self.__replace_spchar.mreplace(input_file)
+# this is for older RTF
+input_file = self.__par_exp.sub('\n\\par \n', input_file)
 input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
 input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
 #remove \n in bin data
@@ -127,7 +131,7 @@
 # this is for older RTF
 #line = re.sub(self.__par_exp, '\\par ', line)
 #return filter(lambda x: len(x) > 0, \
 #(self.__remove_line.sub('', x) for x in tokens))
 def __compile_expressions(self):
 SIMPLE_RPL = {
@@ -153,8 +157,6 @@
 # put a backslash in front of to eliminate special cases and
 # make processing easier
 "}": "\\}",
-# this is for older RTF
-r'\\$': '\\par ',
 }
 self.__replace_spchar = MReplace(SIMPLE_RPL)
 #add ;? in case of char following \u
@@ -168,10 +170,12 @@
 #why keep backslash whereas \is replaced before?
 #remove \n from endline char
 self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
+#this is for old RTF
+self.__par_exp = re.compile(r'\\\n+')
+# self.__par_exp = re.compile(r'\\$')
 #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
 #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
 #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
-#self.__par_exp = re.compile(r'\\$')
 #self.__remove_line = re.compile(r'\n+')
 #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
 ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
@@ -199,7 +203,24 @@
 copy_obj = copy.Copy(bug_handler = self.__bug_handler)
 if self.__copy:
 copy_obj.copy_file(self.__write_to, "tokenize.data")
+# if self.__out_file:
+# self.__file = self.__out_file
 copy_obj.rename(self.__write_to, self.__file)
 os.remove(self.__write_to)
 #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
+# import sys
+# def main(args=sys.argv):
+# if len(args) < 1:
+# print 'No file'
+# return
+# file = 'data_tokens.txt'
+# if len(args) == 3:
+# file = args[2]
+# to = Tokenize(args[1], Exception, out_file = file)
+# to.tokenize()
+# if __name__ == '__main__':
+# sys.exit(main())
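
Note: the tokenizer change above moves the old-RTF handling out of the literal-replacement table and into a compiled regex, so a bare backslash at the end of a line (a paragraph mark in older RTF) is rewritten to an explicit \par control word. A small demonstration of the same substitution (sample text is made up):

import re

par_exp = re.compile(r'\\\n+')   # a backslash followed by one or more newlines
sample = 'first paragraph\\\nsecond paragraph'
print(par_exp.sub('\n\\\\par \n', sample))
# first paragraph
# \par
# second paragraph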

View File

@@ -505,7 +505,7 @@ class FileDialog(QObject):
 self.selected_files = []
 if mode == QFileDialog.AnyFile:
 f = unicode(QFileDialog.getSaveFileName(parent, title, initial_dir, ftext, ""))
-if f and os.path.exists(f):
+if f:
 self.selected_files.append(f)
 elif mode == QFileDialog.ExistingFile:
 f = unicode(QFileDialog.getOpenFileName(parent, title, initial_dir, ftext, ""))

View File

@@ -28,7 +28,7 @@ class GenerateCatalogAction(InterfaceAction):
 if not ids:
 return error_dialog(self.gui, _('No books selected'),
-_('No books selected to generate catalog for'),
+_('No books selected for catalog generation'),
 show=True)
 db = self.gui.library_view.model().db
@@ -55,9 +55,9 @@
 def catalog_generated(self, job):
 if job.result:
-# Search terms nulled catalog results
-return error_dialog(self.gui, _('No books found'),
-_("No books to catalog\nCheck job details"),
+# Error during catalog generation
+return error_dialog(self.gui, _('Catalog generation terminated'),
+job.result,
 show=True)
 if job.failed:
 return self.gui.job_exception(job)

View File

@@ -1144,7 +1144,9 @@ class EPUB_MOBI(CatalogPlugin):
 def error(self):
 def fget(self):
 return self.__error
-return property(fget=fget)
+def fset(self, val):
+self.__error = val
+return property(fget=fget,fset=fset)
 @dynamic_property
 def generateForKindle(self):
 def fget(self):
@@ -1411,6 +1413,88 @@
 except:
 pass
+def fetchBooksByAuthor(self):
+'''
+Generate a list of titles sorted by author from the database
+return = Success
+'''
+self.updateProgressFullStep("Sorting database")
+'''
+# Sort titles case-insensitive, by author
+self.booksByAuthor = sorted(self.booksByTitle,
+key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
+'''
+self.booksByAuthor = list(self.booksByTitle)
+self.booksByAuthor.sort(self.author_compare)
+if False and self.verbose:
+self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
+self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
+for title in self.booksByAuthor:
+self.opts.log.info((u" %-30s %-20s%5s " % \
+(title['title'][:30],
+title['series'][:20] if title['series'] else '',
+title['series_index'],
+)).encode('utf-8'))
+raise SystemExit
+# Build the unique_authors set from existing data
+authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
+# authors[] contains a list of all book authors, with multiple entries for multiple books by author
+# authors[]: (([0]:friendly [1]:sort))
+# unique_authors[]: (([0]:friendly [1]:sort [2]:book_count))
+books_by_current_author = 0
+current_author = authors[0]
+multiple_authors = False
+unique_authors = []
+for (i,author) in enumerate(authors):
+if author != current_author:
+# Note that current_author and author are tuples: (friendly, sort)
+multiple_authors = True
+if author != current_author and i:
+# Warn, exit if friendly matches previous, but sort doesn't
+if author[0] == current_author[0]:
+error_msg = _('''
+\n*** Metadata error ***
+Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
+Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
+then rebuild the catalog.\n''').format(author[0])
+self.opts.log.warn(error_msg)
+self.error = error_msg
+return False
+# New author, save the previous author/sort/count
+unique_authors.append((current_author[0], icu_title(current_author[1]),
+books_by_current_author))
+current_author = author
+books_by_current_author = 1
+elif i==0 and len(authors) == 1:
+# Allow for single-book lists
+unique_authors.append((current_author[0], icu_title(current_author[1]),
+books_by_current_author))
+else:
+books_by_current_author += 1
+else:
+# Add final author to list or single-author dataset
+if (current_author == author and len(authors) > 1) or not multiple_authors:
+unique_authors.append((current_author[0], icu_title(current_author[1]),
+books_by_current_author))
+if False and self.verbose:
+self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
+for author in unique_authors:
+self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
+author[2])).encode('utf-8'))
+self.authors = unique_authors
+return True
 def fetchBooksByTitle(self):
 self.updateProgressFullStep("Fetching database")
@@ -1562,90 +1646,9 @@
 title['title_sort'][0:40])).decode('mac-roman'))
 return True
 else:
+self.error = _("No books found to catalog.\nCheck 'Excluded books' criteria in E-book options.")
 return False
-def fetchBooksByAuthor(self):
-'''
-Generate a list of titles sorted by author from the database
-return = Success
-'''
-self.updateProgressFullStep("Sorting database")
-'''
-# Sort titles case-insensitive, by author
-self.booksByAuthor = sorted(self.booksByTitle,
-key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
-'''
-self.booksByAuthor = list(self.booksByTitle)
-self.booksByAuthor.sort(self.author_compare)
-if False and self.verbose:
-self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
-self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
-for title in self.booksByAuthor:
-self.opts.log.info((u" %-30s %-20s%5s " % \
-(title['title'][:30],
-title['series'][:20] if title['series'] else '',
-title['series_index'],
-)).encode('utf-8'))
-raise SystemExit
-# Build the unique_authors set from existing data
-authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
-# authors[] contains a list of all book authors, with multiple entries for multiple books by author
-# authors[]: (([0]:friendly [1]:sort))
-# unique_authors[]: (([0]:friendly [1]:sort [2]:book_count))
-books_by_current_author = 0
-current_author = authors[0]
-multiple_authors = False
-unique_authors = []
-for (i,author) in enumerate(authors):
-if author != current_author:
-# Note that current_author and author are tuples: (friendly, sort)
-multiple_authors = True
-if author != current_author and i:
-# Warn, exit if friendly matches previous, but sort doesn't
-if author[0] == current_author[0]:
-error_msg = _('''
-\n*** Metadata error ***
-Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
-Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
-then rebuild the catalog.
-*** Terminating catalog generation ***\n''').format(author[0])
-self.opts.log.warn(error_msg)
-return False
-# New author, save the previous author/sort/count
-unique_authors.append((current_author[0], icu_title(current_author[1]),
-books_by_current_author))
-current_author = author
-books_by_current_author = 1
-elif i==0 and len(authors) == 1:
-# Allow for single-book lists
-unique_authors.append((current_author[0], icu_title(current_author[1]),
-books_by_current_author))
-else:
-books_by_current_author += 1
-else:
-# Add final author to list or single-author dataset
-if (current_author == author and len(authors) > 1) or not multiple_authors:
-unique_authors.append((current_author[0], icu_title(current_author[1]),
-books_by_current_author))
-if False and self.verbose:
-self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
-for author in unique_authors:
-self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
-author[2])).encode('utf-8'))
-self.authors = unique_authors
-return True
 def fetchBookmarks(self):
 '''
 Collect bookmarks for catalog entries
@@ -5069,6 +5072,8 @@ then rebuild the catalog.
 abort_after_input_dump=False)
 plumber.merge_ui_recommendations(recommendations)
 plumber.run()
-return 0
+# returns to gui2.actions.catalog:catalog_generated()
+return None
 else:
-return 1
+# returns to gui2.actions.catalog:catalog_generated()
+return catalog.error
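
Note: fetchBooksByAuthor() above walks the author-sorted book list, counts books per author, and now records an error through the newly writable error property instead of silently failing when one author name carries two different sort values. A compact sketch of that grouping logic, with made-up data and a plain exception in place of the catalog's error reporting:

def unique_authors_with_counts(authors):
    # authors: one (friendly, sort) tuple per book, pre-sorted by sort value
    unique, seen_sorts = [], {}
    for friendly, sort in authors:
        if seen_sorts.get(friendly, sort) != sort:
            raise ValueError("Inconsistent Author Sort values for Author '%s'" % friendly)
        seen_sorts[friendly] = sort
        if unique and unique[-1][0] == friendly:
            unique[-1] = (friendly, sort, unique[-1][2] + 1)   # one more book by this author
        else:
            unique.append((friendly, sort, 1))                 # first book by a new author
    return unique

books = [('Jane Doe', 'Doe, Jane'), ('Jane Doe', 'Doe, Jane'), ('John Roe', 'Roe, John')]
print(unique_authors_with_counts(books))
# [('Jane Doe', 'Doe, Jane', 2), ('John Roe', 'Roe, John', 1)]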

View File

@@ -693,8 +693,12 @@ def command_catalog(args, dbpath):
 }
 with plugin:
-plugin.run(args[1], opts, get_db(dbpath, opts))
-return 0
+ret = plugin.run(args[1], opts, get_db(dbpath, opts))
+if ret is None:
+ret = 0
+else:
+ret = 1
+return ret
 # end of GR additions

View File

@@ -690,10 +690,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
 mi = Metadata(None)
 aut_list = row[fm['au_map']]
-if not aut_list:
-aut_list = []
+if aut_list:
+aut_list = [p.split(':::') for p in aut_list.split(':#:') if p]
 else:
-aut_list = [p.split(':::') for p in aut_list.split(':#:')]
+aut_list = []
 aum = []
 aus = {}
 for (author, author_sort) in aut_list:
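
Note: the change above rewrites the branch and, more importantly, adds an 'if p' guard when parsing the cached author map, which is a ':#:'-separated list of 'name:::sort' pairs; the guard drops the empty fragments that stray separators would otherwise produce. The parsing in isolation (sample data is made up):

def parse_au_map(aut_list):
    # assumed format: 'name:::sort' pairs joined by ':#:'
    if aut_list:
        return [p.split(':::') for p in aut_list.split(':#:') if p]
    return []

print(parse_au_map('Jane Doe:::Doe, Jane:#:John Roe:::Roe, John:#:'))
# [['Jane Doe', 'Doe, Jane'], ['John Roe', 'Roe, John']]  -- trailing separator ignored
print(parse_au_map(None))  # []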

View File

@@ -437,6 +437,15 @@ My antivirus program claims |app| is a virus/trojan?
 Your antivirus program is wrong. |app| is a completely open source product. You can actually browse the source code yourself (or hire someone to do it for you) to verify that it is not a virus. Please report the false identification to whatever company you buy your antivirus software from. If the antivirus program is preventing you from downloading/installing |app|, disable it temporarily, install |app| and then re-enable it.
+How do I backup |app|?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The most important thing to backup is the |app| library folder, that contains all your books and metadata. This is the folder you chose for your |app| library when you ran |app| for the first time. You can get the path to the library folder by clicking the |app| icon on the main toolbar. You must backup this complete folder with all its files and sub-folders.
+You can switch |app| to using a backed up library folder by simply clicking the |app| icon on the toolbar and choosing your backup library folder.
+If you want to backup the |app| configuration/plugins, you have to backup the config directory. You can find this config directory via :guilabel:`Preferences->Miscellaneous`. Note that restoring configuration directories is not officially supported, but should work in most cases. Just copy the contents of the backup directory into the current configuration directory to restore.
 How do I use purchased EPUB books with |app|?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Most purchased EPUB books have `DRM <http://wiki.mobileread.com/wiki/DRM>`_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to |app|.
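
Note: a minimal sketch of the backup step described in the new "How do I backup |app|?" entry above: copy the whole library folder somewhere safe. The paths are placeholders, not real defaults; point them at your own library and backup locations.

import shutil

LIBRARY_DIR = '/home/user/Calibre Library'    # the folder chosen on first run
BACKUP_DIR = '/mnt/backup/Calibre Library'    # destination must not already exist

shutil.copytree(LIBRARY_DIR, BACKUP_DIR)      # copies all files and sub-folders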