diff --git a/resources/images/mimetypes/odt.svg b/resources/images/mimetypes/odt.svg
new file mode 100644
index 0000000000..f8c3df5d04
--- /dev/null
+++ b/resources/images/mimetypes/odt.svg
@@ -0,0 +1,63 @@
+[63 lines of SVG markup for the new ODT mimetype icon; the XML content was lost in extraction and is not reproduced here]
\ No newline at end of file
diff --git a/setup.py b/setup.py
deleted file mode 100644
index d8bd0267ee..0000000000
--- a/setup.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-from __future__ import with_statement
-
-__license__   = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-import sys, os, optparse
-
-sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
-
-import setup.commands as commands
-from setup import prints, get_warnings
-
-def check_version_info():
-    vi = sys.version_info
-    if vi[0] == 2 and vi[1] > 5:
-        return None
-    return 'calibre requires python >= 2.6'
-
-def option_parser():
-    parser = optparse.OptionParser()
-    parser.add_option('-c', '--clean', default=False, action='store_true',
-            help=('Instead of running the command delete all files generated '
-                'by the command'))
-    parser.add_option('--clean-backups', default=False, action='store_true',
-            help='Delete all backup files from the source tree')
-    parser.add_option('--clean-all', default=False, action='store_true',
-            help='Delete all machine generated files from the source tree')
-    return parser
-
-def clean_backups():
-    for root, _, files in os.walk('.'):
-        for name in files:
-            for t in ('.pyc', '.pyo', '~', '.swp', '.swo'):
-                if name.endswith(t):
-                    os.remove(os.path.join(root, name))
-
-
-def main(args=sys.argv):
-    if len(args) == 1 or args[1] in ('-h', '--help'):
-        print 'Usage: python', args[0], 'command', '[options]'
-        print '\nWhere command is one of:'
-        print
-        for x in sorted(commands.__all__):
-            print '%-20s -'%x,
-            c = getattr(commands, x)
-            desc = getattr(c, 'short_description', c.description)
-            print desc
-
-        print '\nTo get help on a particular command, run:'
-        print '\tpython', args[0], 'command -h'
-        return 1
-
-    command = args[1]
-    if command not in commands.__all__:
-        print command, 'is not a recognized command.'
-        print 'Valid commands:', ', '.join(commands.__all__)
-        return 1
-
-    command = getattr(commands, command)
-
-    parser = option_parser()
-    command.add_all_options(parser)
-    parser.set_usage('Usage: python setup.py %s [options]\n\n'%args[1]+\
-            command.description)
-
-    opts, args = parser.parse_args(args)
-
-    if opts.clean_backups:
-        clean_backups()
-
-    if opts.clean:
-        prints('Cleaning', args[1])
-        command.clean()
-        return 0
-
-    if opts.clean_all:
-        for cmd in commands.__all__:
-            prints('Cleaning', cmd)
-            getattr(commands, cmd).clean()
-        return 0
-
-    command.run_all(opts)
-
-    warnings = get_warnings()
-    if warnings:
-        print
-        prints('There were', len(warnings), 'warning(s):')
-        print
-        for args, kwargs in warnings:
-            prints('*', *args, **kwargs)
-        print
-
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 2622d82d99..247f3e8cef 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -50,7 +50,7 @@ class RTFInput(InputFormatPlugin):
         parser = ParseRtf(
             in_file    = stream,
             out_file   = ofile,
-            #deb_dir = 'I:\\Calibre\\rtfdebug',
+            deb_dir = 'I:\\Calibre\\rtfdebug',
             # Convert symbol fonts to unicode equivalents. Default
             # is 1
             convert_symbol = 1,
@@ -187,16 +187,17 @@ class RTFInput(InputFormatPlugin):
        self.log = log
        self.log('Converting RTF to XML...')
        # Name of the preprocessed RTF file
-       fname = self.preprocess(stream.name)
+       #fname = self.preprocess(stream.name)
+       fname = stream.name
        try:
            xml = self.generate_xml(fname)
        except RtfInvalidCodeException, e:
            raise ValueError(_('This RTF file has a feature calibre does not '
                'support. Convert it to HTML first and then try it.\n%s')%e)
-       '''dataxml = open('dataxml.xml', 'w')
+       dataxml = open('dataxml.xml', 'w')
        dataxml.write(xml)
-       dataxml.close'''
+       dataxml.close()
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
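Note on the two input.py hunks above: they hard-code a Windows-only rtf2xml debug directory (I:\Calibre\rtfdebug) and unconditionally re-enable the dump of the intermediate XML to dataxml.xml in the current working directory, so both read as temporary debugging aids rather than intended behaviour. A sketch of a more portable way to gate the same output (hypothetical, not part of this patch; the CALIBRE_RTF_DEBUG variable is invented for illustration):

    # Hypothetical sketch only: gate rtf2xml debug artifacts behind an
    # environment variable instead of a hard-coded Windows path.
    import os, tempfile

    deb_dir = None
    if os.environ.get('CALIBRE_RTF_DEBUG'):
        # mkdtemp works on every platform, unlike 'I:\\Calibre\\rtfdebug'
        deb_dir = tempfile.mkdtemp(prefix='rtfdebug-')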
diff --git a/src/calibre/ebooks/rtf/preprocess.py b/src/calibre/ebooks/rtf/preprocess.py
index a3076651fd..967bffe91b 100644
--- a/src/calibre/ebooks/rtf/preprocess.py
+++ b/src/calibre/ebooks/rtf/preprocess.py
@@ -228,8 +228,9 @@ class RtfTokenizer():
     def tokenize(self):
         i = 0
         lastDataStart = -1
+        # parse character by character
         while i < len(self.rtfData):
-
+            # a '{' starts a group
             if isChar(self.rtfData[i], '{'):
                 if lastDataStart > -1:
                     self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@@ -237,7 +238,7 @@ class RtfTokenizer():
                 self.tokens.append(tokenDelimitatorStart())
                 i = i + 1
                 continue
-
+            # a '}' ends a group
             if isChar(self.rtfData[i], '}'):
                 if lastDataStart > -1:
                     self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@@ -245,7 +246,7 @@ class RtfTokenizer():
                 self.tokens.append(tokenDelimitatorEnd())
                 i = i + 1
                 continue
-
+            # copy the data if there is a control character
             if isChar(self.rtfData[i], '\\'):
                 if i + 1 >= len(self.rtfData):
                     raise Exception('Error: Control character found at the end of the document.')
@@ -254,6 +255,7 @@ class RtfTokenizer():
                     self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
                     lastDataStart = -1
+                # the token starts here
                 tokenStart = i
                 i = i + 1
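The loop annotated above scans the raw RTF character by character: '{' and '}' end any pending data run and become group-delimiter tokens, a backslash starts a control word or control symbol, and everything else accumulates as document data. A minimal standalone sketch of the same scanning idea (an illustration only; calibre's RtfTokenizer emits tokenData/tokenDelimitator objects and handles binary data and other cases this toy ignores):

    # Toy version of the scan in RtfTokenizer.tokenize, for illustration.
    def toy_tokenize(rtf):
        tokens, i, start = [], 0, -1
        while i < len(rtf):
            c = rtf[i]
            if c in '{}':
                if start > -1:            # flush pending document data
                    tokens.append(rtf[start:i])
                    start = -1
                tokens.append(c)          # group delimiter token
                i += 1
            elif c == '\\':
                if start > -1:
                    tokens.append(rtf[start:i])
                    start = -1
                j = i + 1
                if j < len(rtf) and not rtf[j].isalpha():
                    j += 1                # control symbol, e.g. \~ or \{
                else:
                    while j < len(rtf) and rtf[j].isalpha():
                        j += 1            # control word letters
                    while j < len(rtf) and rtf[j].isdigit():
                        j += 1            # optional numeric parameter
                tokens.append(rtf[i:j])
                i = j
            else:
                if start == -1:           # start a new data run
                    start = i
                i += 1
        if start > -1:
            tokens.append(rtf[start:])
        return tokens

    # toy_tokenize(r'{\rtf1{\b bold} plain}') gives
    # ['{', '\\rtf1', '{', '\\b', ' bold', '}', ' plain', '}']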
diff --git a/src/calibre/ebooks/rtf2xml/line_endings.py b/src/calibre/ebooks/rtf2xml/line_endings.py
index 6511657aa9..e77e5d747c 100755
--- a/src/calibre/ebooks/rtf2xml/line_endings.py
+++ b/src/calibre/ebooks/rtf2xml/line_endings.py
@@ -32,7 +32,7 @@ class FixLineEndings:
         self.__write_to = tempfile.mktemp()
         self.__replace_illegals = replace_illegals
     def fix_endings(self):
-        illegal_regx = re.compile( '\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
+        illegal_regx = re.compile('\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
         # always check since I have to get rid of illegal characters
         #read
         read_obj = open(self.__file, 'r')
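The only change to line_endings.py is cosmetic (a stray space after the opening parenthesis is dropped). For the record, the pattern strips most of the C0 control characters that are illegal in the XML rtf2xml produces (form feed, \x0C, slips through), while deliberately leaving tab (\x09), line feed (\x0A) and carriage return (\x0D) untouched. An equivalent, more compact character-class form of the same 16-branch alternation (illustration only; the patch keeps the alternation):

    import re

    # Matches exactly the same characters as the alternation above.
    illegal_regx = re.compile('[\x00-\x08\x0B\x0E-\x13]')
    assert illegal_regx.sub('', 'a\x00b\tc\x13') == 'ab\tc'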
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index e162e8c992..3aa2079fb3 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -16,7 +16,10 @@
 #                                                                       #
 #########################################################################
 import os, re, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+from calibre.utils.mreplace import MReplace
+
 class Tokenize:
     """Tokenize RTF into one line per field.
     Each line will contain information useful for the rest of the script"""
     def __init__(self,
@@ -28,20 +31,162 @@ class Tokenize:
         self.__file = in_file
         self.__bug_handler = bug_handler
         self.__copy = copy
-        self.__special_tokens = [ '_', '~', "'", '{', '}' ]
         self.__write_to = tempfile.mktemp()
+        self.__compile_expressions()
+        #variables
+        self.__uc_char = 0
+        self.__uc_bin = False
+        self.__uc_value = [1]
+
     def __from_ms_to_utf8(self,match_obj):
         uni_char = int(match_obj.group(1))
         if uni_char < 0:
             uni_char += 65536
         return '&#x' + str('%X' % uni_char) + ';'
-    def __neg_unicode_func(self, match_obj):
-        neg_uni_char = int(match_obj.group(1)) * -1
-        # sys.stderr.write(str( neg_uni_char))
-        uni_char = neg_uni_char + 65536
-        return '&#x' + str('%X' % uni_char) + ';'
-    def __sub_line_reg(self,line):
-        line = line.replace("\\\\", "\\backslash ")
+
+    def __reini_utf8_counters(self):
+        self.__uc_char = 0
+        self.__uc_bin = False
+
+    def __unicode_process(self, token):
+        # change scope in: a group starts, reuse the current \uc value
+        if token == '\{':
+            self.__uc_value.append(self.__uc_value[-1])
+            # basic error handling
+            self.__reini_utf8_counters()
+            return token
+        # change scope out: evaluate dict and rebuild
+        elif token == '\}':
+            #self.__uc_value.pop()
+            self.__reini_utf8_counters()
+            return token
+        # add a uc control
+        elif token[:3] == '\uc':
+            self.__uc_value[-1] = int(token[3:])
+            self.__reini_utf8_counters()
+            return token
+        # handle a uc-skippable char
+        elif self.__uc_char:
+            #if token[:1] == "\" and token[:1] == "\"
+            pass
+        # go for a real \u token
+        match_obj = self.__utf_exp.match(token)
+        if match_obj is not None:
+            # get the value and handle the negative case
+            uni_char = int(match_obj.group(1))
+            uni_len = len(match_obj.group(1)) + 2
+            if uni_char < 0:
+                uni_char += 65536
+            uni_char = unichr(uni_char).encode('ascii', 'xmlcharrefreplace')
+            # if not uc0
+            if self.__uc_value[-1]:
+                self.__uc_char = self.__uc_value[-1]
+            # there is only a unicode char
+            if len(token) <= uni_len:
+                return uni_char
+            # a unicode char and something else;
+            # must come after, as the text is split on \
+            elif not self.__uc_value[-1]:
+                print('not only token uc0 token: ' + uni_char + token[uni_len:])
+                return uni_char + token[uni_len:]
+            # if not uc0 and chars
+            else:
+                for i in xrange(uni_len, len(token)):
+                    if token[i] == " ":
+                        continue
+                    elif self.__uc_char > 0:
+                        self.__uc_char -= 1
+                    else:
+                        return uni_char + token[i:]
+        #print('uc: ' + str(self.__uc_value) + ' uni: ' + str(uni_char) + ' token: ' + token)
+        # default
+        return token
+
+    def __sub_reg_split(self,input_file):
+        input_file = self.__replace_spchar.mreplace(input_file)
+        #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
+        #line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
+        # this is for older RTF
+        #line = re.sub(self.__par_exp, '\\par ', line)
+        input_file = re.sub(self.__ms_hex_exp, "\\mshex0\g<1> ", input_file)
+        #split
+        tokens = re.split(self.__splitexp, input_file)
+        #remove empty tokens and \n
+        return filter(lambda x: len(x) > 0 and x != '\n', tokens)
+        #return filter(lambda x: len(x) > 0, \
+        #    (self.__remove_line.sub('', x) for x in tokens))
+
+    def __compile_expressions(self):
+        SIMPLE_RPL = {
+            "\\\\": "\\backslash ",
+            "\\~": "\\~ ",
+            "\\;": "\\; ",
+            "&": "&amp;",
+            "<": "&lt;",
+            ">": "&gt;",
+            "\\~": "\\~ ",
+            "\\_": "\\_ ",
+            "\\:": "\\: ",
+            "\\-": "\\- ",
+            # turn into a generic token to eliminate special
+            # cases and make processing easier
+            "\\{": "\\ob ",
+            # turn into a generic token to eliminate special
+            # cases and make processing easier
+            "\\}": "\\cb ",
+            # put a backslash in front to eliminate special cases and
+            # make processing easier
+            "{": "\\{",
+            # put a backslash in front to eliminate special cases and
+            # make processing easier
+            "}": "\\}",
+            # this is for older RTF
+            r'\\$': '\\par ',
+        }
+        self.__replace_spchar = MReplace(SIMPLE_RPL)
+        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
+        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}") #modify this
+        #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
+        #add \n to the split expression for whole-file reading
+        #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
+        #why keep the backslash case when \\ is replaced before the split?
+        self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
+        #self.__par_exp = re.compile(r'\\$')
+        #self.__remove_line = re.compile(r'\n+')
+        #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
+        ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
+
+    def tokenize(self):
+        """Main method: reads the whole file, uses __sub_reg_split to make
+        the basic substitutions and split the data, then processes the
+        tokens itself"""
+        #read
+        read_obj = open(self.__file, 'r')
+        input_file = read_obj.read()
+        read_obj.close()
+
+        #process simple replacements and split, giving us a correct list
+        #and removing '' and \n in the process
+        tokens = self.__sub_reg_split(input_file)
+        #correct unicode
+        #tokens = map(self.__unicode_process, tokens)
+        #remove empty items created by removing \uc
+        #tokens = filter(lambda x: len(x) > 0, tokens)
+
+        #write
+        write_obj = open(self.__write_to, 'wb')
+        write_obj.write('\n'.join(tokens))
+        write_obj.close()
+        #move and copy
+        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
+        if self.__copy:
+            copy_obj.copy_file(self.__write_to, "tokenize.data")
+        copy_obj.rename(self.__write_to, self.__file)
+        os.remove(self.__write_to)
+
+    #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
+    '''line = line.replace("\\\\", "\\backslash ")
         line = line.replace("\\~", "\\~ ")
         line = line.replace("\\;", "\\; ")
         line = line.replace("&", "&amp;")
@@ -63,54 +208,37 @@ class Tokenize:
         # put a backslash in front to eliminate special cases and
         # make processing easier
         line = line.replace("}", "\\}")
-        line = re.sub(self.__utf_exp, self.__from_ms_to_utf8, line)
-        # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
-        line = re.sub(self.__ms_hex_exp, "\\mshex0\g<1> ", line)
-        ##line = line.replace("\\backslash", "\\\\")
-        # this is for older RTF
-        line = re.sub(self.__par_exp, '\\par ', line)
-        return line
-    def __compile_expressions(self):
-        self.__ms_hex_exp = re.compile(r"\\\'(..)")
-        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
-        self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
-        self.__par_exp = re.compile(r'\\$')
-        self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
-        ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
-    def __create_tokens(self):
-        self.__compile_expressions()
-        read_obj = open(self.__file, 'r')
-        write_obj = open(self.__write_to, 'wb')
+
         line_to_read = "dummy"
         while line_to_read:
             line_to_read = read_obj.readline()
             line = line_to_read
             line = line.replace("\n", "")
-            line = self.__sub_line_reg(line)
-            tokens = re.split(self.__splitexp, line)
-            ##print tokens
-            for token in tokens:
-                if token != "":
+    '''
+    '''if token != "":
+                    write_obj.write(token + "\n")
+
+                    match_obj = re.search(self.__mixed_exp, token)
+                    if match_obj != None:
+                        first = match_obj.group(1)
+                        second = match_obj.group(2)
+                        write_obj.write(first + "\n")
+                        write_obj.write(second + "\n")
+                    else:
+                        write_obj.write(token + "\n")
-                    """
-                    match_obj = re.search(self.__mixed_exp, token)
-                    if match_obj != None:
-                        first = match_obj.group(1)
-                        second = match_obj.group(2)
-                        write_obj.write(first + "\n")
-                        write_obj.write(second + "\n")
-                    else:
-                        write_obj.write(token + "\n")
-                    """
-        read_obj.close()
-        write_obj.close()
-    def tokenize(self):
-        """Main class for handling other methods. Reads in one line \
-        at a time, uses method self.sub_line to make basic substitutions,\
-        uses ? to process tokens"""
-        self.__create_tokens()
-        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
-        if self.__copy:
-            copy_obj.copy_file(self.__write_to, "tokenize.data")
-        copy_obj.rename(self.__write_to, self.__file)
-        os.remove(self.__write_to)
+    '''
+    '''
+    for line in read_obj:
+        #make all replacements
+        line = self.__sub_reg(line)
+        #split tokens and remove empty ones
+        tokens = filter(lambda x: len(x) > 0,
+            re.split(self.__splitexp, line))
+        if tokens:
+            write_obj.write('\n'.join(tokens)+'\n')'''
+
+    '''def __neg_unicode_func(self, match_obj):
+        neg_uni_char = int(match_obj.group(1)) * -1
+        # sys.stderr.write(str( neg_uni_char))
+        uni_char = neg_uni_char + 65536
+        return '&#x' + str('%X' % uni_char) + ';'''
\ No newline at end of file
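Two notes on the tokenize.py rewrite above. First, __unicode_process (still unused here: the map(self.__unicode_process, tokens) call in tokenize() is commented out) encodes RTF's Unicode convention: \uN carries a signed 16-bit code point, so negative values are brought into range by adding 65536, and \ucN declares how many fallback ANSI characters after each \u a reader must skip, which is what the __uc_char countdown and the per-group __uc_value stack track. A standalone sketch of just the code-point conversion (Python 2, like the rest of this file):

    import re

    UTF_EXP = re.compile(r"\\u(-?\d{3,6}) {0,1}")   # same pattern as above

    def u_token_to_entity(token):
        code = int(UTF_EXP.match(token).group(1))
        if code < 0:
            code += 65536    # RTF stores code points as signed 16-bit values
        return unichr(code).encode('ascii', 'xmlcharrefreplace')

    # u_token_to_entity('\u8216 ')  -> '&#8216;' (left single quotation mark)
    # u_token_to_entity('\u-3844 ') -> '&#61692;' (65536 - 3844)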
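Second, the long chain of str.replace calls is now one table-driven pass through MReplace (from calibre.utils.mreplace; its implementation is not part of this patch). Assuming it matches keys literally, two details deserve a look: SIMPLE_RPL lists the "\\~" key twice (the second entry silently wins in a dict literal), and the r'\\$' entry, an end-of-line regex in the old code, becomes a literal backslash-dollar match in a replacement table. A minimal sketch of the usual one-pass multi-replace technique such a class is presumably built on:

    import re

    class OnePassReplace(object):
        """Sketch of the one-pass multi-replace idea (not calibre's code)."""
        def __init__(self, rpl):
            self.rpl = rpl
            # longest keys first, so "\\\\" is preferred over a lone "\\"
            keys = sorted(rpl, key=len, reverse=True)
            self.pattern = re.compile('|'.join(re.escape(k) for k in keys))
        def mreplace(self, text):
            return self.pattern.sub(lambda m: self.rpl[m.group(0)], text)

    # OnePassReplace({'{': '\\{', '}': '\\}', '&': '&amp;'}).mreplace('{a & b}')
    # -> '\{a &amp; b\}'

Doing all replacements in a single pass also guarantees that the output of one substitution is never re-scanned by another, which a chain of str.replace calls cannot promise.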
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 41d72d17f1..686d705abb 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -329,6 +329,7 @@ class FileIconProvider(QFileIconProvider):
              'epub'    : 'epub',
              'fb2'     : 'fb2',
              'rtf'     : 'rtf',
+             'odt'     : 'odt',
             }
 
     def __init__(self):