diff --git a/resources/images/mimetypes/odt.svg b/resources/images/mimetypes/odt.svg
new file mode 100644
index 0000000000..f8c3df5d04
--- /dev/null
+++ b/resources/images/mimetypes/odt.svg
@@ -0,0 +1,63 @@
+
+
+
\ No newline at end of file
diff --git a/setup.py b/setup.py
deleted file mode 100644
index d8bd0267ee..0000000000
--- a/setup.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-from __future__ import with_statement
-
-__license__ = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-import sys, os, optparse
-
-sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
-
-import setup.commands as commands
-from setup import prints, get_warnings
-
-def check_version_info():
- vi = sys.version_info
- if vi[0] == 2 and vi[1] > 5:
- return None
- return 'calibre requires python >= 2.6'
-
-def option_parser():
- parser = optparse.OptionParser()
- parser.add_option('-c', '--clean', default=False, action='store_true',
- help=('Instead of running the command delete all files generated '
- 'by the command'))
- parser.add_option('--clean-backups', default=False, action='store_true',
- help='Delete all backup files from the source tree')
- parser.add_option('--clean-all', default=False, action='store_true',
- help='Delete all machine generated files from the source tree')
- return parser
-
-def clean_backups():
- for root, _, files in os.walk('.'):
- for name in files:
- for t in ('.pyc', '.pyo', '~', '.swp', '.swo'):
- if name.endswith(t):
- os.remove(os.path.join(root, name))
-
-
-def main(args=sys.argv):
- if len(args) == 1 or args[1] in ('-h', '--help'):
- print 'Usage: python', args[0], 'command', '[options]'
- print '\nWhere command is one of:'
- print
- for x in sorted(commands.__all__):
- print '%-20s -'%x,
- c = getattr(commands, x)
- desc = getattr(c, 'short_description', c.description)
- print desc
-
- print '\nTo get help on a particular command, run:'
- print '\tpython', args[0], 'command -h'
- return 1
-
- command = args[1]
- if command not in commands.__all__:
- print command, 'is not a recognized command.'
- print 'Valid commands:', ', '.join(commands.__all__)
- return 1
-
- command = getattr(commands, command)
-
- parser = option_parser()
- command.add_all_options(parser)
- parser.set_usage('Usage: python setup.py %s [options]\n\n'%args[1]+\
- command.description)
-
- opts, args = parser.parse_args(args)
-
- if opts.clean_backups:
- clean_backups()
-
- if opts.clean:
- prints('Cleaning', args[1])
- command.clean()
- return 0
-
- if opts.clean_all:
- for cmd in commands.__all__:
- prints('Cleaning', cmd)
- getattr(commands, cmd).clean()
- return 0
-
- command.run_all(opts)
-
- warnings = get_warnings()
- if warnings:
- print
- prints('There were', len(warnings), 'warning(s):')
- print
- for args, kwargs in warnings:
- prints('*', *args, **kwargs)
- print
-
- return 0
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 2622d82d99..247f3e8cef 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -50,7 +50,7 @@ class RTFInput(InputFormatPlugin):
parser = ParseRtf(
in_file = stream,
out_file = ofile,
- #deb_dir = 'I:\\Calibre\\rtfdebug',
+ deb_dir = 'I:\\Calibre\\rtfdebug',
# Convert symbol fonts to unicode equivalents. Default
# is 1
convert_symbol = 1,
@@ -187,16 +187,17 @@ class RTFInput(InputFormatPlugin):
self.log = log
self.log('Converting RTF to XML...')
#Name of the preprocessed RTF file
- fname = self.preprocess(stream.name)
+ #fname = self.preprocess(stream.name)
+ fname = stream.name
try:
xml = self.generate_xml(fname)
except RtfInvalidCodeException, e:
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)
- '''dataxml = open('dataxml.xml', 'w')
+ dataxml = open('dataxml.xml', 'w')
dataxml.write(xml)
- dataxml.close'''
+ dataxml.close()
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
if d:
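
[Reviewer note] The three input.py changes above (hard-coding deb_dir to I:\Calibre\rtfdebug, bypassing self.preprocess(), and dumping dataxml.xml unconditionally) look like debugging aids rather than final behaviour. A minimal sketch of how the XML dump could be kept without affecting normal runs, gated on a hypothetical CALIBRE_RTF_DEBUG environment variable:

    import os

    # Dump the intermediate XML only when explicitly requested.
    if os.environ.get('CALIBRE_RTF_DEBUG'):
        dataxml = open('dataxml.xml', 'w')
        dataxml.write(xml)
        dataxml.close()
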
diff --git a/src/calibre/ebooks/rtf/preprocess.py b/src/calibre/ebooks/rtf/preprocess.py
index a3076651fd..967bffe91b 100644
--- a/src/calibre/ebooks/rtf/preprocess.py
+++ b/src/calibre/ebooks/rtf/preprocess.py
@@ -228,8 +228,9 @@ class RtfTokenizer():
def tokenize(self):
i = 0
lastDataStart = -1
+ #parse the input character by character
while i < len(self.rtfData):
-
+ #if this starts a group
if isChar(self.rtfData[i], '{'):
if lastDataStart > -1:
self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@@ -237,7 +238,7 @@ class RtfTokenizer():
self.tokens.append(tokenDelimitatorStart())
i = i + 1
continue
-
+ #if this ends a group
if isChar(self.rtfData[i], '}'):
if lastDataStart > -1:
self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@@ -245,7 +246,7 @@ class RtfTokenizer():
self.tokens.append(tokenDelimitatorEnd())
i = i + 1
continue
-
+ #flush any pending data if there is a control character
if isChar(self.rtfData[i], '\\'):
if i + 1 >= len(self.rtfData):
raise Exception('Error: Control character found at the end of the document.')
@@ -254,6 +255,7 @@ class RtfTokenizer():
self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
lastDataStart = -1
+ # the token starts here
tokenStart = i
i = i + 1
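
[Reviewer note] For readers following the hunk above: RtfTokenizer scans the input one character at a time, flushing any pending text run whenever it meets a group delimiter ({ or }) or a control character (\). A standalone toy version of the same scan (the names below are illustrative, not the module's token classes):

    def toy_tokenize(rtf):
        # Split RTF source into '{', '}', control words and text runs.
        tokens, i, start = [], 0, -1
        while i < len(rtf):
            c = rtf[i]
            if c in '{}':
                if start > -1:
                    tokens.append(rtf[start:i])
                    start = -1
                tokens.append(c)
                i += 1
            elif c == '\\':
                if start > -1:
                    tokens.append(rtf[start:i])
                    start = -1
                j = i + 1
                while j < len(rtf) and rtf[j].isalnum():
                    j += 1
                tokens.append(rtf[i:j])
                i = j
            else:
                if start == -1:
                    start = i
                i += 1
        if start > -1:
            tokens.append(rtf[start:])
        return tokens

    # toy_tokenize('{\\rtf1\\ansi Hello}')
    # -> ['{', '\\rtf1', '\\ansi', ' Hello', '}']

A real tokenizer also consumes the single optional space that terminates a control word; the sketch leaves it attached to the following text run.
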
diff --git a/src/calibre/ebooks/rtf2xml/line_endings.py b/src/calibre/ebooks/rtf2xml/line_endings.py
index 6511657aa9..e77e5d747c 100755
--- a/src/calibre/ebooks/rtf2xml/line_endings.py
+++ b/src/calibre/ebooks/rtf2xml/line_endings.py
@@ -32,7 +32,7 @@ class FixLineEndings:
self.__write_to = tempfile.mktemp()
self.__replace_illegals = replace_illegals
def fix_endings(self):
- illegal_regx = re.compile( '\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
+ illegal_regx = re.compile('\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
# always check since I have to get rid of illegal characters
#read
read_obj = open(self.__file, 'r')
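
[Reviewer note] For reference, illegal_regx strips the low ASCII control characters NUL through \x13, except \t, \n, \x0C and \r. A quick check of its effect:

    import re

    illegal_regx = re.compile('\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
    illegal_regx.sub('', 'ab\x02cd\x0Bef')  # -> 'abcdef'
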
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index e162e8c992..3aa2079fb3 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -16,7 +16,10 @@
# #
#########################################################################
import os, re, tempfile
+
from calibre.ebooks.rtf2xml import copy
+from calibre.utils.mreplace import MReplace
+
class Tokenize:
"""Tokenize RTF into one line per field. Each line will contain information useful for the rest of the script"""
def __init__(self,
@@ -28,20 +31,162 @@ class Tokenize:
self.__file = in_file
self.__bug_handler = bug_handler
self.__copy = copy
- self.__special_tokens = [ '_', '~', "'", '{', '}' ]
self.__write_to = tempfile.mktemp()
+ self.__compile_expressions()
+ #unicode handling state variables
+ self.__uc_char = 0
+ self.__uc_bin = False
+ self.__uc_value = [1]
+
def __from_ms_to_utf8(self,match_obj):
uni_char = int(match_obj.group(1))
if uni_char < 0:
uni_char += 65536
return '&#x' + str('%X' % uni_char) + ';'
- def __neg_unicode_func(self, match_obj):
- neg_uni_char = int(match_obj.group(1)) * -1
- # sys.stderr.write(str( neg_uni_char))
- uni_char = neg_uni_char + 65536
- return '&#x' + str('%X' % uni_char) + ';'
- def __sub_line_reg(self,line):
- line = line.replace("\\\\", "\\backslash ")
+
+ def __reinit_utf8_counters(self):
+ self.__uc_char = 0
+ self.__uc_bin = False
+
+ def __unicode_process(self, token):
+ #change scope in
+ if token == '\{':
+ self.__uc_value.append(self.__uc_value[-1])
+ #basic error handling
+ self.__reinit_utf8_counters()
+ return token
+ #change scope out: evaluate dict and rebuild
+ elif token == '\}':
+ #self.__uc_value.pop()
+ self.__reinit_utf8_counters()
+ return token
+ #add a uc control
+ elif token[:3] == '\uc':
+ self.__uc_value[-1] = int(token[3:])
+ self.__reinit_utf8_counters()
+ return token
+ #handle uc skippable char
+ elif self.__uc_char:
+ #TODO: consume the skippable character(s) that follow a \uN escape
+ pass
+ #go for real \u token
+ match_obj = self.__utf_exp.match(token)
+ if match_obj is not None:
+ #get value and handle negative case
+ uni_char = int(match_obj.group(1))
+ uni_len = len(match_obj.group(1)) + 2
+ if uni_char < 0:
+ uni_char += 65536
+ uni_char = unichr(uni_char).encode('ascii', 'xmlcharrefreplace')
+ #if not uc0
+ if self.__uc_value[-1]:
+ self.__uc_char = self.__uc_value[-1]
+ #there is only a unicode char
+ if len(token) <= uni_len:
+ return uni_char
+ #a unicode char and something else
+ #must come after, as the input is split on \
+ elif not self.__uc_value[-1]:
+ print('not only token uc0 token: ' + uni_char + token[uni_len:])
+ return uni_char + token[uni_len:]
+ #if not uc0 and chars
+ else:
+ for i in xrange(uni_len, len(token)):
+ if token[i] == " ":
+ continue
+ elif self.__uc_char > 0:
+ self.__uc_char -= 1
+ else:
+ return uni_char + token[i:]
+ #print('uc: ' + str(self.__uc_value) + 'uni: ' + str(uni_char) + 'token: ' + token)
+ #default
+ return token
+
+ def __sub_reg_split(self, input_file):
+ input_file = self.__replace_spchar.mreplace(input_file)
+ #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
+ # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
+ # this is for older RTF
+ #line = re.sub(self.__par_exp, '\\par ', line)
+ input_file = re.sub(self.__ms_hex_exp, "\\mshex0\g<1> ", input_file)
+ #split
+ tokens = re.split(self.__splitexp, input_file)
+ #remove empty tokens and \n
+ return filter(lambda x: len(x) > 0 and x != '\n', tokens)
+ #return filter(lambda x: len(x) > 0, \
+ #(self.__remove_line.sub('', x) for x in tokens))
+
+
+ def __compile_expressions(self):
+ SIMPLE_RPL = {
+ "\\\\": "\\backslash ",
+ "\\~": "\\~ ",
+ "\\;": "\\; ",
+ "&": "&",
+ "<": "<",
+ ">": ">",
+ "\\~": "\\~ ",
+ "\\_": "\\_ ",
+ "\\:": "\\: ",
+ "\\-": "\\- ",
+ # turn into a generic token to eliminate special
+ # cases and make processing easier
+ "\\{": "\\ob ",
+ # turn into a generic token to eliminate special
+ # cases and make processing easier
+ "\\}": "\\cb ",
+ # put a backslash in front of to eliminate special cases and
+ # make processing easier
+ "{": "\\{",
+ # put a backslash in front of to eliminate special cases and
+ # make processing easier
+ "}": "\\}",
+ # this is for older RTF
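+ #FIXME: MReplace matches its keys literally (it re.escapes them), so
+ #this regex-style key cannot reproduce the old __par_exp substitution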
+ r'\\$': '\\par ',
+ }
+ self.__replace_spchar = MReplace(SIMPLE_RPL)
+ self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
+ self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}") #modify this
+ #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
+ #add \n in split for whole file reading
+ #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
+ #why keep the backslash, when \\ is already replaced above?
+ self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
+ #self.__par_exp = re.compile(r'\\$')
+ #self.__remove_line = re.compile(r'\n+')
+ #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
+ ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
+
+ def tokenize(self):
+ """Main class for handling other methods. Reads the file \
+ , uses method self.sub_reg to make basic substitutions,\
+ and process tokens by itself"""
+ #read
+ read_obj = open(self.__file, 'r')
+ input_file = read_obj.read()
+ read_obj.close()
+
+ #process simple replacements and split giving us a correct list
+ #remove '' and \n in the process
+ tokens = self.__sub_reg_split(input_file)
+ #correct unicode
+ #tokens = map(self.__unicode_process, tokens)
+ #remove empty items created by removing \uc
+ #tokens = filter(lambda x: len(x) > 0, tokens)
+
+ #write
+ write_obj = open(self.__write_to, 'wb')
+ write_obj.write('\n'.join(tokens))
+ write_obj.close()
+ #Move and copy
+ copy_obj = copy.Copy(bug_handler = self.__bug_handler)
+ if self.__copy:
+ copy_obj.copy_file(self.__write_to, "tokenize.data")
+ copy_obj.rename(self.__write_to, self.__file)
+ os.remove(self.__write_to)
+
+ #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
+ '''line = line.replace("\\\\", "\\backslash ")
line = line.replace("\\~", "\\~ ")
line = line.replace("\\;", "\\; ")
line = line.replace("&", "&")
@@ -63,54 +208,37 @@ class Tokenize:
# put a backslash in front of to eliminate special cases and
# make processing easier
line = line.replace("}", "\\}")
- line = re.sub(self.__utf_exp, self.__from_ms_to_utf8, line)
- # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
- line = re.sub(self.__ms_hex_exp, "\\mshex0\g<1> ", line)
- ##line = line.replace("\\backslash", "\\\\")
- # this is for older RTF
- line = re.sub(self.__par_exp, '\\par ', line)
- return line
- def __compile_expressions(self):
- self.__ms_hex_exp = re.compile(r"\\\'(..)")
- self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
- self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
- self.__par_exp = re.compile(r'\\$')
- self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
- ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
- def __create_tokens(self):
- self.__compile_expressions()
- read_obj = open(self.__file, 'r')
- write_obj = open(self.__write_to, 'wb')
+
line_to_read = "dummy"
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
line = line.replace("\n", "")
- line = self.__sub_line_reg(line)
- tokens = re.split(self.__splitexp, line)
- ##print tokens
- for token in tokens:
- if token != "":
+ '''
+ '''if token != "":
+ write_obj.write(token + "\n")
+
+ match_obj = re.search(self.__mixed_exp, token)
+ if match_obj != None:
+ first = match_obj.group(1)
+ second = match_obj.group(2)
+ write_obj.write(first + "\n")
+ write_obj.write(second + "\n")
+ else:
write_obj.write(token + "\n")
- """
- match_obj = re.search(self.__mixed_exp, token)
- if match_obj != None:
- first = match_obj.group(1)
- second = match_obj.group(2)
- write_obj.write(first + "\n")
- write_obj.write(second + "\n")
- else:
- write_obj.write(token + "\n")
- """
- read_obj.close()
- write_obj.close()
- def tokenize(self):
- """Main class for handling other methods. Reads in one line \
- at a time, uses method self.sub_line to make basic substitutions,\
- uses ? to process tokens"""
- self.__create_tokens()
- copy_obj = copy.Copy(bug_handler = self.__bug_handler)
- if self.__copy:
- copy_obj.copy_file(self.__write_to, "tokenize.data")
- copy_obj.rename(self.__write_to, self.__file)
- os.remove(self.__write_to)
+ '''
+ '''
+ for line in read_obj:
+ #make all replacements
+ line = self.__sub_reg(line)
+ #split token and remove empty tokens
+ tokens = filter(lambda x: len(x) > 0,
+ re.split(self.__splitexp, line))
+ if tokens:
+ write_obj.write('\n'.join(tokens)+'\n')'''
+
+ '''def __neg_unicode_func(self, match_obj):
+ neg_uni_char = int(match_obj.group(1)) * -1
+ # sys.stderr.write(str( neg_uni_char))
+ uni_char = neg_uni_char + 65536
+ return '&#x' + str('%X' % uni_char) + ';'''
\ No newline at end of file
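
[Reviewer note] The tokenize.py rewrite replaces the old line-by-line __sub_line_reg/__create_tokens pair with a whole-file pipeline: MReplace (calibre's dict-based multi-string replacer, imported at the top of the file) performs all fixed substitutions in one pass, then __splitexp splits the result into tokens. A minimal sketch of that pipeline on a toy input, using the same MReplace API the patch itself uses (the abbreviated dict is illustrative only):

    import re
    from calibre.utils.mreplace import MReplace

    rpl = MReplace({'\\\\': '\\backslash ', '{': '\\{', '}': '\\}'})
    splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")

    text = rpl.mreplace('{\\b bold} text')   # '\\{\\b bold\\} text'
    tokens = filter(lambda x: len(x) > 0 and x != '\n',
                    re.split(splitexp, text))
    # -> ['\\{', '\\b ', 'bold', '\\}', ' text']

The (still commented-out) __unicode_process step then handles \uN/\ucN: in RTF, \uc1 declares one fallback character after each \uN escape, so \u233 followed by the fallback 'e' should emit just character 233 (here encoded as '&#233;' via unichr(233).encode('ascii', 'xmlcharrefreplace')) with the fallback swallowed.
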
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 41d72d17f1..686d705abb 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -329,6 +329,7 @@ class FileIconProvider(QFileIconProvider):
'epub' : 'epub',
'fb2' : 'fb2',
'rtf' : 'rtf',
+ 'odt' : 'odt',
}
def __init__(self):