diff --git a/resources/templates/rtf.xsl b/resources/templates/rtf.xsl
index 6db1c0388d..0f91d7f4ac 100644
--- a/resources/templates/rtf.xsl
+++ b/resources/templates/rtf.xsl
@@ -98,7 +98,7 @@
-
+
@@ -220,7 +220,7 @@
-
+
padding-left:
@@ -256,15 +256,15 @@
;
-
+
text-decoration:underline
;
-
+
text-align: justify;
@@ -314,7 +314,6 @@
-
@@ -452,6 +451,15 @@
+
+
+
+
+
+
+
+
+
diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py
index c20d880a2f..70f6effcda 100644
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@@ -93,7 +93,7 @@ def get_metadata(stream):
stream.seek(0)
cpg = detect_codepage(stream)
stream.seek(0)
-
+
title_match = title_pat.search(block)
if title_match is not None:
title = decode(title_match.group(1).strip(), cpg)
@@ -162,7 +162,8 @@ def set_metadata(stream, options):
index = src.rindex('}')
return src[:index] + r'{\ '[:-1] + name + ' ' + val + '}}'
src, pos = get_document_info(stream)
- if not src:
+ # src is empty or None here, so the len(src) branch must not run: create the metadata instead
+ if not src:
create_metadata(stream, options)
else:
olen = len(src)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index f08aa76605..c1e649851b 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -41,7 +41,7 @@ border_style_map = {
class InlineClass(etree.XSLTExtension):
- FMTS = ('italics', 'bold', 'underlined', 'strike-through', 'small-caps')
+ FMTS = ('italics', 'bold', 'strike-through', 'small-caps')
def __init__(self, log):
etree.XSLTExtension.__init__(self)
@@ -54,6 +54,9 @@ class InlineClass(etree.XSLTExtension):
for x in self.FMTS:
if input_node.get(x, None) == 'true':
classes.append(x)
+ # 'underlined' is special: any value other than 'false' (not only 'true') marks the text as underlined
+ if input_node.get('underlined', 'false') != 'false':
+ classes.append('underlined')
fs = input_node.get('font-size', False)
if fs:
if fs not in self.font_sizes:
@@ -78,12 +81,13 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = 'dataxml.xml'
- run_lev, debug_dir = 1, None
+ run_lev, debug_dir, indent_out = 1, None, 0
if getattr(self.opts, 'debug_pipeline', None) is not None:
try:
- os.mkdir(debug_dir)
+ os.mkdir('rtfdebug')
debug_dir = 'rtfdebug'
run_lev = 4
+ indent_out = 1
self.log('Running RTFParser in debug mode')
except:
self.log.warn('Impossible to run RTFParser in debug mode')
@@ -108,7 +112,7 @@ class RTFInput(InputFormatPlugin):
# Indent resulting XML.
# Default is 0 (no indent).
- indent = 1,
+ indent = indent_out,
# Form lists from RTF. Default is 1.
form_lists = 1,
@@ -157,7 +161,8 @@ class RTFInput(InputFormatPlugin):
with open(name, 'wb') as f:
f.write(data)
imap[count] = name
- #open(name+'.hex', 'wb').write(enc)
+ # with open(name+'.hex', 'wb') as f:
+ # f.write(enc)
return self.convert_images(imap)
def convert_images(self, imap):
@@ -319,4 +324,6 @@ class RTFInput(InputFormatPlugin):
opf.render(open('metadata.opf', 'wb'))
return os.path.abspath('metadata.opf')
-
+#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
+# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
+# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 9ba282129f..e442a1c496 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -372,17 +372,17 @@ class ParseRtf:
old_rtf = old_rtf_obj.check_if_old_rtf()
if old_rtf:
if self.__run_level > 5:
- msg = 'older RTF\n'
+ msg = 'Older RTF\n'
msg += 'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg
if self.__run_level > 1:
- sys.stderr.write('File could be older RTF...\n')
+ sys.stderr.write(_('File could be older RTF...\n'))
if found_destination:
if self.__run_level > 1:
- sys.stderr.write(
+ sys.stderr.write(_(
'File also has newer RTF.\n'
'Will do the best to convert.\n'
- )
+ ))
add_brackets_obj = add_brackets.AddBrackets(
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,
diff --git a/src/calibre/ebooks/rtf2xml/check_brackets.py b/src/calibre/ebooks/rtf2xml/check_brackets.py
index 35c7ede435..3b353a65c0 100755
--- a/src/calibre/ebooks/rtf2xml/check_brackets.py
+++ b/src/calibre/ebooks/rtf2xml/check_brackets.py
@@ -53,4 +53,3 @@ class CheckBrackets:
'total number of brackets is %s') % self.__bracket_count
return (False, msg)
return (True, "Brackets match!")
-
diff --git a/src/calibre/ebooks/rtf2xml/configure_txt.py b/src/calibre/ebooks/rtf2xml/configure_txt.py
index cd4c2558b7..27f06d0d19 100755
--- a/src/calibre/ebooks/rtf2xml/configure_txt.py
+++ b/src/calibre/ebooks/rtf2xml/configure_txt.py
@@ -25,7 +25,7 @@ class Configure:
if self.__show_config_file and self.__configuration_file:
sys.stderr.write('configuration file is "%s"\n' % self.__configuration_file)
if self.__show_config_file and not self.__configuration_file:
- sys.stderr.write('No configuraiton file found; using default vaules\n')
+ sys.stderr.write('No configuration file found; using default values\n')
if self.__configuration_file:
read_obj = open(self.__configuration_file, 'r')
line_to_read = 1
diff --git a/src/calibre/ebooks/rtf2xml/delete_info.py b/src/calibre/ebooks/rtf2xml/delete_info.py
index d508887d01..74e6b2aba3 100755
--- a/src/calibre/ebooks/rtf2xml/delete_info.py
+++ b/src/calibre/ebooks/rtf2xml/delete_info.py
@@ -43,6 +43,7 @@ class DeleteInfo:
'cw%s' % sub_entry
my_changed_string += '\n'
return my_changed_string
+
def __index_see_func(self, my_string):
in_see = 0
bracket_count = 0
@@ -221,6 +230,7 @@ file.
in_see = 1
changed_string += '%s\n' % line
return changed_string, see_string
+
def __index_bookmark_func(self, my_string):
"""
Requries:
@@ -257,6 +267,7 @@ file.
in_bookmark = 1
index_string += '%s\n' % line
return index_string, bookmark_string
+
def __index__format_func(self, my_string):
italics = 0
bold =0
@@ -268,6 +279,7 @@ file.
if token_info == 'cw%s' % main_entry
my_changed_string += '\n'
return my_changed_string
+
def __parse_bookmark_for_toc(self, my_string):
"""
Requires:
@@ -348,6 +361,7 @@ file.
in_bookmark = 1
toc_string += '%s\n' % line
return toc_string, book_start_string, book_end_string
+
def __parse_bookmark_func(self, my_string, type):
"""
Requires:
@@ -362,6 +376,7 @@ file.
my_changed_string = ('mi%s'
'%snone\n' % (type, my_string))
return my_changed_string
+
def __found_toc_index_func(self, line, tag):
"""
Requires:
@@ -377,6 +392,7 @@ file.
self.__cb_count = 0
self.__state = 'toc_index'
self.__tag = tag
+
def __toc_index_func(self, line):
"""
Requires:
@@ -404,6 +420,7 @@ file.
self.__write_obj.write(line)
else:
self.__text_string += line
+
def fix_fields(self):
"""
Requires:
@@ -418,24 +435,19 @@ file.
bookmark.
"""
self.__initiate_values()
- read_obj = open(self.__file)
- self.__write_obj = open(self.__write_to, 'w')
- line_to_read = '1'
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- self.__token_info = line[:16]
- if self.__token_info == 'ob 1:
self.__caps_list.pop()
else:
- sys.stderr.write('Module is hex_2_utf8\n')
- sys.stderr.write('method is __end_caps_func\n')
- sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
+ sys.stderr.write('Module is hex_2_utf8\n'
+ 'method is __end_caps_func\n'
+ 'caps list should be more than one?\n') #self.__in_caps not set
def __text_func(self, line):
"""
@@ -486,8 +482,7 @@ class Hex2Utf8:
hex_num = '\'%s' % hex_num
converted = self.__current_dict.get(hex_num)
if converted is None:
- sys.stderr.write('module is hex_2_ut8\n')
- sys.stderr.write('method is __text_func\n')
+ sys.stderr.write('module is hex_2_ut8\nmethod is __text_func\n')
sys.stderr.write('no hex value for "%s"\n' % hex_num)
else:
the_string += converted
@@ -543,16 +538,15 @@ class Hex2Utf8:
def __convert_body(self):
self.__state = 'body'
with open(self.__file, 'r') as read_obj:
- self.__write_obj = open(self.__write_to, 'w')
- for line in read_obj:
- self.__token_info = line[:16]
- action = self.__body_state_dict.get(self.__state)
- if action is None:
- sys.stderr.write('error no state found in hex_2_utf8',
- self.__state
- )
- action(line)
- self.__write_obj.close()
+ with open(self.__write_to, 'w') as self.__write_obj:
+ for line in read_obj:
+ self.__token_info = line[:16]
+ action = self.__body_state_dict.get(self.__state)
+ if action is None:
+ # write() accepts exactly one string, so format the state into the message
+ sys.stderr.write(
+ 'error no state found in hex_2_utf8 %s\n' % self.__state)
+ action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
diff --git a/src/calibre/ebooks/rtf2xml/info.py b/src/calibre/ebooks/rtf2xml/info.py
index e9c9d5c38d..8eabe25c18 100755
--- a/src/calibre/ebooks/rtf2xml/info.py
+++ b/src/calibre/ebooks/rtf2xml/info.py
@@ -68,7 +68,6 @@ class Info:
'cw tb
+ # underline
+ # can't see why it isn't a char info: 'ul'=>'ci'
+ 'ul' : ('ci', 'underlined tb
'trowd' : ('tb', 'row-def___', self.default_func),
'cell' : ('tb', 'cell______', self.default_func),
'row' : ('tb', 'row_______', self.default_func),
@@ -274,25 +294,6 @@ class ProcessTokens:
'paperh' : ('pa', 'paper-hght', self.divide_by_20),
# annotation => an
'annotation' : ('an', 'annotation', self.default_func),
- # underline
- 'ul' : ('ul', 'underlined bd
'trbrdrh' : ('bd', 'bor-t-r-hi', self.default_func),
'trbrdrv' : ('bd', 'bor-t-r-vi', self.default_func),
@@ -757,7 +758,7 @@ class ProcessTokens:
def process_cw(self, token):
"""Change the value of the control word by determining what dictionary
it belongs to"""
- special = [ '*', ':', '}', '{', '~', '_', '-', ';' ]
+ special = [ '*', ':', '}', '{', '~', '_', '-', ';' ]
##if token != "{" or token != "}":
token = token[1:] # strip off leading \
token = token.replace(" ", "")
@@ -793,7 +794,7 @@ class ProcessTokens:
raise self.__exception_handler, msg
the_index = token.find('\\ ')
- if token is not None and the_index > -1:
+ if token is not None and the_index > -1:
msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
% line_count
raise self.__exception_handler, msg
@@ -832,4 +833,4 @@ class ProcessTokens:
msg = '\nInvalid RTF: document does not have matching brackets.\n'
raise self.__exception_handler, msg
else:
- return self.__return_code
+ return self.__return_code
\ No newline at end of file
diff --git a/src/calibre/ebooks/rtf2xml/sections.py b/src/calibre/ebooks/rtf2xml/sections.py
index 13bf2c2ddc..a315729525 100755
--- a/src/calibre/ebooks/rtf2xml/sections.py
+++ b/src/calibre/ebooks/rtf2xml/sections.py
@@ -496,7 +496,7 @@ Instead, ingore all section information in a field-block.
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action == None:
- sys.stderr.write('no no matching state in module sections.py\n')
+ sys.stderr.write('no matching state in module sections.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
diff --git a/src/calibre/ebooks/rtf2xml/styles.py b/src/calibre/ebooks/rtf2xml/styles.py
index 55f86e4208..7fcbfb24a3 100755
--- a/src/calibre/ebooks/rtf2xml/styles.py
+++ b/src/calibre/ebooks/rtf2xml/styles.py
@@ -103,8 +103,6 @@ class Styles:
'sect-note_' : 'endnotes-in-section',
# list=> ls
'list-text_' : 'list-text',
- # this line must be wrong because it duplicates an earlier one
- 'list-text_' : 'list-text',
'list______' : 'list',
'list-lev-d' : 'list-level-definition',
'list-cardi' : 'list-cardinal-numbering',
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index deac793111..10d3fbba6f 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -114,6 +114,7 @@ class Tokenize:
# this is for older RTF
input_file = self.__par_exp.sub('\n\\par \n', input_file)
input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
+ input_file = self.__cs_ast.sub("\g<1>", input_file)
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
#remove \n in bin data
@@ -163,6 +164,8 @@ class Tokenize:
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
#this is for old RTF
self.__par_exp = re.compile(r'(\\\n+|\\ )')
+ #handle an improper \cs char-style that is preceded by \* but not wrapped in {
+ self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
#handle cw using a digit as argument and without space as delimiter
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index 4037ee1be7..4cff648fa5 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -12,6 +12,7 @@ import os, re
from calibre import prepare_string_for_xml, isbytestring
from calibre.ebooks.metadata.opf2 import OPFCreator
+
from calibre.ebooks.conversion.preprocess import DocAnalysis
from calibre.utils.cleantext import clean_ascii_chars
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 51aefe214b..0f5a31e1d7 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -477,17 +477,17 @@ class BIBTEX(CatalogPlugin): # {{{
if opts.bibfile_enc in bibfile_enc :
bibfile_enc = opts.bibfile_enc
else :
- log(" WARNING: incorrect --choose-encoding flag, revert to default")
+ log.warn("Incorrect --choose-encoding flag, revert to default")
bibfile_enc = bibfile_enc[0]
if opts.bibfile_enctag in bibfile_enctag :
bibfile_enctag = opts.bibfile_enctag
else :
- log(" WARNING: incorrect --choose-encoding-configuration flag, revert to default")
+ log.warn("Incorrect --choose-encoding-configuration flag, revert to default")
bibfile_enctag = bibfile_enctag[0]
if opts.bib_entry in bib_entry :
bib_entry = opts.bib_entry
else :
- log(" WARNING: incorrect --entry-type flag, revert to default")
+ log.warn("Incorrect --entry-type flag, revert to default")
bib_entry = bib_entry[0]
if opts.verbose:
@@ -544,7 +544,7 @@ class BIBTEX(CatalogPlugin): # {{{
elif opts.impcit == 'True' :
citation_bibtex= True
else :
- log(" WARNING: incorrect --create-citation, revert to default")
+ log.warn("Incorrect --create-citation, revert to default")
citation_bibtex= True
else :
citation_bibtex= opts.impcit
@@ -556,7 +556,7 @@ class BIBTEX(CatalogPlugin): # {{{
elif opts.addfiles == 'True' :
addfiles_bibtex = True
else :
- log(" WARNING: incorrect --add-files-path, revert to default")
+ log.warn("Incorrect --add-files-path, revert to default")
addfiles_bibtex= True
else :
addfiles_bibtex = opts.addfiles
@@ -574,7 +574,7 @@ class BIBTEX(CatalogPlugin): # {{{
if bib_entry == 'book' :
nb_books = len(filter(check_entry_book_valid, data))
if nb_books < nb_entries :
- log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
+ log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries))
nb_entries = nb_books
# If connected device, add 'On Device' values to data