RTF Input: Handle underlined text. Fixes #845328 (Underlined text in RTF not propagated when converted to ePub)

This commit is contained in:
Kovid Goyal 2011-09-10 08:45:45 -06:00
commit f1867f1128
18 changed files with 139 additions and 117 deletions

View File

@ -98,7 +98,7 @@
<xsl:apply-templates/>
</emph>
</xsl:when>
<xsl:when test = "@underlined">
<xsl:when test = "@underlined and @underlined != 'false'">
<emph rend = "paragraph-emph-underlined">
<xsl:apply-templates/>
</emph>
@ -220,7 +220,7 @@
</xsl:template>
<xsl:template name="parse-styles-attrs">
<!--<xsl:text>position:relative;</xsl:text>-->
<!--<xsl:text>position:relative;</xsl:text>
<xsl:if test="@space-before">
<xsl:text>padding-top:</xsl:text>
<xsl:value-of select="@space-before"/>
@ -230,7 +230,7 @@
<xsl:text>padding-bottom:</xsl:text>
<xsl:value-of select="@space-after"/>
<xsl:text>pt;</xsl:text>
</xsl:if>
</xsl:if>-->
<xsl:if test="@left-indent">
<xsl:text>padding-left:</xsl:text>
<xsl:value-of select="@left-indent"/>
@ -256,15 +256,15 @@
<xsl:value-of select="'italic'"/>
<xsl:text>;</xsl:text>
</xsl:if>
<xsl:if test="@underline and @underline != 'false'">
<xsl:if test="@underlined and @underlined != 'false'">
<xsl:text>text-decoration:underline</xsl:text>
<xsl:text>;</xsl:text>
</xsl:if>
<xsl:if test="@line-spacing">
<!--<xsl:if test="@line-spacing">
<xsl:text>line-height:</xsl:text>
<xsl:value-of select="@line-spacing"/>
<xsl:text>pt;</xsl:text>
</xsl:if>
</xsl:if>-->
<xsl:if test="(@align = 'just')">
<xsl:text>text-align: justify;</xsl:text>
</xsl:if>
@ -314,7 +314,6 @@
</xsl:attribute>
<xsl:apply-templates/>
</xsl:element>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
@ -453,6 +452,15 @@
</xsl:element>
</xsl:template>
<!--
  Bookmark-start fields: emit an <a> element whose "id" attribute is the
  value of the field's @number attribute, then process the field's children
  inside that element.
-->
<xsl:template match = "rtf:field[@type='bookmark-start']">
<xsl:element name ="a">
<xsl:attribute name = "id">
<xsl:value-of select = "@number"/>
</xsl:attribute>
<xsl:apply-templates/>
</xsl:element>
</xsl:template>
<!--
  Generic rtf:field handling: produce no wrapper element and simply process
  the field's children. A field that also matches a more specific pattern
  (for example one with a predicate on @type) is presumably handled by that
  template instead, under XSLT's default priority rules; confirm that no
  explicit priority attributes override this.
-->
<xsl:template match = "rtf:field">
<xsl:apply-templates/>
</xsl:template>

View File

@ -162,7 +162,8 @@ def set_metadata(stream, options):
index = src.rindex('}')
return src[:index] + r'{\ '[:-1] + name + ' ' + val + '}}'
src, pos = get_document_info(stream)
if not src:
print 'I was thre'
if src is not None:
create_metadata(stream, options)
else:
olen = len(src)

View File

@ -41,7 +41,7 @@ border_style_map = {
class InlineClass(etree.XSLTExtension):
FMTS = ('italics', 'bold', 'underlined', 'strike-through', 'small-caps')
FMTS = ('italics', 'bold', 'strike-through', 'small-caps')
def __init__(self, log):
etree.XSLTExtension.__init__(self)
@ -54,6 +54,9 @@ class InlineClass(etree.XSLTExtension):
for x in self.FMTS:
if input_node.get(x, None) == 'true':
classes.append(x)
# underlined is special: any value other than 'false' counts as underlined (a missing attribute defaults to 'false')
if input_node.get('underlined', 'false') != 'false':
classes.append('underlined')
fs = input_node.get('font-size', False)
if fs:
if fs not in self.font_sizes:
@ -78,12 +81,13 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = 'dataxml.xml'
run_lev, debug_dir = 1, None
run_lev, debug_dir, indent_out = 1, None, 0
if getattr(self.opts, 'debug_pipeline', None) is not None:
try:
os.mkdir(debug_dir)
os.mkdir('rtfdebug')
debug_dir = 'rtfdebug'
run_lev = 4
indent_out = 1
self.log('Running RTFParser in debug mode')
except:
self.log.warn('Impossible to run RTFParser in debug mode')
@ -108,7 +112,7 @@ class RTFInput(InputFormatPlugin):
# Indent resulting XML.
# Default is 0 (no indent).
indent = 1,
indent = indent_out,
# Form lists from RTF. Default is 1.
form_lists = 1,
@ -157,7 +161,8 @@ class RTFInput(InputFormatPlugin):
with open(name, 'wb') as f:
f.write(data)
imap[count] = name
#open(name+'.hex', 'wb').write(enc)
# with open(name+'.hex', 'wb') as f:
# f.write(enc)
return self.convert_images(imap)
def convert_images(self, imap):
@ -319,4 +324,6 @@ class RTFInput(InputFormatPlugin):
opf.render(open('metadata.opf', 'wb'))
return os.path.abspath('metadata.opf')
#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"

View File

@ -372,17 +372,17 @@ class ParseRtf:
old_rtf = old_rtf_obj.check_if_old_rtf()
if old_rtf:
if self.__run_level > 5:
msg = 'older RTF\n'
msg = 'Older RTF\n'
msg += 'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg
if self.__run_level > 1:
sys.stderr.write('File could be older RTF...\n')
sys.stderr.write(_('File could be older RTF...\n'))
if found_destination:
if self.__run_level > 1:
sys.stderr.write(
sys.stderr.write(_(
'File also has newer RTF.\n'
'Will do the best to convert.\n'
)
))
add_brackets_obj = add_brackets.AddBrackets(
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,

View File

@ -53,4 +53,3 @@ class CheckBrackets:
'total number of brackets is %s') % self.__bracket_count
return (False, msg)
return (True, "Brackets match!")

View File

@ -25,7 +25,7 @@ class Configure:
if self.__show_config_file and self.__configuration_file:
sys.stderr.write('configuration file is "%s"\n' % self.__configuration_file)
if self.__show_config_file and not self.__configuration_file:
sys.stderr.write('No configuraiton file found; using default vaules\n')
sys.stderr.write('No configuraiton file found; using default values\n')
if self.__configuration_file:
read_obj = open(self.__configuration_file, 'r')
line_to_read = 1

View File

@ -43,6 +43,7 @@ class DeleteInfo:
'cw<it<listtable_',
'cw<it<revi-table',
'cw<ls<list-lev-d',
# Field allowed
'cw<fd<field-inst',
'cw<an<book-mk-st',
'cw<an<book-mk-en',
@ -81,7 +82,7 @@ class DeleteInfo:
self.__ob = line
return False
else:
# write previous bracket, since didn't fine asterisk
# write previous bracket, since didn't find asterisk
if self.__ob:
self.__write_obj.write(self.__ob)
self.__ob = 0
@ -104,7 +105,7 @@ class DeleteInfo:
If you find that you are in a delete group, and the previous
token is not an open bracket (self.__ob = 0), that means
that the delete group is nested inside another acceptable
detination group. In this case, you have alrady written
destination group. In this case, you have already written
the open bracket, so you will need to write the closed one
as well.
"""

View File

@ -10,8 +10,10 @@
# #
# #
#########################################################################
import sys, os, tempfile, re
import sys, os, tempfile, re
from calibre.ebooks.rtf2xml import field_strings, copy
class FieldsSmall:
"""
=================
@ -19,7 +21,7 @@ Purpose
=================
Write tags for bookmarks, index and toc entry fields in a tokenized file.
This module does not handle toc or index tables. (This module won't be any
use to use to you unless you use it as part of the other modules.)
use to you unless you use it as part of the other modules.)
-----------
Method
-----------
@ -50,6 +52,7 @@ file.
self.__copy = copy
self.__write_to = tempfile.mktemp()
self.__run_level = run_level
def __initiate_values(self):
"""
Initiate all values.
@ -76,6 +79,7 @@ file.
tx = 'tx<nu<__________<(.*?)'
reg_st = ob + bk_st + tx + cb
self.__book_start = re.compile(r'%s' % reg_st)
def __before_body_func(self, line):
"""
Requires:
@ -89,6 +93,7 @@ file.
if self.__token_info == 'mi<mk<body-open_':
self.__state = 'body'
self.__write_obj.write(line)
def __body_func(self, line):
"""
Requires:
@ -105,6 +110,7 @@ file.
action(line, tag)
else:
self.__write_obj.write(line)
def __found_bookmark_func(self, line, tag):
"""
Requires:
@ -120,6 +126,7 @@ file.
self.__cb_count = 0
self.__state = 'bookmark'
self.__type_of_bookmark = tag
def __bookmark_func(self, line):
"""
Requires:
@ -148,6 +155,7 @@ file.
self.__write_obj.write(line)
elif line[0:2] == 'tx':
self.__text_string += line[17:-1]
def __parse_index_func(self, my_string):
"""
Requires:
@ -196,6 +204,7 @@ file.
my_changed_string += '<sub-entry>%s' % sub_entry
my_changed_string += '\n'
return my_changed_string
def __index_see_func(self, my_string):
in_see = 0
bracket_count = 0
@ -221,6 +230,7 @@ file.
in_see = 1
changed_string += '%s\n' % line
return changed_string, see_string
def __index_bookmark_func(self, my_string):
"""
Requires:
@ -257,6 +267,7 @@ file.
in_bookmark = 1
index_string += '%s\n' % line
return index_string, bookmark_string
def __index__format_func(self, my_string):
italics = 0
bold =0
@ -268,6 +279,7 @@ file.
if token_info == 'cw<in<index-ital':
italics = 1
return italics, bold
def __parse_toc_func(self, my_string):
"""
Requires:
@ -303,6 +315,7 @@ file.
my_changed_string += '<main-entry>%s' % main_entry
my_changed_string += '\n'
return my_changed_string
def __parse_bookmark_for_toc(self, my_string):
"""
Requires:
@ -348,6 +361,7 @@ file.
in_bookmark = 1
toc_string += '%s\n' % line
return toc_string, book_start_string, book_end_string
def __parse_bookmark_func(self, my_string, type):
"""
Requires:
@ -362,6 +376,7 @@ file.
my_changed_string = ('mi<tg<empty-att_<field<type>%s'
'<number>%s<update>none\n' % (type, my_string))
return my_changed_string
def __found_toc_index_func(self, line, tag):
"""
Requires:
@ -377,6 +392,7 @@ file.
self.__cb_count = 0
self.__state = 'toc_index'
self.__tag = tag
def __toc_index_func(self, line):
"""
Requires:
@ -404,6 +420,7 @@ file.
self.__write_obj.write(line)
else:
self.__text_string += line
def fix_fields(self):
"""
Requires:
@ -418,24 +435,19 @@ file.
bookmark.
"""
self.__initiate_values()
read_obj = open(self.__file)
self.__write_obj = open(self.__write_to, 'w')
line_to_read = '1'
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if action == None:
sys.stderr.write('no no matching state in module fields_small.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write('No matching state in module fields_small.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "fields_small.data")

View File

@ -25,8 +25,6 @@ class GetCharMap:
'char_file'--the file with the mappings
Returns:
nothing
@ -57,7 +55,6 @@ class GetCharMap:
fields[1].replace('\\colon', ':')
map_dict[fields[1]] = fields[3]
if not found_map:
msg = 'no map found\nmap is "%s"\n'%(map,)
raise self.__bug_handler, msg

View File

@ -11,8 +11,10 @@
# #
#########################################################################
import sys, os, tempfile, cStringIO
from calibre.ebooks.rtf2xml import get_char_map, copy
from calibre.ebooks.rtf2xml.char_set import char_set
class Hex2Utf8:
"""
Convert Microsoft hexadecimal numbers to utf-8
@ -108,7 +110,7 @@ class Hex2Utf8:
"""
self.__file=file
self.__copy = copy
if area_to_convert != 'preamble' and area_to_convert != 'body':
if area_to_convert not in ('preamble', 'body'):
msg = (
'in module "hex_2_utf8.py\n'
'"area_to_convert" must be "body" or "preamble"\n'
@ -136,12 +138,12 @@ class Hex2Utf8:
Set values, including those for the dictionaries.
The file that contains the maps is broken down into many different
sets. For example, for the Symbol font, there is the standard part for
hexidecimal numbers, and the the part for Microsoft charcters. Read
hexadecimal numbers, and the part for Microsoft characters. Read
each part in, and then combine them.
"""
# the default encoding system, the lower map for characters 0 through
# 128, and the encoding system for Microsoft characters.
# New on 2004-05-8: the self.__char_map is not in diretory with other
# New on 2004-05-8: the self.__char_map is not in directory with other
# modules
self.__char_file = cStringIO.StringIO(char_set)
char_map_obj = get_char_map.GetCharMap(
@ -188,7 +190,6 @@ class Hex2Utf8:
'body' : self.__body_func,
'mi<mk<body-open_' : self.__found_body_func,
'tx<hx<__________' : self.__hex_text_func,
# 'tx<nu<__________' : self.__text_func,
}
self.__body_state_dict = {
'preamble' : self.__preamble_for_body_func,
@ -228,9 +229,7 @@ class Hex2Utf8:
font = self.__current_dict_name
if self.__convert_caps\
and self.__caps_list[-1] == 'true'\
and font != 'Symbol'\
and font != 'Wingdings'\
and font != 'Zapf Dingbats':
and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
converted = self.__utf_token_to_caps_func(converted)
self.__write_obj.write(
'tx<ut<__________<%s\n' % converted
@ -240,9 +239,7 @@ class Hex2Utf8:
font = self.__current_dict_name
if self.__convert_caps\
and self.__caps_list[-1] == 'true'\
and font != 'Symbol'\
and font != 'Wingdings'\
and font != 'Zapf Dingbats':
and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
converted = converted.upper()
self.__write_obj.write(
'tx<nu<__________<%s\n' % converted
@ -282,17 +279,16 @@ class Hex2Utf8:
def __convert_preamble(self):
self.__state = 'preamble'
self.__write_obj = open(self.__write_to, 'w')
with open(self.__file, 'r') as read_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__preamble_state_dict.get(self.__state)
if action is None:
sys.stderr.write(_('error no state found in hex_2_utf8'),
self.__state
)
action(line)
self.__write_obj.close()
with open(self.__write_to, 'w') as self.__write_obj:
with open(self.__file, 'r') as read_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__preamble_state_dict.get(self.__state)
if action is None:
sys.stderr.write('error no state found in hex_2_utf8',
self.__state
)
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
@ -461,9 +457,9 @@ class Hex2Utf8:
if len(self.__caps_list) > 1:
self.__caps_list.pop()
else:
sys.stderr.write('Module is hex_2_utf8\n')
sys.stderr.write('method is __end_caps_func\n')
sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
sys.stderr.write('Module is hex_2_utf8\n'
'method is __end_caps_func\n'
'caps list should be more than one?\n') #self.__in_caps not set
def __text_func(self, line):
"""
@ -486,8 +482,7 @@ class Hex2Utf8:
hex_num = '\'%s' % hex_num
converted = self.__current_dict.get(hex_num)
if converted is None:
sys.stderr.write('module is hex_2_ut8\n')
sys.stderr.write('method is __text_func\n')
sys.stderr.write('module is hex_2_ut8\nmethod is __text_func\n')
sys.stderr.write('no hex value for "%s"\n' % hex_num)
else:
the_string += converted
@ -543,16 +538,15 @@ class Hex2Utf8:
def __convert_body(self):
self.__state = 'body'
with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w')
for line in read_obj:
self.__token_info = line[:16]
action = self.__body_state_dict.get(self.__state)
if action is None:
sys.stderr.write('error no state found in hex_2_utf8',
self.__state
)
action(line)
self.__write_obj.close()
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__body_state_dict.get(self.__state)
if action is None:
sys.stderr.write('error no state found in hex_2_utf8',
self.__state
)
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "body_utf_convert.data")

View File

@ -68,7 +68,6 @@ class Info:
'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'),
'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
'cw<di<edit-time_' : (self.__found_tag_with_tokens_func, 'editing-time'),
'cw<di<print-time' : (self.__found_tag_with_tokens_func, 'printing-time'),
'cw<di<backuptime' : (self.__found_tag_with_tokens_func, 'backup-time'),
@ -77,6 +76,7 @@ class Info:
'cw<di<numofchrws' : (self.__single_field_func, 'number-of-characters-without-space'),
'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'),
'cw<di<version___' : (self.__single_field_func, 'version'),
'cw<di<edit-time_' : (self.__single_field_func, 'editing-time'),
'cw<di<intern-ver' : (self.__single_field_func, 'internal-version-number'),
'cw<di<internalID' : (self.__single_field_func, 'internal-id-number'),
}

View File

@ -411,7 +411,7 @@ class Inline:
self.__set_list_func(line)
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write('No matching state in module inline_for_lists.py\n')
sys.stderr.write('No matching state in module inline.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)

View File

@ -214,7 +214,27 @@ class ProcessTokens:
'nosupersub' : ('ci', 'no-su-supe', self.__no_sup_sub_func),
'up' : ('ci', 'font-up___', self.divide_by_2),
'v' : ('ci', 'hidden____', self.default_func),
# table => tb
# underline
# can't see why it isn't a char info: 'ul'=>'ci'
'ul' : ('ci', 'underlined<continous', self.two_part_func),
'uld' : ('ci', 'underlined<dotted', self.two_part_func),
'uldash' : ('ci', 'underlined<dash', self.two_part_func),
'uldashd' : ('ci', 'underlined<dash-dot', self.two_part_func),
'uldashdd' : ('ci', 'underlined<dash-dot-dot', self.two_part_func),
'uldb' : ('ci', 'underlined<double', self.two_part_func),
'ulhwave' : ('ci', 'underlined<heavy-wave', self.two_part_func),
'ulldash' : ('ci', 'underlined<long-dash', self.two_part_func),
'ulth' : ('ci', 'underlined<thich', self.two_part_func),
'ulthd' : ('ci', 'underlined<thick-dotted', self.two_part_func),
'ulthdash' : ('ci', 'underlined<thick-dash', self.two_part_func),
'ulthdashd' : ('ci', 'underlined<thick-dash-dot', self.two_part_func),
'ulthdashdd' : ('ci', 'underlined<thick-dash-dot-dot', self.two_part_func),
'ulthldash' : ('ci', 'underlined<thick-long-dash', self.two_part_func),
'ululdbwave' : ('ci', 'underlined<double-wave', self.two_part_func),
'ulw' : ('ci', 'underlined<word', self.two_part_func),
'ulwave' : ('ci', 'underlined<wave', self.two_part_func),
'ulnone' : ('ci', 'underlined<false', self.two_part_func),
# table => tb
'trowd' : ('tb', 'row-def___', self.default_func),
'cell' : ('tb', 'cell______', self.default_func),
'row' : ('tb', 'row_______', self.default_func),
@ -274,25 +294,6 @@ class ProcessTokens:
'paperh' : ('pa', 'paper-hght', self.divide_by_20),
# annotation => an
'annotation' : ('an', 'annotation', self.default_func),
# underline
'ul' : ('ul', 'underlined<continous', self.two_part_func),
'uld' : ('ul', 'underlined<dotted', self.two_part_func),
'uldash' : ('ul', 'underlined<dash', self.two_part_func),
'uldashd' : ('ul', 'underlined<dash-dot', self.two_part_func),
'uldashdd' : ('ul', 'underlined<dash-dot-dot', self.two_part_func),
'uldb' : ('ul', 'underlined<double', self.two_part_func),
'ulhwave' : ('ul', 'underlined<heavy-wave', self.two_part_func),
'ulldash' : ('ul', 'underlined<long-dash', self.two_part_func),
'ulth' : ('ul', 'underlined<thich', self.two_part_func),
'ulthd' : ('ul', 'underlined<thick-dotted', self.two_part_func),
'ulthdash' : ('ul', 'underlined<thick-dash', self.two_part_func),
'ulthdashd' : ('ul', 'underlined<thick-dash-dot', self.two_part_func),
'ulthdashdd' : ('ul', 'underlined<thick-dash-dot-dot', self.two_part_func),
'ulthldash' : ('ul', 'underlined<thick-long-dash', self.two_part_func),
'ululdbwave' : ('ul', 'underlined<double-wave', self.two_part_func),
'ulw' : ('ul', 'underlined<word', self.two_part_func),
'ulwave' : ('ul', 'underlined<wave', self.two_part_func),
'ulnone' : ('ul', 'underlined<false', self.two_part_func),
# border => bd
'trbrdrh' : ('bd', 'bor-t-r-hi', self.default_func),
'trbrdrv' : ('bd', 'bor-t-r-vi', self.default_func),
@ -757,7 +758,7 @@ class ProcessTokens:
def process_cw(self, token):
"""Change the value of the control word by determining what dictionary
it belongs to"""
special = [ '*', ':', '}', '{', '~', '_', '-', ';' ]
special = [ '*', ':', '}', '{', '~', '_', '-', ';' ]
##if token != "{" or token != "}":
token = token[1:] # strip off leading \
token = token.replace(" ", "")
@ -793,7 +794,7 @@ class ProcessTokens:
raise self.__exception_handler, msg
the_index = token.find('\\ ')
if token is not None and the_index > -1:
if token is not None and the_index > -1:
msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
% line_count
raise self.__exception_handler, msg

View File

@ -496,7 +496,7 @@ Instead, ingore all section information in a field-block.
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action == None:
sys.stderr.write('no no matching state in module sections.py\n')
sys.stderr.write('no matching state in module sections.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()

View File

@ -103,8 +103,6 @@ class Styles:
'sect-note_' : 'endnotes-in-section',
# list=> ls
'list-text_' : 'list-text',
# this line must be wrong because it duplicates an earlier one
'list-text_' : 'list-text',
'list______' : 'list',
'list-lev-d' : 'list-level-definition',
'list-cardi' : 'list-cardinal-numbering',

View File

@ -114,6 +114,7 @@ class Tokenize:
# this is for older RTF
input_file = self.__par_exp.sub('\n\\par \n', input_file)
input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
input_file = self.__cs_ast.sub("\g<1>", input_file)
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
#remove \n in bin data
@ -163,6 +164,8 @@ class Tokenize:
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
#this is for old RTF
self.__par_exp = re.compile(r'(\\\n+|\\ )')
#handle improper \cs char-style that is preceded by \* but has no opening {
self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
#handle cw using a digit as argument and without space as delimiter
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")

View File

@ -12,6 +12,7 @@ import os, re
from calibre import prepare_string_for_xml, isbytestring
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.conversion.preprocess import DocAnalysis
from calibre.utils.cleantext import clean_ascii_chars

View File

@ -477,17 +477,17 @@ class BIBTEX(CatalogPlugin): # {{{
if opts.bibfile_enc in bibfile_enc :
bibfile_enc = opts.bibfile_enc
else :
log(" WARNING: incorrect --choose-encoding flag, revert to default")
log.warn("Incorrect --choose-encoding flag, revert to default")
bibfile_enc = bibfile_enc[0]
if opts.bibfile_enctag in bibfile_enctag :
bibfile_enctag = opts.bibfile_enctag
else :
log(" WARNING: incorrect --choose-encoding-configuration flag, revert to default")
log.warn("Incorrect --choose-encoding-configuration flag, revert to default")
bibfile_enctag = bibfile_enctag[0]
if opts.bib_entry in bib_entry :
bib_entry = opts.bib_entry
else :
log(" WARNING: incorrect --entry-type flag, revert to default")
log.warn("Incorrect --entry-type flag, revert to default")
bib_entry = bib_entry[0]
if opts.verbose:
@ -544,7 +544,7 @@ class BIBTEX(CatalogPlugin): # {{{
elif opts.impcit == 'True' :
citation_bibtex= True
else :
log(" WARNING: incorrect --create-citation, revert to default")
log.warn("Incorrect --create-citation, revert to default")
citation_bibtex= True
else :
citation_bibtex= opts.impcit
@ -556,7 +556,7 @@ class BIBTEX(CatalogPlugin): # {{{
elif opts.addfiles == 'True' :
addfiles_bibtex = True
else :
log(" WARNING: incorrect --add-files-path, revert to default")
log.warn("Incorrect --add-files-path, revert to default")
addfiles_bibtex= True
else :
addfiles_bibtex = opts.addfiles
@ -574,7 +574,7 @@ class BIBTEX(CatalogPlugin): # {{{
if bib_entry == 'book' :
nb_books = len(filter(check_entry_book_valid, data))
if nb_books < nb_entries :
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries))
nb_entries = nb_books
# If connected device, add 'On Device' values to data