mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
RTF Input: Handle underlined text. Fixes #845328 (Underlined text in RTF not propagated when converted to ePub)
This commit is contained in:
commit
f1867f1128
@ -98,7 +98,7 @@
|
||||
<xsl:apply-templates/>
|
||||
</emph>
|
||||
</xsl:when>
|
||||
<xsl:when test = "@underlined">
|
||||
<xsl:when test = "@underlined and @underlined != 'false'">
|
||||
<emph rend = "paragraph-emph-underlined">
|
||||
<xsl:apply-templates/>
|
||||
</emph>
|
||||
@ -220,7 +220,7 @@
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name="parse-styles-attrs">
|
||||
<!--<xsl:text>position:relative;</xsl:text>-->
|
||||
<!--<xsl:text>position:relative;</xsl:text>
|
||||
<xsl:if test="@space-before">
|
||||
<xsl:text>padding-top:</xsl:text>
|
||||
<xsl:value-of select="@space-before"/>
|
||||
@ -230,7 +230,7 @@
|
||||
<xsl:text>padding-bottom:</xsl:text>
|
||||
<xsl:value-of select="@space-after"/>
|
||||
<xsl:text>pt;</xsl:text>
|
||||
</xsl:if>
|
||||
</xsl:if>-->
|
||||
<xsl:if test="@left-indent">
|
||||
<xsl:text>padding-left:</xsl:text>
|
||||
<xsl:value-of select="@left-indent"/>
|
||||
@ -256,15 +256,15 @@
|
||||
<xsl:value-of select="'italic'"/>
|
||||
<xsl:text>;</xsl:text>
|
||||
</xsl:if>
|
||||
<xsl:if test="@underline and @underline != 'false'">
|
||||
<xsl:if test="@underlined and @underlined != 'false'">
|
||||
<xsl:text>text-decoration:underline</xsl:text>
|
||||
<xsl:text>;</xsl:text>
|
||||
</xsl:if>
|
||||
<xsl:if test="@line-spacing">
|
||||
<!--<xsl:if test="@line-spacing">
|
||||
<xsl:text>line-height:</xsl:text>
|
||||
<xsl:value-of select="@line-spacing"/>
|
||||
<xsl:text>pt;</xsl:text>
|
||||
</xsl:if>
|
||||
</xsl:if>-->
|
||||
<xsl:if test="(@align = 'just')">
|
||||
<xsl:text>text-align: justify;</xsl:text>
|
||||
</xsl:if>
|
||||
@ -314,7 +314,6 @@
|
||||
</xsl:attribute>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
@ -453,6 +452,15 @@
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match = "rtf:field[@type='bookmark-start']">
|
||||
<xsl:element name ="a">
|
||||
<xsl:attribute name = "id">
|
||||
<xsl:value-of select = "@number"/>
|
||||
</xsl:attribute>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match = "rtf:field">
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
|
@ -162,7 +162,8 @@ def set_metadata(stream, options):
|
||||
index = src.rindex('}')
|
||||
return src[:index] + r'{\ '[:-1] + name + ' ' + val + '}}'
|
||||
src, pos = get_document_info(stream)
|
||||
if not src:
|
||||
print 'I was thre'
|
||||
if src is not None:
|
||||
create_metadata(stream, options)
|
||||
else:
|
||||
olen = len(src)
|
||||
|
@ -41,7 +41,7 @@ border_style_map = {
|
||||
|
||||
class InlineClass(etree.XSLTExtension):
|
||||
|
||||
FMTS = ('italics', 'bold', 'underlined', 'strike-through', 'small-caps')
|
||||
FMTS = ('italics', 'bold', 'strike-through', 'small-caps')
|
||||
|
||||
def __init__(self, log):
|
||||
etree.XSLTExtension.__init__(self)
|
||||
@ -54,6 +54,9 @@ class InlineClass(etree.XSLTExtension):
|
||||
for x in self.FMTS:
|
||||
if input_node.get(x, None) == 'true':
|
||||
classes.append(x)
|
||||
#underlined is special
|
||||
if input_node.get('underlined', 'false') != 'false':
|
||||
classes.append('underlined')
|
||||
fs = input_node.get('font-size', False)
|
||||
if fs:
|
||||
if fs not in self.font_sizes:
|
||||
@ -78,12 +81,13 @@ class RTFInput(InputFormatPlugin):
|
||||
def generate_xml(self, stream):
|
||||
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
|
||||
ofile = 'dataxml.xml'
|
||||
run_lev, debug_dir = 1, None
|
||||
run_lev, debug_dir, indent_out = 1, None, 0
|
||||
if getattr(self.opts, 'debug_pipeline', None) is not None:
|
||||
try:
|
||||
os.mkdir(debug_dir)
|
||||
os.mkdir('rtfdebug')
|
||||
debug_dir = 'rtfdebug'
|
||||
run_lev = 4
|
||||
indent_out = 1
|
||||
self.log('Running RTFParser in debug mode')
|
||||
except:
|
||||
self.log.warn('Impossible to run RTFParser in debug mode')
|
||||
@ -108,7 +112,7 @@ class RTFInput(InputFormatPlugin):
|
||||
|
||||
# Indent resulting XML.
|
||||
# Default is 0 (no indent).
|
||||
indent = 1,
|
||||
indent = indent_out,
|
||||
|
||||
# Form lists from RTF. Default is 1.
|
||||
form_lists = 1,
|
||||
@ -157,7 +161,8 @@ class RTFInput(InputFormatPlugin):
|
||||
with open(name, 'wb') as f:
|
||||
f.write(data)
|
||||
imap[count] = name
|
||||
#open(name+'.hex', 'wb').write(enc)
|
||||
# with open(name+'.hex', 'wb') as f:
|
||||
# f.write(enc)
|
||||
return self.convert_images(imap)
|
||||
|
||||
def convert_images(self, imap):
|
||||
@ -319,4 +324,6 @@ class RTFInput(InputFormatPlugin):
|
||||
opf.render(open('metadata.opf', 'wb'))
|
||||
return os.path.abspath('metadata.opf')
|
||||
|
||||
|
||||
#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
|
||||
# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
|
||||
# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"
|
||||
|
@ -372,17 +372,17 @@ class ParseRtf:
|
||||
old_rtf = old_rtf_obj.check_if_old_rtf()
|
||||
if old_rtf:
|
||||
if self.__run_level > 5:
|
||||
msg = 'older RTF\n'
|
||||
msg = 'Older RTF\n'
|
||||
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
||||
raise RtfInvalidCodeException, msg
|
||||
if self.__run_level > 1:
|
||||
sys.stderr.write('File could be older RTF...\n')
|
||||
sys.stderr.write(_('File could be older RTF...\n'))
|
||||
if found_destination:
|
||||
if self.__run_level > 1:
|
||||
sys.stderr.write(
|
||||
sys.stderr.write(_(
|
||||
'File also has newer RTF.\n'
|
||||
'Will do the best to convert.\n'
|
||||
)
|
||||
))
|
||||
add_brackets_obj = add_brackets.AddBrackets(
|
||||
in_file = self.__temp_file,
|
||||
bug_handler = RtfInvalidCodeException,
|
||||
|
@ -53,4 +53,3 @@ class CheckBrackets:
|
||||
'total number of brackets is %s') % self.__bracket_count
|
||||
return (False, msg)
|
||||
return (True, "Brackets match!")
|
||||
|
||||
|
@ -25,7 +25,7 @@ class Configure:
|
||||
if self.__show_config_file and self.__configuration_file:
|
||||
sys.stderr.write('configuration file is "%s"\n' % self.__configuration_file)
|
||||
if self.__show_config_file and not self.__configuration_file:
|
||||
sys.stderr.write('No configuraiton file found; using default vaules\n')
|
||||
sys.stderr.write('No configuraiton file found; using default values\n')
|
||||
if self.__configuration_file:
|
||||
read_obj = open(self.__configuration_file, 'r')
|
||||
line_to_read = 1
|
||||
|
@ -43,6 +43,7 @@ class DeleteInfo:
|
||||
'cw<it<listtable_',
|
||||
'cw<it<revi-table',
|
||||
'cw<ls<list-lev-d',
|
||||
# Field allowed
|
||||
'cw<fd<field-inst',
|
||||
'cw<an<book-mk-st',
|
||||
'cw<an<book-mk-en',
|
||||
@ -81,7 +82,7 @@ class DeleteInfo:
|
||||
self.__ob = line
|
||||
return False
|
||||
else:
|
||||
# write previous bracket, since didn't fine asterisk
|
||||
# write previous bracket, since didn't find asterisk
|
||||
if self.__ob:
|
||||
self.__write_obj.write(self.__ob)
|
||||
self.__ob = 0
|
||||
@ -104,7 +105,7 @@ class DeleteInfo:
|
||||
If you find that you are in a delete group, and the previous
|
||||
token is not an open bracket (self.__ob = 0), that means
|
||||
that the delete group is nested inside another acceptable
|
||||
detination group. In this case, you have alrady written
|
||||
destination group. In this case, you have already written
|
||||
the open bracket, so you will need to write the closed one
|
||||
as well.
|
||||
"""
|
||||
|
@ -11,7 +11,9 @@
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os, tempfile, re
|
||||
|
||||
from calibre.ebooks.rtf2xml import field_strings, copy
|
||||
|
||||
class FieldsSmall:
|
||||
"""
|
||||
=================
|
||||
@ -19,7 +21,7 @@ Purpose
|
||||
=================
|
||||
Write tags for bookmarks, index and toc entry fields in a tokenized file.
|
||||
This module does not handle toc or index tables. (This module won't be any
|
||||
use to use to you unless you use it as part of the other modules.)
|
||||
use to you unless you use it as part of the other modules.)
|
||||
-----------
|
||||
Method
|
||||
-----------
|
||||
@ -50,6 +52,7 @@ file.
|
||||
self.__copy = copy
|
||||
self.__write_to = tempfile.mktemp()
|
||||
self.__run_level = run_level
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
Initiate all values.
|
||||
@ -76,6 +79,7 @@ file.
|
||||
tx = 'tx<nu<__________<(.*?)'
|
||||
reg_st = ob + bk_st + tx + cb
|
||||
self.__book_start = re.compile(r'%s' % reg_st)
|
||||
|
||||
def __before_body_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -89,6 +93,7 @@ file.
|
||||
if self.__token_info == 'mi<mk<body-open_':
|
||||
self.__state = 'body'
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __body_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -105,6 +110,7 @@ file.
|
||||
action(line, tag)
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __found_bookmark_func(self, line, tag):
|
||||
"""
|
||||
Requires:
|
||||
@ -120,6 +126,7 @@ file.
|
||||
self.__cb_count = 0
|
||||
self.__state = 'bookmark'
|
||||
self.__type_of_bookmark = tag
|
||||
|
||||
def __bookmark_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -148,6 +155,7 @@ file.
|
||||
self.__write_obj.write(line)
|
||||
elif line[0:2] == 'tx':
|
||||
self.__text_string += line[17:-1]
|
||||
|
||||
def __parse_index_func(self, my_string):
|
||||
"""
|
||||
Requires:
|
||||
@ -196,6 +204,7 @@ file.
|
||||
my_changed_string += '<sub-entry>%s' % sub_entry
|
||||
my_changed_string += '\n'
|
||||
return my_changed_string
|
||||
|
||||
def __index_see_func(self, my_string):
|
||||
in_see = 0
|
||||
bracket_count = 0
|
||||
@ -221,6 +230,7 @@ file.
|
||||
in_see = 1
|
||||
changed_string += '%s\n' % line
|
||||
return changed_string, see_string
|
||||
|
||||
def __index_bookmark_func(self, my_string):
|
||||
"""
|
||||
Requires:
|
||||
@ -257,6 +267,7 @@ file.
|
||||
in_bookmark = 1
|
||||
index_string += '%s\n' % line
|
||||
return index_string, bookmark_string
|
||||
|
||||
def __index__format_func(self, my_string):
|
||||
italics = 0
|
||||
bold =0
|
||||
@ -268,6 +279,7 @@ file.
|
||||
if token_info == 'cw<in<index-ital':
|
||||
italics = 1
|
||||
return italics, bold
|
||||
|
||||
def __parse_toc_func(self, my_string):
|
||||
"""
|
||||
Requires:
|
||||
@ -303,6 +315,7 @@ file.
|
||||
my_changed_string += '<main-entry>%s' % main_entry
|
||||
my_changed_string += '\n'
|
||||
return my_changed_string
|
||||
|
||||
def __parse_bookmark_for_toc(self, my_string):
|
||||
"""
|
||||
Requires:
|
||||
@ -348,6 +361,7 @@ file.
|
||||
in_bookmark = 1
|
||||
toc_string += '%s\n' % line
|
||||
return toc_string, book_start_string, book_end_string
|
||||
|
||||
def __parse_bookmark_func(self, my_string, type):
|
||||
"""
|
||||
Requires:
|
||||
@ -362,6 +376,7 @@ file.
|
||||
my_changed_string = ('mi<tg<empty-att_<field<type>%s'
|
||||
'<number>%s<update>none\n' % (type, my_string))
|
||||
return my_changed_string
|
||||
|
||||
def __found_toc_index_func(self, line, tag):
|
||||
"""
|
||||
Requires:
|
||||
@ -377,6 +392,7 @@ file.
|
||||
self.__cb_count = 0
|
||||
self.__state = 'toc_index'
|
||||
self.__tag = tag
|
||||
|
||||
def __toc_index_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -404,6 +420,7 @@ file.
|
||||
self.__write_obj.write(line)
|
||||
else:
|
||||
self.__text_string += line
|
||||
|
||||
def fix_fields(self):
|
||||
"""
|
||||
Requires:
|
||||
@ -418,24 +435,19 @@ file.
|
||||
bookmark.
|
||||
"""
|
||||
self.__initiate_values()
|
||||
read_obj = open(self.__file)
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
line_to_read = '1'
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
self.__ob_count = line[-5:-1]
|
||||
if self.__token_info == 'cb<nu<clos-brack':
|
||||
self.__cb_count = line[-5:-1]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action == None:
|
||||
sys.stderr.write('no no matching state in module fields_small.py\n')
|
||||
if action is None:
|
||||
sys.stderr.write('No matching state in module fields_small.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
action(line)
|
||||
read_obj.close()
|
||||
self.__write_obj.close()
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "fields_small.data")
|
||||
|
@ -25,8 +25,6 @@ class GetCharMap:
|
||||
|
||||
'char_file'--the file with the mappings
|
||||
|
||||
|
||||
|
||||
Returns:
|
||||
|
||||
nothing
|
||||
@ -57,7 +55,6 @@ class GetCharMap:
|
||||
fields[1].replace('\\colon', ':')
|
||||
map_dict[fields[1]] = fields[3]
|
||||
|
||||
|
||||
if not found_map:
|
||||
msg = 'no map found\nmap is "%s"\n'%(map,)
|
||||
raise self.__bug_handler, msg
|
||||
|
@ -11,8 +11,10 @@
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os, tempfile, cStringIO
|
||||
|
||||
from calibre.ebooks.rtf2xml import get_char_map, copy
|
||||
from calibre.ebooks.rtf2xml.char_set import char_set
|
||||
|
||||
class Hex2Utf8:
|
||||
"""
|
||||
Convert Microsoft hexadecimal numbers to utf-8
|
||||
@ -108,7 +110,7 @@ class Hex2Utf8:
|
||||
"""
|
||||
self.__file=file
|
||||
self.__copy = copy
|
||||
if area_to_convert != 'preamble' and area_to_convert != 'body':
|
||||
if area_to_convert not in ('preamble', 'body'):
|
||||
msg = (
|
||||
'in module "hex_2_utf8.py\n'
|
||||
'"area_to_convert" must be "body" or "preamble"\n'
|
||||
@ -136,12 +138,12 @@ class Hex2Utf8:
|
||||
Set values, including those for the dictionaries.
|
||||
The file that contains the maps is broken down into many different
|
||||
sets. For example, for the Symbol font, there is the standard part for
|
||||
hexidecimal numbers, and the the part for Microsoft charcters. Read
|
||||
hexadecimal numbers, and the part for Microsoft characters. Read
|
||||
each part in, and then combine them.
|
||||
"""
|
||||
# the default encoding system, the lower map for characters 0 through
|
||||
# 128, and the encoding system for Microsoft characters.
|
||||
# New on 2004-05-8: the self.__char_map is not in diretory with other
|
||||
# New on 2004-05-8: the self.__char_map is not in directory with other
|
||||
# modules
|
||||
self.__char_file = cStringIO.StringIO(char_set)
|
||||
char_map_obj = get_char_map.GetCharMap(
|
||||
@ -188,7 +190,6 @@ class Hex2Utf8:
|
||||
'body' : self.__body_func,
|
||||
'mi<mk<body-open_' : self.__found_body_func,
|
||||
'tx<hx<__________' : self.__hex_text_func,
|
||||
# 'tx<nu<__________' : self.__text_func,
|
||||
}
|
||||
self.__body_state_dict = {
|
||||
'preamble' : self.__preamble_for_body_func,
|
||||
@ -228,9 +229,7 @@ class Hex2Utf8:
|
||||
font = self.__current_dict_name
|
||||
if self.__convert_caps\
|
||||
and self.__caps_list[-1] == 'true'\
|
||||
and font != 'Symbol'\
|
||||
and font != 'Wingdings'\
|
||||
and font != 'Zapf Dingbats':
|
||||
and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
|
||||
converted = self.__utf_token_to_caps_func(converted)
|
||||
self.__write_obj.write(
|
||||
'tx<ut<__________<%s\n' % converted
|
||||
@ -240,9 +239,7 @@ class Hex2Utf8:
|
||||
font = self.__current_dict_name
|
||||
if self.__convert_caps\
|
||||
and self.__caps_list[-1] == 'true'\
|
||||
and font != 'Symbol'\
|
||||
and font != 'Wingdings'\
|
||||
and font != 'Zapf Dingbats':
|
||||
and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
|
||||
converted = converted.upper()
|
||||
self.__write_obj.write(
|
||||
'tx<nu<__________<%s\n' % converted
|
||||
@ -282,17 +279,16 @@ class Hex2Utf8:
|
||||
|
||||
def __convert_preamble(self):
|
||||
self.__state = 'preamble'
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
action = self.__preamble_state_dict.get(self.__state)
|
||||
if action is None:
|
||||
sys.stderr.write(_('error no state found in hex_2_utf8'),
|
||||
sys.stderr.write('error no state found in hex_2_utf8',
|
||||
self.__state
|
||||
)
|
||||
action(line)
|
||||
self.__write_obj.close()
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
|
||||
@ -461,9 +457,9 @@ class Hex2Utf8:
|
||||
if len(self.__caps_list) > 1:
|
||||
self.__caps_list.pop()
|
||||
else:
|
||||
sys.stderr.write('Module is hex_2_utf8\n')
|
||||
sys.stderr.write('method is __end_caps_func\n')
|
||||
sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
|
||||
sys.stderr.write('Module is hex_2_utf8\n'
|
||||
'method is __end_caps_func\n'
|
||||
'caps list should be more than one?\n') #self.__in_caps not set
|
||||
|
||||
def __text_func(self, line):
|
||||
"""
|
||||
@ -486,8 +482,7 @@ class Hex2Utf8:
|
||||
hex_num = '\'%s' % hex_num
|
||||
converted = self.__current_dict.get(hex_num)
|
||||
if converted is None:
|
||||
sys.stderr.write('module is hex_2_ut8\n')
|
||||
sys.stderr.write('method is __text_func\n')
|
||||
sys.stderr.write('module is hex_2_ut8\nmethod is __text_func\n')
|
||||
sys.stderr.write('no hex value for "%s"\n' % hex_num)
|
||||
else:
|
||||
the_string += converted
|
||||
@ -543,7 +538,7 @@ class Hex2Utf8:
|
||||
def __convert_body(self):
|
||||
self.__state = 'body'
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
action = self.__body_state_dict.get(self.__state)
|
||||
@ -552,7 +547,6 @@ class Hex2Utf8:
|
||||
self.__state
|
||||
)
|
||||
action(line)
|
||||
self.__write_obj.close()
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
|
||||
|
@ -68,7 +68,6 @@ class Info:
|
||||
|
||||
'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'),
|
||||
'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
|
||||
'cw<di<edit-time_' : (self.__found_tag_with_tokens_func, 'editing-time'),
|
||||
'cw<di<print-time' : (self.__found_tag_with_tokens_func, 'printing-time'),
|
||||
'cw<di<backuptime' : (self.__found_tag_with_tokens_func, 'backup-time'),
|
||||
|
||||
@ -77,6 +76,7 @@ class Info:
|
||||
'cw<di<numofchrws' : (self.__single_field_func, 'number-of-characters-without-space'),
|
||||
'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'),
|
||||
'cw<di<version___' : (self.__single_field_func, 'version'),
|
||||
'cw<di<edit-time_' : (self.__single_field_func, 'editing-time'),
|
||||
'cw<di<intern-ver' : (self.__single_field_func, 'internal-version-number'),
|
||||
'cw<di<internalID' : (self.__single_field_func, 'internal-id-number'),
|
||||
}
|
||||
|
@ -411,7 +411,7 @@ class Inline:
|
||||
self.__set_list_func(line)
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action is None:
|
||||
sys.stderr.write('No matching state in module inline_for_lists.py\n')
|
||||
sys.stderr.write('No matching state in module inline.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
action(line)
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
|
@ -214,6 +214,26 @@ class ProcessTokens:
|
||||
'nosupersub' : ('ci', 'no-su-supe', self.__no_sup_sub_func),
|
||||
'up' : ('ci', 'font-up___', self.divide_by_2),
|
||||
'v' : ('ci', 'hidden____', self.default_func),
|
||||
# underline
|
||||
# can't see why it isn't a char info: 'ul'=>'ci'
|
||||
'ul' : ('ci', 'underlined<continous', self.two_part_func),
|
||||
'uld' : ('ci', 'underlined<dotted', self.two_part_func),
|
||||
'uldash' : ('ci', 'underlined<dash', self.two_part_func),
|
||||
'uldashd' : ('ci', 'underlined<dash-dot', self.two_part_func),
|
||||
'uldashdd' : ('ci', 'underlined<dash-dot-dot', self.two_part_func),
|
||||
'uldb' : ('ci', 'underlined<double', self.two_part_func),
|
||||
'ulhwave' : ('ci', 'underlined<heavy-wave', self.two_part_func),
|
||||
'ulldash' : ('ci', 'underlined<long-dash', self.two_part_func),
|
||||
'ulth' : ('ci', 'underlined<thich', self.two_part_func),
|
||||
'ulthd' : ('ci', 'underlined<thick-dotted', self.two_part_func),
|
||||
'ulthdash' : ('ci', 'underlined<thick-dash', self.two_part_func),
|
||||
'ulthdashd' : ('ci', 'underlined<thick-dash-dot', self.two_part_func),
|
||||
'ulthdashdd' : ('ci', 'underlined<thick-dash-dot-dot', self.two_part_func),
|
||||
'ulthldash' : ('ci', 'underlined<thick-long-dash', self.two_part_func),
|
||||
'ululdbwave' : ('ci', 'underlined<double-wave', self.two_part_func),
|
||||
'ulw' : ('ci', 'underlined<word', self.two_part_func),
|
||||
'ulwave' : ('ci', 'underlined<wave', self.two_part_func),
|
||||
'ulnone' : ('ci', 'underlined<false', self.two_part_func),
|
||||
# table => tb
|
||||
'trowd' : ('tb', 'row-def___', self.default_func),
|
||||
'cell' : ('tb', 'cell______', self.default_func),
|
||||
@ -274,25 +294,6 @@ class ProcessTokens:
|
||||
'paperh' : ('pa', 'paper-hght', self.divide_by_20),
|
||||
# annotation => an
|
||||
'annotation' : ('an', 'annotation', self.default_func),
|
||||
# underline
|
||||
'ul' : ('ul', 'underlined<continous', self.two_part_func),
|
||||
'uld' : ('ul', 'underlined<dotted', self.two_part_func),
|
||||
'uldash' : ('ul', 'underlined<dash', self.two_part_func),
|
||||
'uldashd' : ('ul', 'underlined<dash-dot', self.two_part_func),
|
||||
'uldashdd' : ('ul', 'underlined<dash-dot-dot', self.two_part_func),
|
||||
'uldb' : ('ul', 'underlined<double', self.two_part_func),
|
||||
'ulhwave' : ('ul', 'underlined<heavy-wave', self.two_part_func),
|
||||
'ulldash' : ('ul', 'underlined<long-dash', self.two_part_func),
|
||||
'ulth' : ('ul', 'underlined<thich', self.two_part_func),
|
||||
'ulthd' : ('ul', 'underlined<thick-dotted', self.two_part_func),
|
||||
'ulthdash' : ('ul', 'underlined<thick-dash', self.two_part_func),
|
||||
'ulthdashd' : ('ul', 'underlined<thick-dash-dot', self.two_part_func),
|
||||
'ulthdashdd' : ('ul', 'underlined<thick-dash-dot-dot', self.two_part_func),
|
||||
'ulthldash' : ('ul', 'underlined<thick-long-dash', self.two_part_func),
|
||||
'ululdbwave' : ('ul', 'underlined<double-wave', self.two_part_func),
|
||||
'ulw' : ('ul', 'underlined<word', self.two_part_func),
|
||||
'ulwave' : ('ul', 'underlined<wave', self.two_part_func),
|
||||
'ulnone' : ('ul', 'underlined<false', self.two_part_func),
|
||||
# border => bd
|
||||
'trbrdrh' : ('bd', 'bor-t-r-hi', self.default_func),
|
||||
'trbrdrv' : ('bd', 'bor-t-r-vi', self.default_func),
|
||||
|
@ -496,7 +496,7 @@ Instead, ingore all section information in a field-block.
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action == None:
|
||||
sys.stderr.write('no no matching state in module sections.py\n')
|
||||
sys.stderr.write('no matching state in module sections.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
action(line)
|
||||
read_obj.close()
|
||||
|
@ -103,8 +103,6 @@ class Styles:
|
||||
'sect-note_' : 'endnotes-in-section',
|
||||
# list=> ls
|
||||
'list-text_' : 'list-text',
|
||||
# this line must be wrong because it duplicates an earlier one
|
||||
'list-text_' : 'list-text',
|
||||
'list______' : 'list',
|
||||
'list-lev-d' : 'list-level-definition',
|
||||
'list-cardi' : 'list-cardinal-numbering',
|
||||
|
@ -114,6 +114,7 @@ class Tokenize:
|
||||
# this is for older RTF
|
||||
input_file = self.__par_exp.sub('\n\\par \n', input_file)
|
||||
input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
|
||||
input_file = self.__cs_ast.sub("\g<1>", input_file)
|
||||
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
|
||||
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
|
||||
#remove \n in bin data
|
||||
@ -163,6 +164,8 @@ class Tokenize:
|
||||
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
||||
#this is for old RTF
|
||||
self.__par_exp = re.compile(r'(\\\n+|\\ )')
|
||||
#handle improper cs char-style with \* before without {
|
||||
self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
|
||||
#handle cw using a digit as argument and without space as delimiter
|
||||
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
|
||||
|
||||
|
@ -12,6 +12,7 @@ import os, re
|
||||
|
||||
from calibre import prepare_string_for_xml, isbytestring
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
|
||||
from calibre.ebooks.conversion.preprocess import DocAnalysis
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
|
||||
|
@ -477,17 +477,17 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
if opts.bibfile_enc in bibfile_enc :
|
||||
bibfile_enc = opts.bibfile_enc
|
||||
else :
|
||||
log(" WARNING: incorrect --choose-encoding flag, revert to default")
|
||||
log.warn("Incorrect --choose-encoding flag, revert to default")
|
||||
bibfile_enc = bibfile_enc[0]
|
||||
if opts.bibfile_enctag in bibfile_enctag :
|
||||
bibfile_enctag = opts.bibfile_enctag
|
||||
else :
|
||||
log(" WARNING: incorrect --choose-encoding-configuration flag, revert to default")
|
||||
log.warn("Incorrect --choose-encoding-configuration flag, revert to default")
|
||||
bibfile_enctag = bibfile_enctag[0]
|
||||
if opts.bib_entry in bib_entry :
|
||||
bib_entry = opts.bib_entry
|
||||
else :
|
||||
log(" WARNING: incorrect --entry-type flag, revert to default")
|
||||
log.warn("Incorrect --entry-type flag, revert to default")
|
||||
bib_entry = bib_entry[0]
|
||||
|
||||
if opts.verbose:
|
||||
@ -544,7 +544,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
elif opts.impcit == 'True' :
|
||||
citation_bibtex= True
|
||||
else :
|
||||
log(" WARNING: incorrect --create-citation, revert to default")
|
||||
log.warn("Incorrect --create-citation, revert to default")
|
||||
citation_bibtex= True
|
||||
else :
|
||||
citation_bibtex= opts.impcit
|
||||
@ -556,7 +556,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
elif opts.addfiles == 'True' :
|
||||
addfiles_bibtex = True
|
||||
else :
|
||||
log(" WARNING: incorrect --add-files-path, revert to default")
|
||||
log.warn("Incorrect --add-files-path, revert to default")
|
||||
addfiles_bibtex= True
|
||||
else :
|
||||
addfiles_bibtex = opts.addfiles
|
||||
@ -574,7 +574,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
if bib_entry == 'book' :
|
||||
nb_books = len(filter(check_entry_book_valid, data))
|
||||
if nb_books < nb_entries :
|
||||
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
|
||||
log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries))
|
||||
nb_entries = nb_books
|
||||
|
||||
# If connected device, add 'On Device' values to data
|
||||
|
Loading…
x
Reference in New Issue
Block a user