mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
RTF Input: Handle underlined text. Fixes #845328 (Underlined text in RTF not propagated when converted to ePub)
This commit is contained in:
commit
f1867f1128
@ -98,7 +98,7 @@
|
|||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</emph>
|
</emph>
|
||||||
</xsl:when>
|
</xsl:when>
|
||||||
<xsl:when test = "@underlined">
|
<xsl:when test = "@underlined and @underlined != 'false'">
|
||||||
<emph rend = "paragraph-emph-underlined">
|
<emph rend = "paragraph-emph-underlined">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</emph>
|
</emph>
|
||||||
@ -220,7 +220,7 @@
|
|||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template name="parse-styles-attrs">
|
<xsl:template name="parse-styles-attrs">
|
||||||
<!--<xsl:text>position:relative;</xsl:text>-->
|
<!--<xsl:text>position:relative;</xsl:text>
|
||||||
<xsl:if test="@space-before">
|
<xsl:if test="@space-before">
|
||||||
<xsl:text>padding-top:</xsl:text>
|
<xsl:text>padding-top:</xsl:text>
|
||||||
<xsl:value-of select="@space-before"/>
|
<xsl:value-of select="@space-before"/>
|
||||||
@ -230,7 +230,7 @@
|
|||||||
<xsl:text>padding-bottom:</xsl:text>
|
<xsl:text>padding-bottom:</xsl:text>
|
||||||
<xsl:value-of select="@space-after"/>
|
<xsl:value-of select="@space-after"/>
|
||||||
<xsl:text>pt;</xsl:text>
|
<xsl:text>pt;</xsl:text>
|
||||||
</xsl:if>
|
</xsl:if>-->
|
||||||
<xsl:if test="@left-indent">
|
<xsl:if test="@left-indent">
|
||||||
<xsl:text>padding-left:</xsl:text>
|
<xsl:text>padding-left:</xsl:text>
|
||||||
<xsl:value-of select="@left-indent"/>
|
<xsl:value-of select="@left-indent"/>
|
||||||
@ -256,15 +256,15 @@
|
|||||||
<xsl:value-of select="'italic'"/>
|
<xsl:value-of select="'italic'"/>
|
||||||
<xsl:text>;</xsl:text>
|
<xsl:text>;</xsl:text>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
<xsl:if test="@underline and @underline != 'false'">
|
<xsl:if test="@underlined and @underlined != 'false'">
|
||||||
<xsl:text>text-decoration:underline</xsl:text>
|
<xsl:text>text-decoration:underline</xsl:text>
|
||||||
<xsl:text>;</xsl:text>
|
<xsl:text>;</xsl:text>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
<xsl:if test="@line-spacing">
|
<!--<xsl:if test="@line-spacing">
|
||||||
<xsl:text>line-height:</xsl:text>
|
<xsl:text>line-height:</xsl:text>
|
||||||
<xsl:value-of select="@line-spacing"/>
|
<xsl:value-of select="@line-spacing"/>
|
||||||
<xsl:text>pt;</xsl:text>
|
<xsl:text>pt;</xsl:text>
|
||||||
</xsl:if>
|
</xsl:if>-->
|
||||||
<xsl:if test="(@align = 'just')">
|
<xsl:if test="(@align = 'just')">
|
||||||
<xsl:text>text-align: justify;</xsl:text>
|
<xsl:text>text-align: justify;</xsl:text>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
@ -314,7 +314,6 @@
|
|||||||
</xsl:attribute>
|
</xsl:attribute>
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
|
|
||||||
</xsl:otherwise>
|
</xsl:otherwise>
|
||||||
</xsl:choose>
|
</xsl:choose>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
@ -452,6 +451,15 @@
|
|||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match = "rtf:field[@type='bookmark-start']">
|
||||||
|
<xsl:element name ="a">
|
||||||
|
<xsl:attribute name = "id">
|
||||||
|
<xsl:value-of select = "@number"/>
|
||||||
|
</xsl:attribute>
|
||||||
|
<xsl:apply-templates/>
|
||||||
|
</xsl:element>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match = "rtf:field">
|
<xsl:template match = "rtf:field">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
|
@ -93,7 +93,7 @@ def get_metadata(stream):
|
|||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
cpg = detect_codepage(stream)
|
cpg = detect_codepage(stream)
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
|
|
||||||
title_match = title_pat.search(block)
|
title_match = title_pat.search(block)
|
||||||
if title_match is not None:
|
if title_match is not None:
|
||||||
title = decode(title_match.group(1).strip(), cpg)
|
title = decode(title_match.group(1).strip(), cpg)
|
||||||
@ -162,7 +162,8 @@ def set_metadata(stream, options):
|
|||||||
index = src.rindex('}')
|
index = src.rindex('}')
|
||||||
return src[:index] + r'{\ '[:-1] + name + ' ' + val + '}}'
|
return src[:index] + r'{\ '[:-1] + name + ' ' + val + '}}'
|
||||||
src, pos = get_document_info(stream)
|
src, pos = get_document_info(stream)
|
||||||
if not src:
|
print 'I was thre'
|
||||||
|
if src is not None:
|
||||||
create_metadata(stream, options)
|
create_metadata(stream, options)
|
||||||
else:
|
else:
|
||||||
olen = len(src)
|
olen = len(src)
|
||||||
|
@ -41,7 +41,7 @@ border_style_map = {
|
|||||||
|
|
||||||
class InlineClass(etree.XSLTExtension):
|
class InlineClass(etree.XSLTExtension):
|
||||||
|
|
||||||
FMTS = ('italics', 'bold', 'underlined', 'strike-through', 'small-caps')
|
FMTS = ('italics', 'bold', 'strike-through', 'small-caps')
|
||||||
|
|
||||||
def __init__(self, log):
|
def __init__(self, log):
|
||||||
etree.XSLTExtension.__init__(self)
|
etree.XSLTExtension.__init__(self)
|
||||||
@ -54,6 +54,9 @@ class InlineClass(etree.XSLTExtension):
|
|||||||
for x in self.FMTS:
|
for x in self.FMTS:
|
||||||
if input_node.get(x, None) == 'true':
|
if input_node.get(x, None) == 'true':
|
||||||
classes.append(x)
|
classes.append(x)
|
||||||
|
#underlined is special
|
||||||
|
if input_node.get('underlined', 'false') != 'false':
|
||||||
|
classes.append('underlined')
|
||||||
fs = input_node.get('font-size', False)
|
fs = input_node.get('font-size', False)
|
||||||
if fs:
|
if fs:
|
||||||
if fs not in self.font_sizes:
|
if fs not in self.font_sizes:
|
||||||
@ -78,12 +81,13 @@ class RTFInput(InputFormatPlugin):
|
|||||||
def generate_xml(self, stream):
|
def generate_xml(self, stream):
|
||||||
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
|
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
|
||||||
ofile = 'dataxml.xml'
|
ofile = 'dataxml.xml'
|
||||||
run_lev, debug_dir = 1, None
|
run_lev, debug_dir, indent_out = 1, None, 0
|
||||||
if getattr(self.opts, 'debug_pipeline', None) is not None:
|
if getattr(self.opts, 'debug_pipeline', None) is not None:
|
||||||
try:
|
try:
|
||||||
os.mkdir(debug_dir)
|
os.mkdir('rtfdebug')
|
||||||
debug_dir = 'rtfdebug'
|
debug_dir = 'rtfdebug'
|
||||||
run_lev = 4
|
run_lev = 4
|
||||||
|
indent_out = 1
|
||||||
self.log('Running RTFParser in debug mode')
|
self.log('Running RTFParser in debug mode')
|
||||||
except:
|
except:
|
||||||
self.log.warn('Impossible to run RTFParser in debug mode')
|
self.log.warn('Impossible to run RTFParser in debug mode')
|
||||||
@ -108,7 +112,7 @@ class RTFInput(InputFormatPlugin):
|
|||||||
|
|
||||||
# Indent resulting XML.
|
# Indent resulting XML.
|
||||||
# Default is 0 (no indent).
|
# Default is 0 (no indent).
|
||||||
indent = 1,
|
indent = indent_out,
|
||||||
|
|
||||||
# Form lists from RTF. Default is 1.
|
# Form lists from RTF. Default is 1.
|
||||||
form_lists = 1,
|
form_lists = 1,
|
||||||
@ -157,7 +161,8 @@ class RTFInput(InputFormatPlugin):
|
|||||||
with open(name, 'wb') as f:
|
with open(name, 'wb') as f:
|
||||||
f.write(data)
|
f.write(data)
|
||||||
imap[count] = name
|
imap[count] = name
|
||||||
#open(name+'.hex', 'wb').write(enc)
|
# with open(name+'.hex', 'wb') as f:
|
||||||
|
# f.write(enc)
|
||||||
return self.convert_images(imap)
|
return self.convert_images(imap)
|
||||||
|
|
||||||
def convert_images(self, imap):
|
def convert_images(self, imap):
|
||||||
@ -319,4 +324,6 @@ class RTFInput(InputFormatPlugin):
|
|||||||
opf.render(open('metadata.opf', 'wb'))
|
opf.render(open('metadata.opf', 'wb'))
|
||||||
return os.path.abspath('metadata.opf')
|
return os.path.abspath('metadata.opf')
|
||||||
|
|
||||||
|
#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
|
||||||
|
# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
|
||||||
|
# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"
|
||||||
|
@ -372,17 +372,17 @@ class ParseRtf:
|
|||||||
old_rtf = old_rtf_obj.check_if_old_rtf()
|
old_rtf = old_rtf_obj.check_if_old_rtf()
|
||||||
if old_rtf:
|
if old_rtf:
|
||||||
if self.__run_level > 5:
|
if self.__run_level > 5:
|
||||||
msg = 'older RTF\n'
|
msg = 'Older RTF\n'
|
||||||
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
||||||
raise RtfInvalidCodeException, msg
|
raise RtfInvalidCodeException, msg
|
||||||
if self.__run_level > 1:
|
if self.__run_level > 1:
|
||||||
sys.stderr.write('File could be older RTF...\n')
|
sys.stderr.write(_('File could be older RTF...\n'))
|
||||||
if found_destination:
|
if found_destination:
|
||||||
if self.__run_level > 1:
|
if self.__run_level > 1:
|
||||||
sys.stderr.write(
|
sys.stderr.write(_(
|
||||||
'File also has newer RTF.\n'
|
'File also has newer RTF.\n'
|
||||||
'Will do the best to convert.\n'
|
'Will do the best to convert.\n'
|
||||||
)
|
))
|
||||||
add_brackets_obj = add_brackets.AddBrackets(
|
add_brackets_obj = add_brackets.AddBrackets(
|
||||||
in_file = self.__temp_file,
|
in_file = self.__temp_file,
|
||||||
bug_handler = RtfInvalidCodeException,
|
bug_handler = RtfInvalidCodeException,
|
||||||
|
@ -53,4 +53,3 @@ class CheckBrackets:
|
|||||||
'total number of brackets is %s') % self.__bracket_count
|
'total number of brackets is %s') % self.__bracket_count
|
||||||
return (False, msg)
|
return (False, msg)
|
||||||
return (True, "Brackets match!")
|
return (True, "Brackets match!")
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ class Configure:
|
|||||||
if self.__show_config_file and self.__configuration_file:
|
if self.__show_config_file and self.__configuration_file:
|
||||||
sys.stderr.write('configuration file is "%s"\n' % self.__configuration_file)
|
sys.stderr.write('configuration file is "%s"\n' % self.__configuration_file)
|
||||||
if self.__show_config_file and not self.__configuration_file:
|
if self.__show_config_file and not self.__configuration_file:
|
||||||
sys.stderr.write('No configuraiton file found; using default vaules\n')
|
sys.stderr.write('No configuraiton file found; using default values\n')
|
||||||
if self.__configuration_file:
|
if self.__configuration_file:
|
||||||
read_obj = open(self.__configuration_file, 'r')
|
read_obj = open(self.__configuration_file, 'r')
|
||||||
line_to_read = 1
|
line_to_read = 1
|
||||||
|
@ -43,6 +43,7 @@ class DeleteInfo:
|
|||||||
'cw<it<listtable_',
|
'cw<it<listtable_',
|
||||||
'cw<it<revi-table',
|
'cw<it<revi-table',
|
||||||
'cw<ls<list-lev-d',
|
'cw<ls<list-lev-d',
|
||||||
|
# Field allowed
|
||||||
'cw<fd<field-inst',
|
'cw<fd<field-inst',
|
||||||
'cw<an<book-mk-st',
|
'cw<an<book-mk-st',
|
||||||
'cw<an<book-mk-en',
|
'cw<an<book-mk-en',
|
||||||
@ -81,7 +82,7 @@ class DeleteInfo:
|
|||||||
self.__ob = line
|
self.__ob = line
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
# write previous bracket, since didn't fine asterisk
|
# write previous bracket, since didn't find asterisk
|
||||||
if self.__ob:
|
if self.__ob:
|
||||||
self.__write_obj.write(self.__ob)
|
self.__write_obj.write(self.__ob)
|
||||||
self.__ob = 0
|
self.__ob = 0
|
||||||
@ -104,7 +105,7 @@ class DeleteInfo:
|
|||||||
If you find that you are in a delete group, and the previous
|
If you find that you are in a delete group, and the previous
|
||||||
token in not an open bracket (self.__ob = 0), that means
|
token in not an open bracket (self.__ob = 0), that means
|
||||||
that the delete group is nested inside another acceptable
|
that the delete group is nested inside another acceptable
|
||||||
detination group. In this case, you have alrady written
|
detination group. In this case, you have already written
|
||||||
the open bracket, so you will need to write the closed one
|
the open bracket, so you will need to write the closed one
|
||||||
as well.
|
as well.
|
||||||
"""
|
"""
|
||||||
|
@ -10,8 +10,10 @@
|
|||||||
# #
|
# #
|
||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, tempfile, re
|
import sys, os, tempfile, re
|
||||||
|
|
||||||
from calibre.ebooks.rtf2xml import field_strings, copy
|
from calibre.ebooks.rtf2xml import field_strings, copy
|
||||||
|
|
||||||
class FieldsSmall:
|
class FieldsSmall:
|
||||||
"""
|
"""
|
||||||
=================
|
=================
|
||||||
@ -19,7 +21,7 @@ Purpose
|
|||||||
=================
|
=================
|
||||||
Write tags for bookmarks, index and toc entry fields in a tokenized file.
|
Write tags for bookmarks, index and toc entry fields in a tokenized file.
|
||||||
This module does not handle toc or index tables. (This module won't be any
|
This module does not handle toc or index tables. (This module won't be any
|
||||||
use to use to you unless you use it as part of the other modules.)
|
use to you unless you use it as part of the other modules.)
|
||||||
-----------
|
-----------
|
||||||
Method
|
Method
|
||||||
-----------
|
-----------
|
||||||
@ -50,6 +52,7 @@ file.
|
|||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
self.__write_to = tempfile.mktemp()
|
self.__write_to = tempfile.mktemp()
|
||||||
self.__run_level = run_level
|
self.__run_level = run_level
|
||||||
|
|
||||||
def __initiate_values(self):
|
def __initiate_values(self):
|
||||||
"""
|
"""
|
||||||
Initiate all values.
|
Initiate all values.
|
||||||
@ -76,6 +79,7 @@ file.
|
|||||||
tx = 'tx<nu<__________<(.*?)'
|
tx = 'tx<nu<__________<(.*?)'
|
||||||
reg_st = ob + bk_st + tx + cb
|
reg_st = ob + bk_st + tx + cb
|
||||||
self.__book_start = re.compile(r'%s' % reg_st)
|
self.__book_start = re.compile(r'%s' % reg_st)
|
||||||
|
|
||||||
def __before_body_func(self, line):
|
def __before_body_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -89,6 +93,7 @@ file.
|
|||||||
if self.__token_info == 'mi<mk<body-open_':
|
if self.__token_info == 'mi<mk<body-open_':
|
||||||
self.__state = 'body'
|
self.__state = 'body'
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __body_func(self, line):
|
def __body_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -105,6 +110,7 @@ file.
|
|||||||
action(line, tag)
|
action(line, tag)
|
||||||
else:
|
else:
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __found_bookmark_func(self, line, tag):
|
def __found_bookmark_func(self, line, tag):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -120,6 +126,7 @@ file.
|
|||||||
self.__cb_count = 0
|
self.__cb_count = 0
|
||||||
self.__state = 'bookmark'
|
self.__state = 'bookmark'
|
||||||
self.__type_of_bookmark = tag
|
self.__type_of_bookmark = tag
|
||||||
|
|
||||||
def __bookmark_func(self, line):
|
def __bookmark_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -148,6 +155,7 @@ file.
|
|||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
elif line[0:2] == 'tx':
|
elif line[0:2] == 'tx':
|
||||||
self.__text_string += line[17:-1]
|
self.__text_string += line[17:-1]
|
||||||
|
|
||||||
def __parse_index_func(self, my_string):
|
def __parse_index_func(self, my_string):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -196,6 +204,7 @@ file.
|
|||||||
my_changed_string += '<sub-entry>%s' % sub_entry
|
my_changed_string += '<sub-entry>%s' % sub_entry
|
||||||
my_changed_string += '\n'
|
my_changed_string += '\n'
|
||||||
return my_changed_string
|
return my_changed_string
|
||||||
|
|
||||||
def __index_see_func(self, my_string):
|
def __index_see_func(self, my_string):
|
||||||
in_see = 0
|
in_see = 0
|
||||||
bracket_count = 0
|
bracket_count = 0
|
||||||
@ -221,6 +230,7 @@ file.
|
|||||||
in_see = 1
|
in_see = 1
|
||||||
changed_string += '%s\n' % line
|
changed_string += '%s\n' % line
|
||||||
return changed_string, see_string
|
return changed_string, see_string
|
||||||
|
|
||||||
def __index_bookmark_func(self, my_string):
|
def __index_bookmark_func(self, my_string):
|
||||||
"""
|
"""
|
||||||
Requries:
|
Requries:
|
||||||
@ -257,6 +267,7 @@ file.
|
|||||||
in_bookmark = 1
|
in_bookmark = 1
|
||||||
index_string += '%s\n' % line
|
index_string += '%s\n' % line
|
||||||
return index_string, bookmark_string
|
return index_string, bookmark_string
|
||||||
|
|
||||||
def __index__format_func(self, my_string):
|
def __index__format_func(self, my_string):
|
||||||
italics = 0
|
italics = 0
|
||||||
bold =0
|
bold =0
|
||||||
@ -268,6 +279,7 @@ file.
|
|||||||
if token_info == 'cw<in<index-ital':
|
if token_info == 'cw<in<index-ital':
|
||||||
italics = 1
|
italics = 1
|
||||||
return italics, bold
|
return italics, bold
|
||||||
|
|
||||||
def __parse_toc_func(self, my_string):
|
def __parse_toc_func(self, my_string):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -303,6 +315,7 @@ file.
|
|||||||
my_changed_string += '<main-entry>%s' % main_entry
|
my_changed_string += '<main-entry>%s' % main_entry
|
||||||
my_changed_string += '\n'
|
my_changed_string += '\n'
|
||||||
return my_changed_string
|
return my_changed_string
|
||||||
|
|
||||||
def __parse_bookmark_for_toc(self, my_string):
|
def __parse_bookmark_for_toc(self, my_string):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -348,6 +361,7 @@ file.
|
|||||||
in_bookmark = 1
|
in_bookmark = 1
|
||||||
toc_string += '%s\n' % line
|
toc_string += '%s\n' % line
|
||||||
return toc_string, book_start_string, book_end_string
|
return toc_string, book_start_string, book_end_string
|
||||||
|
|
||||||
def __parse_bookmark_func(self, my_string, type):
|
def __parse_bookmark_func(self, my_string, type):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -362,6 +376,7 @@ file.
|
|||||||
my_changed_string = ('mi<tg<empty-att_<field<type>%s'
|
my_changed_string = ('mi<tg<empty-att_<field<type>%s'
|
||||||
'<number>%s<update>none\n' % (type, my_string))
|
'<number>%s<update>none\n' % (type, my_string))
|
||||||
return my_changed_string
|
return my_changed_string
|
||||||
|
|
||||||
def __found_toc_index_func(self, line, tag):
|
def __found_toc_index_func(self, line, tag):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -377,6 +392,7 @@ file.
|
|||||||
self.__cb_count = 0
|
self.__cb_count = 0
|
||||||
self.__state = 'toc_index'
|
self.__state = 'toc_index'
|
||||||
self.__tag = tag
|
self.__tag = tag
|
||||||
|
|
||||||
def __toc_index_func(self, line):
|
def __toc_index_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -404,6 +420,7 @@ file.
|
|||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
else:
|
else:
|
||||||
self.__text_string += line
|
self.__text_string += line
|
||||||
|
|
||||||
def fix_fields(self):
|
def fix_fields(self):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -418,24 +435,19 @@ file.
|
|||||||
bookmark.
|
bookmark.
|
||||||
"""
|
"""
|
||||||
self.__initiate_values()
|
self.__initiate_values()
|
||||||
read_obj = open(self.__file)
|
with open(self.__file, 'r') as read_obj:
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
line_to_read = '1'
|
for line in read_obj:
|
||||||
while line_to_read:
|
self.__token_info = line[:16]
|
||||||
line_to_read = read_obj.readline()
|
if self.__token_info == 'ob<nu<open-brack':
|
||||||
line = line_to_read
|
self.__ob_count = line[-5:-1]
|
||||||
self.__token_info = line[:16]
|
if self.__token_info == 'cb<nu<clos-brack':
|
||||||
if self.__token_info == 'ob<nu<open-brack':
|
self.__cb_count = line[-5:-1]
|
||||||
self.__ob_count = line[-5:-1]
|
action = self.__state_dict.get(self.__state)
|
||||||
if self.__token_info == 'cb<nu<clos-brack':
|
if action is None:
|
||||||
self.__cb_count = line[-5:-1]
|
sys.stderr.write('No matching state in module fields_small.py\n')
|
||||||
action = self.__state_dict.get(self.__state)
|
sys.stderr.write(self.__state + '\n')
|
||||||
if action == None:
|
action(line)
|
||||||
sys.stderr.write('no no matching state in module fields_small.py\n')
|
|
||||||
sys.stderr.write(self.__state + '\n')
|
|
||||||
action(line)
|
|
||||||
read_obj.close()
|
|
||||||
self.__write_obj.close()
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "fields_small.data")
|
copy_obj.copy_file(self.__write_to, "fields_small.data")
|
||||||
|
@ -25,8 +25,6 @@ class GetCharMap:
|
|||||||
|
|
||||||
'char_file'--the file with the mappings
|
'char_file'--the file with the mappings
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
||||||
nothing
|
nothing
|
||||||
@ -57,7 +55,6 @@ class GetCharMap:
|
|||||||
fields[1].replace('\\colon', ':')
|
fields[1].replace('\\colon', ':')
|
||||||
map_dict[fields[1]] = fields[3]
|
map_dict[fields[1]] = fields[3]
|
||||||
|
|
||||||
|
|
||||||
if not found_map:
|
if not found_map:
|
||||||
msg = 'no map found\nmap is "%s"\n'%(map,)
|
msg = 'no map found\nmap is "%s"\n'%(map,)
|
||||||
raise self.__bug_handler, msg
|
raise self.__bug_handler, msg
|
||||||
|
@ -11,8 +11,10 @@
|
|||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, tempfile, cStringIO
|
import sys, os, tempfile, cStringIO
|
||||||
|
|
||||||
from calibre.ebooks.rtf2xml import get_char_map, copy
|
from calibre.ebooks.rtf2xml import get_char_map, copy
|
||||||
from calibre.ebooks.rtf2xml.char_set import char_set
|
from calibre.ebooks.rtf2xml.char_set import char_set
|
||||||
|
|
||||||
class Hex2Utf8:
|
class Hex2Utf8:
|
||||||
"""
|
"""
|
||||||
Convert Microsoft hexidecimal numbers to utf-8
|
Convert Microsoft hexidecimal numbers to utf-8
|
||||||
@ -108,7 +110,7 @@ class Hex2Utf8:
|
|||||||
"""
|
"""
|
||||||
self.__file=file
|
self.__file=file
|
||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
if area_to_convert != 'preamble' and area_to_convert != 'body':
|
if area_to_convert not in ('preamble', 'body'):
|
||||||
msg = (
|
msg = (
|
||||||
'in module "hex_2_utf8.py\n'
|
'in module "hex_2_utf8.py\n'
|
||||||
'"area_to_convert" must be "body" or "preamble"\n'
|
'"area_to_convert" must be "body" or "preamble"\n'
|
||||||
@ -136,12 +138,12 @@ class Hex2Utf8:
|
|||||||
Set values, including those for the dictionaries.
|
Set values, including those for the dictionaries.
|
||||||
The file that contains the maps is broken down into many different
|
The file that contains the maps is broken down into many different
|
||||||
sets. For example, for the Symbol font, there is the standard part for
|
sets. For example, for the Symbol font, there is the standard part for
|
||||||
hexidecimal numbers, and the the part for Microsoft charcters. Read
|
hexidecimal numbers, and the part for Microsoft characters. Read
|
||||||
each part in, and then combine them.
|
each part in, and then combine them.
|
||||||
"""
|
"""
|
||||||
# the default encoding system, the lower map for characters 0 through
|
# the default encoding system, the lower map for characters 0 through
|
||||||
# 128, and the encoding system for Microsoft characters.
|
# 128, and the encoding system for Microsoft characters.
|
||||||
# New on 2004-05-8: the self.__char_map is not in diretory with other
|
# New on 2004-05-8: the self.__char_map is not in directory with other
|
||||||
# modules
|
# modules
|
||||||
self.__char_file = cStringIO.StringIO(char_set)
|
self.__char_file = cStringIO.StringIO(char_set)
|
||||||
char_map_obj = get_char_map.GetCharMap(
|
char_map_obj = get_char_map.GetCharMap(
|
||||||
@ -188,7 +190,6 @@ class Hex2Utf8:
|
|||||||
'body' : self.__body_func,
|
'body' : self.__body_func,
|
||||||
'mi<mk<body-open_' : self.__found_body_func,
|
'mi<mk<body-open_' : self.__found_body_func,
|
||||||
'tx<hx<__________' : self.__hex_text_func,
|
'tx<hx<__________' : self.__hex_text_func,
|
||||||
# 'tx<nu<__________' : self.__text_func,
|
|
||||||
}
|
}
|
||||||
self.__body_state_dict = {
|
self.__body_state_dict = {
|
||||||
'preamble' : self.__preamble_for_body_func,
|
'preamble' : self.__preamble_for_body_func,
|
||||||
@ -228,9 +229,7 @@ class Hex2Utf8:
|
|||||||
font = self.__current_dict_name
|
font = self.__current_dict_name
|
||||||
if self.__convert_caps\
|
if self.__convert_caps\
|
||||||
and self.__caps_list[-1] == 'true'\
|
and self.__caps_list[-1] == 'true'\
|
||||||
and font != 'Symbol'\
|
and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
|
||||||
and font != 'Wingdings'\
|
|
||||||
and font != 'Zapf Dingbats':
|
|
||||||
converted = self.__utf_token_to_caps_func(converted)
|
converted = self.__utf_token_to_caps_func(converted)
|
||||||
self.__write_obj.write(
|
self.__write_obj.write(
|
||||||
'tx<ut<__________<%s\n' % converted
|
'tx<ut<__________<%s\n' % converted
|
||||||
@ -240,9 +239,7 @@ class Hex2Utf8:
|
|||||||
font = self.__current_dict_name
|
font = self.__current_dict_name
|
||||||
if self.__convert_caps\
|
if self.__convert_caps\
|
||||||
and self.__caps_list[-1] == 'true'\
|
and self.__caps_list[-1] == 'true'\
|
||||||
and font != 'Symbol'\
|
and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
|
||||||
and font != 'Wingdings'\
|
|
||||||
and font != 'Zapf Dingbats':
|
|
||||||
converted = converted.upper()
|
converted = converted.upper()
|
||||||
self.__write_obj.write(
|
self.__write_obj.write(
|
||||||
'tx<nu<__________<%s\n' % converted
|
'tx<nu<__________<%s\n' % converted
|
||||||
@ -282,17 +279,16 @@ class Hex2Utf8:
|
|||||||
|
|
||||||
def __convert_preamble(self):
|
def __convert_preamble(self):
|
||||||
self.__state = 'preamble'
|
self.__state = 'preamble'
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
with open(self.__file, 'r') as read_obj:
|
with open(self.__file, 'r') as read_obj:
|
||||||
for line in read_obj:
|
for line in read_obj:
|
||||||
self.__token_info = line[:16]
|
self.__token_info = line[:16]
|
||||||
action = self.__preamble_state_dict.get(self.__state)
|
action = self.__preamble_state_dict.get(self.__state)
|
||||||
if action is None:
|
if action is None:
|
||||||
sys.stderr.write(_('error no state found in hex_2_utf8'),
|
sys.stderr.write('error no state found in hex_2_utf8',
|
||||||
self.__state
|
self.__state
|
||||||
)
|
)
|
||||||
action(line)
|
action(line)
|
||||||
self.__write_obj.close()
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
|
copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
|
||||||
@ -461,9 +457,9 @@ class Hex2Utf8:
|
|||||||
if len(self.__caps_list) > 1:
|
if len(self.__caps_list) > 1:
|
||||||
self.__caps_list.pop()
|
self.__caps_list.pop()
|
||||||
else:
|
else:
|
||||||
sys.stderr.write('Module is hex_2_utf8\n')
|
sys.stderr.write('Module is hex_2_utf8\n'
|
||||||
sys.stderr.write('method is __end_caps_func\n')
|
'method is __end_caps_func\n'
|
||||||
sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
|
'caps list should be more than one?\n') #self.__in_caps not set
|
||||||
|
|
||||||
def __text_func(self, line):
|
def __text_func(self, line):
|
||||||
"""
|
"""
|
||||||
@ -486,8 +482,7 @@ class Hex2Utf8:
|
|||||||
hex_num = '\'%s' % hex_num
|
hex_num = '\'%s' % hex_num
|
||||||
converted = self.__current_dict.get(hex_num)
|
converted = self.__current_dict.get(hex_num)
|
||||||
if converted is None:
|
if converted is None:
|
||||||
sys.stderr.write('module is hex_2_ut8\n')
|
sys.stderr.write('module is hex_2_ut8\nmethod is __text_func\n')
|
||||||
sys.stderr.write('method is __text_func\n')
|
|
||||||
sys.stderr.write('no hex value for "%s"\n' % hex_num)
|
sys.stderr.write('no hex value for "%s"\n' % hex_num)
|
||||||
else:
|
else:
|
||||||
the_string += converted
|
the_string += converted
|
||||||
@ -543,16 +538,15 @@ class Hex2Utf8:
|
|||||||
def __convert_body(self):
|
def __convert_body(self):
|
||||||
self.__state = 'body'
|
self.__state = 'body'
|
||||||
with open(self.__file, 'r') as read_obj:
|
with open(self.__file, 'r') as read_obj:
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
for line in read_obj:
|
for line in read_obj:
|
||||||
self.__token_info = line[:16]
|
self.__token_info = line[:16]
|
||||||
action = self.__body_state_dict.get(self.__state)
|
action = self.__body_state_dict.get(self.__state)
|
||||||
if action is None:
|
if action is None:
|
||||||
sys.stderr.write('error no state found in hex_2_utf8',
|
sys.stderr.write('error no state found in hex_2_utf8',
|
||||||
self.__state
|
self.__state
|
||||||
)
|
)
|
||||||
action(line)
|
action(line)
|
||||||
self.__write_obj.close()
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
|
copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
|
||||||
|
@ -68,7 +68,6 @@ class Info:
|
|||||||
|
|
||||||
'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'),
|
'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'),
|
||||||
'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
|
'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
|
||||||
'cw<di<edit-time_' : (self.__found_tag_with_tokens_func, 'editing-time'),
|
|
||||||
'cw<di<print-time' : (self.__found_tag_with_tokens_func, 'printing-time'),
|
'cw<di<print-time' : (self.__found_tag_with_tokens_func, 'printing-time'),
|
||||||
'cw<di<backuptime' : (self.__found_tag_with_tokens_func, 'backup-time'),
|
'cw<di<backuptime' : (self.__found_tag_with_tokens_func, 'backup-time'),
|
||||||
|
|
||||||
@ -77,6 +76,7 @@ class Info:
|
|||||||
'cw<di<numofchrws' : (self.__single_field_func, 'number-of-characters-without-space'),
|
'cw<di<numofchrws' : (self.__single_field_func, 'number-of-characters-without-space'),
|
||||||
'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'),
|
'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'),
|
||||||
'cw<di<version___' : (self.__single_field_func, 'version'),
|
'cw<di<version___' : (self.__single_field_func, 'version'),
|
||||||
|
'cw<di<edit-time_' : (self.__single_field_func, 'editing-time'),
|
||||||
'cw<di<intern-ver' : (self.__single_field_func, 'internal-version-number'),
|
'cw<di<intern-ver' : (self.__single_field_func, 'internal-version-number'),
|
||||||
'cw<di<internalID' : (self.__single_field_func, 'internal-id-number'),
|
'cw<di<internalID' : (self.__single_field_func, 'internal-id-number'),
|
||||||
}
|
}
|
||||||
|
@ -411,11 +411,11 @@ class Inline:
|
|||||||
self.__set_list_func(line)
|
self.__set_list_func(line)
|
||||||
action = self.__state_dict.get(self.__state)
|
action = self.__state_dict.get(self.__state)
|
||||||
if action is None:
|
if action is None:
|
||||||
sys.stderr.write('No matching state in module inline_for_lists.py\n')
|
sys.stderr.write('No matching state in module inline.py\n')
|
||||||
sys.stderr.write(self.__state + '\n')
|
sys.stderr.write(self.__state + '\n')
|
||||||
action(line)
|
action(line)
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "inline.data")
|
copy_obj.copy_file(self.__write_to, "inline.data")
|
||||||
copy_obj.rename(self.__write_to, self.__file)
|
copy_obj.rename(self.__write_to, self.__file)
|
||||||
os.remove(self.__write_to)
|
os.remove(self.__write_to)
|
@ -214,7 +214,27 @@ class ProcessTokens:
|
|||||||
'nosupersub' : ('ci', 'no-su-supe', self.__no_sup_sub_func),
|
'nosupersub' : ('ci', 'no-su-supe', self.__no_sup_sub_func),
|
||||||
'up' : ('ci', 'font-up___', self.divide_by_2),
|
'up' : ('ci', 'font-up___', self.divide_by_2),
|
||||||
'v' : ('ci', 'hidden____', self.default_func),
|
'v' : ('ci', 'hidden____', self.default_func),
|
||||||
# table => tb
|
# underline
|
||||||
|
# can't see why it isn't a char info: 'ul'=>'ci'
|
||||||
|
'ul' : ('ci', 'underlined<continous', self.two_part_func),
|
||||||
|
'uld' : ('ci', 'underlined<dotted', self.two_part_func),
|
||||||
|
'uldash' : ('ci', 'underlined<dash', self.two_part_func),
|
||||||
|
'uldashd' : ('ci', 'underlined<dash-dot', self.two_part_func),
|
||||||
|
'uldashdd' : ('ci', 'underlined<dash-dot-dot', self.two_part_func),
|
||||||
|
'uldb' : ('ci', 'underlined<double', self.two_part_func),
|
||||||
|
'ulhwave' : ('ci', 'underlined<heavy-wave', self.two_part_func),
|
||||||
|
'ulldash' : ('ci', 'underlined<long-dash', self.two_part_func),
|
||||||
|
'ulth' : ('ci', 'underlined<thich', self.two_part_func),
|
||||||
|
'ulthd' : ('ci', 'underlined<thick-dotted', self.two_part_func),
|
||||||
|
'ulthdash' : ('ci', 'underlined<thick-dash', self.two_part_func),
|
||||||
|
'ulthdashd' : ('ci', 'underlined<thick-dash-dot', self.two_part_func),
|
||||||
|
'ulthdashdd' : ('ci', 'underlined<thick-dash-dot-dot', self.two_part_func),
|
||||||
|
'ulthldash' : ('ci', 'underlined<thick-long-dash', self.two_part_func),
|
||||||
|
'ululdbwave' : ('ci', 'underlined<double-wave', self.two_part_func),
|
||||||
|
'ulw' : ('ci', 'underlined<word', self.two_part_func),
|
||||||
|
'ulwave' : ('ci', 'underlined<wave', self.two_part_func),
|
||||||
|
'ulnone' : ('ci', 'underlined<false', self.two_part_func),
|
||||||
|
# table => tb
|
||||||
'trowd' : ('tb', 'row-def___', self.default_func),
|
'trowd' : ('tb', 'row-def___', self.default_func),
|
||||||
'cell' : ('tb', 'cell______', self.default_func),
|
'cell' : ('tb', 'cell______', self.default_func),
|
||||||
'row' : ('tb', 'row_______', self.default_func),
|
'row' : ('tb', 'row_______', self.default_func),
|
||||||
@ -274,25 +294,6 @@ class ProcessTokens:
|
|||||||
'paperh' : ('pa', 'paper-hght', self.divide_by_20),
|
'paperh' : ('pa', 'paper-hght', self.divide_by_20),
|
||||||
# annotation => an
|
# annotation => an
|
||||||
'annotation' : ('an', 'annotation', self.default_func),
|
'annotation' : ('an', 'annotation', self.default_func),
|
||||||
# underline
|
|
||||||
'ul' : ('ul', 'underlined<continous', self.two_part_func),
|
|
||||||
'uld' : ('ul', 'underlined<dotted', self.two_part_func),
|
|
||||||
'uldash' : ('ul', 'underlined<dash', self.two_part_func),
|
|
||||||
'uldashd' : ('ul', 'underlined<dash-dot', self.two_part_func),
|
|
||||||
'uldashdd' : ('ul', 'underlined<dash-dot-dot', self.two_part_func),
|
|
||||||
'uldb' : ('ul', 'underlined<double', self.two_part_func),
|
|
||||||
'ulhwave' : ('ul', 'underlined<heavy-wave', self.two_part_func),
|
|
||||||
'ulldash' : ('ul', 'underlined<long-dash', self.two_part_func),
|
|
||||||
'ulth' : ('ul', 'underlined<thich', self.two_part_func),
|
|
||||||
'ulthd' : ('ul', 'underlined<thick-dotted', self.two_part_func),
|
|
||||||
'ulthdash' : ('ul', 'underlined<thick-dash', self.two_part_func),
|
|
||||||
'ulthdashd' : ('ul', 'underlined<thick-dash-dot', self.two_part_func),
|
|
||||||
'ulthdashdd' : ('ul', 'underlined<thick-dash-dot-dot', self.two_part_func),
|
|
||||||
'ulthldash' : ('ul', 'underlined<thick-long-dash', self.two_part_func),
|
|
||||||
'ululdbwave' : ('ul', 'underlined<double-wave', self.two_part_func),
|
|
||||||
'ulw' : ('ul', 'underlined<word', self.two_part_func),
|
|
||||||
'ulwave' : ('ul', 'underlined<wave', self.two_part_func),
|
|
||||||
'ulnone' : ('ul', 'underlined<false', self.two_part_func),
|
|
||||||
# border => bd
|
# border => bd
|
||||||
'trbrdrh' : ('bd', 'bor-t-r-hi', self.default_func),
|
'trbrdrh' : ('bd', 'bor-t-r-hi', self.default_func),
|
||||||
'trbrdrv' : ('bd', 'bor-t-r-vi', self.default_func),
|
'trbrdrv' : ('bd', 'bor-t-r-vi', self.default_func),
|
||||||
@ -757,7 +758,7 @@ class ProcessTokens:
|
|||||||
def process_cw(self, token):
|
def process_cw(self, token):
|
||||||
"""Change the value of the control word by determining what dictionary
|
"""Change the value of the control word by determining what dictionary
|
||||||
it belongs to"""
|
it belongs to"""
|
||||||
special = [ '*', ':', '}', '{', '~', '_', '-', ';' ]
|
special = [ '*', ':', '}', '{', '~', '_', '-', ';' ]
|
||||||
##if token != "{" or token != "}":
|
##if token != "{" or token != "}":
|
||||||
token = token[1:] # strip off leading \
|
token = token[1:] # strip off leading \
|
||||||
token = token.replace(" ", "")
|
token = token.replace(" ", "")
|
||||||
@ -793,7 +794,7 @@ class ProcessTokens:
|
|||||||
raise self.__exception_handler, msg
|
raise self.__exception_handler, msg
|
||||||
|
|
||||||
the_index = token.find('\\ ')
|
the_index = token.find('\\ ')
|
||||||
if token is not None and the_index > -1:
|
if token is not None and the_index > -1:
|
||||||
msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
|
msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
|
||||||
% line_count
|
% line_count
|
||||||
raise self.__exception_handler, msg
|
raise self.__exception_handler, msg
|
||||||
@ -832,4 +833,4 @@ class ProcessTokens:
|
|||||||
msg = '\nInvalid RTF: document does not have matching brackets.\n'
|
msg = '\nInvalid RTF: document does not have matching brackets.\n'
|
||||||
raise self.__exception_handler, msg
|
raise self.__exception_handler, msg
|
||||||
else:
|
else:
|
||||||
return self.__return_code
|
return self.__return_code
|
@ -496,7 +496,7 @@ Instead, ingore all section information in a field-block.
|
|||||||
self.__token_info = line[:16]
|
self.__token_info = line[:16]
|
||||||
action = self.__state_dict.get(self.__state)
|
action = self.__state_dict.get(self.__state)
|
||||||
if action == None:
|
if action == None:
|
||||||
sys.stderr.write('no no matching state in module sections.py\n')
|
sys.stderr.write('no matching state in module sections.py\n')
|
||||||
sys.stderr.write(self.__state + '\n')
|
sys.stderr.write(self.__state + '\n')
|
||||||
action(line)
|
action(line)
|
||||||
read_obj.close()
|
read_obj.close()
|
||||||
|
@ -103,8 +103,6 @@ class Styles:
|
|||||||
'sect-note_' : 'endnotes-in-section',
|
'sect-note_' : 'endnotes-in-section',
|
||||||
# list=> ls
|
# list=> ls
|
||||||
'list-text_' : 'list-text',
|
'list-text_' : 'list-text',
|
||||||
# this line must be wrong because it duplicates an earlier one
|
|
||||||
'list-text_' : 'list-text',
|
|
||||||
'list______' : 'list',
|
'list______' : 'list',
|
||||||
'list-lev-d' : 'list-level-definition',
|
'list-lev-d' : 'list-level-definition',
|
||||||
'list-cardi' : 'list-cardinal-numbering',
|
'list-cardi' : 'list-cardinal-numbering',
|
||||||
|
@ -114,6 +114,7 @@ class Tokenize:
|
|||||||
# this is for older RTF
|
# this is for older RTF
|
||||||
input_file = self.__par_exp.sub('\n\\par \n', input_file)
|
input_file = self.__par_exp.sub('\n\\par \n', input_file)
|
||||||
input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
|
input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
|
||||||
|
input_file = self.__cs_ast.sub("\g<1>", input_file)
|
||||||
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
|
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
|
||||||
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
|
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
|
||||||
#remove \n in bin data
|
#remove \n in bin data
|
||||||
@ -163,6 +164,8 @@ class Tokenize:
|
|||||||
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
||||||
#this is for old RTF
|
#this is for old RTF
|
||||||
self.__par_exp = re.compile(r'(\\\n+|\\ )')
|
self.__par_exp = re.compile(r'(\\\n+|\\ )')
|
||||||
|
#handle improper cs char-style with \* before without {
|
||||||
|
self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
|
||||||
#handle cw using a digit as argument and without space as delimiter
|
#handle cw using a digit as argument and without space as delimiter
|
||||||
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
|
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ import os, re
|
|||||||
|
|
||||||
from calibre import prepare_string_for_xml, isbytestring
|
from calibre import prepare_string_for_xml, isbytestring
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
|
||||||
from calibre.ebooks.conversion.preprocess import DocAnalysis
|
from calibre.ebooks.conversion.preprocess import DocAnalysis
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
|
|
||||||
|
@ -477,17 +477,17 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
if opts.bibfile_enc in bibfile_enc :
|
if opts.bibfile_enc in bibfile_enc :
|
||||||
bibfile_enc = opts.bibfile_enc
|
bibfile_enc = opts.bibfile_enc
|
||||||
else :
|
else :
|
||||||
log(" WARNING: incorrect --choose-encoding flag, revert to default")
|
log.warn("Incorrect --choose-encoding flag, revert to default")
|
||||||
bibfile_enc = bibfile_enc[0]
|
bibfile_enc = bibfile_enc[0]
|
||||||
if opts.bibfile_enctag in bibfile_enctag :
|
if opts.bibfile_enctag in bibfile_enctag :
|
||||||
bibfile_enctag = opts.bibfile_enctag
|
bibfile_enctag = opts.bibfile_enctag
|
||||||
else :
|
else :
|
||||||
log(" WARNING: incorrect --choose-encoding-configuration flag, revert to default")
|
log.warn("Incorrect --choose-encoding-configuration flag, revert to default")
|
||||||
bibfile_enctag = bibfile_enctag[0]
|
bibfile_enctag = bibfile_enctag[0]
|
||||||
if opts.bib_entry in bib_entry :
|
if opts.bib_entry in bib_entry :
|
||||||
bib_entry = opts.bib_entry
|
bib_entry = opts.bib_entry
|
||||||
else :
|
else :
|
||||||
log(" WARNING: incorrect --entry-type flag, revert to default")
|
log.warn("Incorrect --entry-type flag, revert to default")
|
||||||
bib_entry = bib_entry[0]
|
bib_entry = bib_entry[0]
|
||||||
|
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
@ -544,7 +544,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
elif opts.impcit == 'True' :
|
elif opts.impcit == 'True' :
|
||||||
citation_bibtex= True
|
citation_bibtex= True
|
||||||
else :
|
else :
|
||||||
log(" WARNING: incorrect --create-citation, revert to default")
|
log.warn("Incorrect --create-citation, revert to default")
|
||||||
citation_bibtex= True
|
citation_bibtex= True
|
||||||
else :
|
else :
|
||||||
citation_bibtex= opts.impcit
|
citation_bibtex= opts.impcit
|
||||||
@ -556,7 +556,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
elif opts.addfiles == 'True' :
|
elif opts.addfiles == 'True' :
|
||||||
addfiles_bibtex = True
|
addfiles_bibtex = True
|
||||||
else :
|
else :
|
||||||
log(" WARNING: incorrect --add-files-path, revert to default")
|
log.warn("Incorrect --add-files-path, revert to default")
|
||||||
addfiles_bibtex= True
|
addfiles_bibtex= True
|
||||||
else :
|
else :
|
||||||
addfiles_bibtex = opts.addfiles
|
addfiles_bibtex = opts.addfiles
|
||||||
@ -574,7 +574,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
if bib_entry == 'book' :
|
if bib_entry == 'book' :
|
||||||
nb_books = len(filter(check_entry_book_valid, data))
|
nb_books = len(filter(check_entry_book_valid, data))
|
||||||
if nb_books < nb_entries :
|
if nb_books < nb_entries :
|
||||||
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
|
log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries))
|
||||||
nb_entries = nb_books
|
nb_entries = nb_books
|
||||||
|
|
||||||
# If connected device, add 'On Device' values to data
|
# If connected device, add 'On Device' values to data
|
||||||
|
Loading…
x
Reference in New Issue
Block a user