From fc93d954e5e6bbd8d01401a7ef22d4eef238f56a Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 28 Nov 2009 09:41:23 -0500 Subject: [PATCH] PML input: cleanup and refactor a bit. Also fix an error when handling a tags. --- src/calibre/ebooks/pml/pmlconverter.py | 107 +++++++++++++++++++------ 1 file changed, 81 insertions(+), 26 deletions(-) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 2416be596a..8c34cc8da9 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -100,8 +100,17 @@ class PML_HTMLizer(object): 'SB': 'FS', } - DIV_STATES = [ + BLOCK_STATES = [ 'a', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + ] + + DIV_STATES = [ 'c', 'r', 't', @@ -197,22 +206,10 @@ class PML_HTMLizer(object): return u'%s

' % end - def process_code_simple(self, code): - if code not in self.CODE_STATES.keys(): - return u'' - - text = u'' - - if self.state[self.CODE_STATES[code]][0]: - text = self.STATES_TAGS[self.CODE_STATES[code]][1] - else: - text = self.STATES_TAGS[self.CODE_STATES[code]][0] - - self.state[self.CODE_STATES[code]][0] = not self.state[self.CODE_STATES[code]][0] - - return text - - def process_code_div_span(self, code, stream, pre=''): + def process_code(self, code, stream): + ''' + Used for processing div and span elements. + ''' text = u'' ds = [] @@ -225,6 +222,10 @@ class PML_HTMLizer(object): ds = self.SPAN_STATES[:] ss = [] + # Close code. + # Close all tags starting with the inline then close block. Remove the + # Tag that is closed from the list and reopen them all starting with + # block followed by inline. if self.state[code][0]: # Ignore multilple T's on the same line. They do not have a closing # code. They get closed at the end of the line. @@ -243,16 +244,17 @@ class PML_HTMLizer(object): text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1] else: text += self.STATES_TAGS[c][0] + # Open code. + # If the tag to open is a block we close all inline tags, open the block + # then re-open the inline tags. else: - # Close all spans if code is a div + # Close all spans if code is a div. 
for c in ss: if self.state[c][0]: text += self.STATES_TAGS[c][1] # Process the code if code in self.STATES_VALUE_REQ: val = self.code_value(stream) - if pre: - val = '#%s-%s' % (pre, val) text += self.STATES_TAGS[code][0] % val self.state[code][1] = val else: @@ -269,6 +271,59 @@ class PML_HTMLizer(object): return text + def process_code_block(self, code, stream, pre=''): + text = u'' + + code = self.CODE_STATES[code] + + # Close all spans + for c in self.SPAN_STATES: + if self.state[c][0]: + text += self.STATES_TAGS[c][1] + + # Process the code + if self.state[code][0]: + # Close tag + text += self.STATES_TAGS[code][1] + else: + # Open tag + if code in self.STATES_VALUE_REQ: + val = self.code_value(stream) + if pre: + val = '#%s-%s' % (pre, val) + text += self.STATES_TAGS[code][0] % val + self.state[code][1] = val + else: + text += self.STATES_TAGS[code][0] + + # Re-open every span that is still open, using that span's own tag and saved value + for c in self.SPAN_STATES: + if self.state[c][0]: + if c in self.STATES_VALUE_REQ: + text += self.STATES_TAGS[c][0] % self.state[c][1] + else: + text += self.STATES_TAGS[c][0] + + self.state[code][0] = not self.state[code][0] + + return text + + + def process_code_simple(self, code): + if code not in self.CODE_STATES.keys(): + return u'' + + text = u'' + + if self.state[self.CODE_STATES[code]][0]: + text = self.STATES_TAGS[self.CODE_STATES[code]][1] + else: + text = self.STATES_TAGS[self.CODE_STATES[code]][0] + + self.state[self.CODE_STATES[code]][0] = not self.state[self.CODE_STATES[code]][0] + + return text + def code_value(self, stream): value = u'' # state 0 is before = @@ -334,15 +389,15 @@ class PML_HTMLizer(object): elif c in 'XS': l = line.read(1) if '%s%s' % (c, l) == 'Sd': - text = self.process_code_div_span('Sd', line, 'fns') + text = self.process_code_block('Sd', line, 'fns') elif '%s%s' % (c, l) == 'SB': - text = self.process_code_div_span('SB', line) + text = self.process_code('SB', line) else: text = self.process_code_simple('%s%s' % 
(c, l)) elif c == 'q': - text = self.process_code_div_span(c, line) + text = self.process_code_block(c, line) elif c in 'crtTiIuobBlk': - text = self.process_code_div_span(c, line) + text = self.process_code(c, line) elif c == 'm': empty = False src = self.code_value(line) @@ -364,9 +419,9 @@ class PML_HTMLizer(object): elif c == 'F': l = line.read(1) if '%s%s' % (c, l) == 'Fn': - text = self.process_code_div_span('Fn', line, 'fns') + text = self.process_code_block('Fn', line, 'fns') elif '%s%s' % (c, l) == 'FN': - text = self.process_code_div_span('FN', line) + text = self.process_code('FN', line) elif c == 'w': empty = False text = '
' % self.code_value(line)