Fixed #47 and debugged --baen

This commit is contained in:
Kovid Goyal 2007-05-08 04:42:22 +00:00
parent 6ca0b3c143
commit 68968c7742
2 changed files with 10 additions and 6 deletions

View File

@@ -210,13 +210,16 @@ class Span(_Span):
class HTMLConverter(object): class HTMLConverter(object):
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
BAEN_SANCTIFY = [(re.compile(r'<[Aa] id=.p[0-9]*. name=.p[0-9]*.><\/[Aa]>'),
lambda match: ''),
(re.compile('page-break-before:'), lambda match: '') ]
# Fix <a /> elements # Fix <a /> elements
MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"),
lambda match: match.group(1)+"></a>")] lambda match: match.group(1)+"></a>")]
# Fix Baen markup
BAEN_SANCTIFY = [(re.compile(r'<\s*[Aa]\s+id="p[0-9]+"\s+name="p[0-9]+"\s*>\s*<\/[Aa]>'),
lambda match: ''),
(re.compile(r'page-break-before:\s*\w+([\s;\}])'),
lambda match: match.group(1)) ]
class Link(object): class Link(object):
def __init__(self, para, tag): def __init__(self, para, tag):
@@ -300,6 +303,7 @@ class HTMLConverter(object):
sys.stdout.flush() sys.stdout.flush()
nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
nmassage.extend(HTMLConverter.MARKUP_MASSAGE) nmassage.extend(HTMLConverter.MARKUP_MASSAGE)
self.baen = baen
if baen: if baen:
nmassage.extend(HTMLConverter.BAEN_SANCTIFY) nmassage.extend(HTMLConverter.BAEN_SANCTIFY)
self.soup = BeautifulSoup(open(self.file_name, 'r').read(), self.soup = BeautifulSoup(open(self.file_name, 'r').read(),
@@ -489,7 +493,7 @@ class HTMLConverter(object):
font_delta=self.font_delta, verbose=self.verbose, font_delta=self.font_delta, verbose=self.verbose,
link_level=self.link_level+1, link_level=self.link_level+1,
max_link_levels=self.max_link_levels, max_link_levels=self.max_link_levels,
is_root = False) is_root = False, baen=self.baen)
HTMLConverter.processed_files[path] = self.files[path] HTMLConverter.processed_files[path] = self.files[path]
except Exception, err: except Exception, err:
print >>sys.stderr, 'Unable to process', path, err print >>sys.stderr, 'Unable to process', path, err

View File

@@ -75,7 +75,7 @@ def convert_txt(path, options):
for line in fileinput.input(path): for line in fileinput.input(path):
line = line.strip() line = line.strip()
if line: if line:
buffer += line buffer = buffer.rstrip() + ' ' + line
else: else:
block.Paragraph(buffer) block.Paragraph(buffer)
buffer = '' buffer = ''