diff --git a/src/libprs500/lrf/html/convert_from.py b/src/libprs500/lrf/html/convert_from.py index 7c0ee4b8f6..6f1a34828d 100644 --- a/src/libprs500/lrf/html/convert_from.py +++ b/src/libprs500/lrf/html/convert_from.py @@ -210,13 +210,16 @@ class Span(_Span): class HTMLConverter(object): SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) - BAEN_SANCTIFY = [(re.compile(r'<[Aa] id=.p[0-9]*. name=.p[0-9]*.><\/[Aa]>'), - lambda match: ''), - (re.compile('page-break-before:'), lambda match: '') ] - # Fix elements MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), lambda match: match.group(1)+">")] + # Fix Baen markup + BAEN_SANCTIFY = [(re.compile(r'<\s*[Aa]\s+id="p[0-9]+"\s+name="p[0-9]+"\s*>\s*<\/[Aa]>'), + lambda match: ''), + (re.compile(r'page-break-before:\s*\w+([\s;\}])'), + lambda match: match.group(1)) ] + + class Link(object): def __init__(self, para, tag): @@ -300,6 +303,7 @@ class HTMLConverter(object): sys.stdout.flush() nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) nmassage.extend(HTMLConverter.MARKUP_MASSAGE) + self.baen = baen if baen: nmassage.extend(HTMLConverter.BAEN_SANCTIFY) self.soup = BeautifulSoup(open(self.file_name, 'r').read(), @@ -489,7 +493,7 @@ class HTMLConverter(object): font_delta=self.font_delta, verbose=self.verbose, link_level=self.link_level+1, max_link_levels=self.max_link_levels, - is_root = False) + is_root = False, baen=self.baen) HTMLConverter.processed_files[path] = self.files[path] except Exception, err: print >>sys.stderr, 'Unable to process', path, err diff --git a/src/libprs500/lrf/txt/convert_from.py b/src/libprs500/lrf/txt/convert_from.py index 0b492311bb..056ec92c0a 100644 --- a/src/libprs500/lrf/txt/convert_from.py +++ b/src/libprs500/lrf/txt/convert_from.py @@ -75,7 +75,7 @@ def convert_txt(path, options): for line in fileinput.input(path): line = line.strip() if line: - buffer += line + buffer = buffer.rstrip() + ' ' + line else: block.Paragraph(buffer) buffer = ''