From 3064a0666bccc83182234224b9e8ddae1f9cb2fb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 9 May 2007 05:42:46 +0000
Subject: [PATCH] Support for text-indent

---
 src/libprs500/lrf/html/convert_from.py | 63 ++++++++++++++++++--------
 src/libprs500/lrf/html/demo/demo.html  | 26 ++++++++---
 2 files changed, 64 insertions(+), 25 deletions(-)

diff --git a/src/libprs500/lrf/html/convert_from.py b/src/libprs500/lrf/html/convert_from.py
index 106f8b5935..7fd671452a 100644
--- a/src/libprs500/lrf/html/convert_from.py
+++ b/src/libprs500/lrf/html/convert_from.py
@@ -231,11 +231,11 @@ class HTMLConverter(object):
             
     # Defaults for various formatting tags        
     css = dict(
-            h1     = {"font-size"   :"xx-large", "font-weight":"bold"},
-            h2     = {"font-size"   :"x-large", "font-weight":"bold"},
-            h3     = {"font-size"   :"large", "font-weight":"bold"},
-            h4     = {"font-size"   :"large"},
-            h5     = {"font-weight" :"bold"},
+            h1     = {"font-size"   :"xx-large", "font-weight":"bold", 'text-indent':'0pt'},
+            h2     = {"font-size"   :"x-large", "font-weight":"bold", 'text-indent':'0pt'},
+            h3     = {"font-size"   :"large", "font-weight":"bold", 'text-indent':'0pt'},
+            h4     = {"font-size"   :"large", 'text-indent':'0pt'},
+            h5     = {"font-weight" :"bold", 'text-indent':'0pt'},
             b      = {"font-weight" :"bold"},
             strong = {"font-weight" :"bold"},
             i      = {"font-style"  :"italic"},
@@ -282,9 +282,6 @@ class HTMLConverter(object):
         self.scaled_images = {}   #: Temporary files with scaled version of images        
         self.max_link_levels = max_link_levels #: Number of link levels to process recursively
         self.link_level  = link_level  #: Current link level
-        self.justification_styles = dict(head=book.create_text_style(align='head'), 
-                                         foot=book.create_text_style(align='foot'), 
-                                         center=book.create_text_style(align='center'))
         self.blockquote_style = book.create_block_style(sidemargin=60, 
                                                         topskip=20, footskip=20)
         self.unindented_style = book.create_text_style(parindent=0)
@@ -595,8 +592,11 @@ class HTMLConverter(object):
             if align != self.current_block.textStyle.attrs['align']:
                 self.current_para.append_to(self.current_block)
                 self.current_block.append_to(self.current_page)
+                ts = self.book.create_text_style(**self.current_block.textStyle.attrs)
+                ts.attrs['align'] = align
                 self.current_block = self.book.create_text_block(
-                                    textStyle=self.justification_styles[align])
+                                    blockStyle=self.current_block.blockStyle,
+                                    textStyle=ts)
                 self.current_para = Paragraph()
             try:
                 self.current_para.append(Span(src, self.sanctify_css(css), self.memory,\
@@ -609,7 +609,7 @@ class HTMLConverter(object):
         """ Make css safe for use in a SPAM Xylog tag """
         for key in css.keys():
             test = key.lower()
-            if test.startswith('margin') or 'indent' in test or \
+            if test.startswith('margin') or \
                'padding' in test or 'border' in test or 'page-break' in test \
                or test.startswith('mso') or test.startswith('background')\
                or test in ['color', 'display', 'text-decoration', \
@@ -636,7 +636,8 @@ class HTMLConverter(object):
         self.current_para.append_to(self.current_block)
         self.current_block.append_to(self.current_page)
         self.current_para = Paragraph()
-        self.current_block = self.book.create_text_block()
+        self.current_block = self.book.create_text_block(textStyle=self.current_block.textStyle,
+                                                         blockStyle=self.current_block.blockStyle)
     
     def parse_tag(self, tag, parent_css):
         try:
@@ -687,7 +688,8 @@ class HTMLConverter(object):
                             break
                     if target and not isinstance(target, (TextBlock, ImageBlock)):
                         if isinstance(target, RuledLine):
-                            target = self.book.create_text_block()
+                            target = self.book.create_text_block(textStyle=self.current_block.textStyle,
+                                                         blockStyle=self.current_block.blockStyle)
                             target.Paragraph(' ')
                             self.current_page.append(target)
                         else:
@@ -768,7 +770,8 @@ class HTMLConverter(object):
                     self.current_block.append(self.current_para)
                     self.current_page.append(self.current_block)
                     self.current_para = Paragraph()
-                    self.current_block = self.book.create_text_block()
+                    self.current_block = self.book.create_text_block(textStyle=self.current_block.textStyle,
+                                                         blockStyle=self.current_block.blockStyle)
                     im = ImageBlock(self.images[path], x1=width, y1=height, 
                                     xsize=width, ysize=height)
                     self.current_page.append(im)                        
@@ -795,6 +798,7 @@ class HTMLConverter(object):
             self.end_current_para()
             self.current_block.append_to(self.current_page)
             self.current_block = self.book.create_text_block(
+                                    blockStyle=self.current_block.blockStyle,
                                     textStyle=self.unindented_style)
             src = ''.join([str(i) for i in tag.contents])
             lines = src.split('\n')
@@ -809,6 +813,7 @@ class HTMLConverter(object):
             self.in_ol = 1 if tagname == 'ol' else 0
             self.end_current_block()
             self.current_block = self.book.create_text_block(
+                                        blockStyle=self.current_block.blockStyle,
                                         textStyle=self.unindented_style)
             self.process_children(tag, tag_css)
             self.in_ol = 0
@@ -824,7 +829,7 @@ class HTMLConverter(object):
             self.process_children(tag, tag_css)
             if self.in_ol:
                 self.in_ol += 1
-        elif tagname in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
+        elif False and tagname in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
             self.end_current_para()
             if self.current_block.contents:
                 self.current_block.append(CR())
@@ -834,17 +839,39 @@ class HTMLConverter(object):
         elif tagname == 'blockquote':
             self.current_para.append_to(self.current_block)
             self.current_block.append_to(self.current_page)
+            pb = self.current_block
             self.current_para = Paragraph()
+            ts = self.book.create_text_style(**self.current_block.textStyle.attrs)
+            ts.attrs['parindent'] = 0
+            bs = self.book.create_block_style(**self.current_block.blockStyle.attrs)
+            bs.attrs['sidemargin'], bs.attrs['topskip'], bs.attrs['footskip'] = \
+            60, 20, 20
             self.current_block = self.book.create_text_block(
-                                    blockStyle=self.blockquote_style,
-                                    textStyle=self.unindented_style)
+                                    blockStyle=bs, textStyle=ts)
             self.process_children(tag, tag_css)
-            self.end_current_block()
-        elif tagname in ['p', 'div']:
+            self.current_para.append_to(self.current_block)
+            self.current_block.append_to(self.current_page)
+            self.current_para = Paragraph()
+            self.current_block = self.book.create_text_block(textStyle=pb.textStyle,
+                                                             blockStyle=pb.blockStyle)
+        elif tagname in ['p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
             self.end_current_para()
             self.lstrip_toggle = True
+            if tag_css.has_key('text-indent'):
+                indent = Span.unit_convert(tag_css['text-indent'])
+                tag_css.pop('text-indent')
+            else:
+                indent = self.book.defaultTextStyle.attrs['parindent']
+            if indent != self.current_block.textStyle.attrs['parindent']:
+                self.current_block.append_to(self.current_page)
+                ts = self.book.create_text_style(**self.current_block.textStyle.attrs)
+                ts.attrs['parindent'] = indent
+                self.current_block = self.book.create_text_block(blockStyle=self.current_block.blockStyle,
+                                                                 textStyle=ts)
             self.process_children(tag, tag_css)
             self.end_current_para()
+            if tagname.startswith('h'):
+                self.current_block.append(CR())
         elif tagname in ['b', 'strong', 'i', 'em', 'span']:
             self.process_children(tag, tag_css)
         elif tagname == 'font':
diff --git a/src/libprs500/lrf/html/demo/demo.html b/src/libprs500/lrf/html/demo/demo.html
index 4df93b1de9..22992c5636 100644
--- a/src/libprs500/lrf/html/demo/demo.html
+++ b/src/libprs500/lrf/html/demo/demo.html
@@ -1,8 +1,14 @@
 <html>
+<head>
+<style type='text/css'>
+.toc { page-break-after: always; text-indent: 0em; }
+</style>
+</head>
   <h1>Demo of <span style='font-family:monospace'>html2lrf</span></h1>
   <p>
   This file contains a demonstration of the capabilities of   <span style='font-family:monospace'>html2lrf,</span>   the HTML to LRF converter   from <em>libprs500.</em> To obtain libprs500 visit  <span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
   </p>
+  <br/>
   <h2><a name='toc'>Table of Contents</a></h2>
   <ul style='page-break-after:always'>
     <li><a href='#lists'>Demonstration of Lists</a></li>
@@ -29,7 +35,7 @@
  <p>
  Note that nested lists are not supported.
  </p>
- <p style='page-break-after:always'>
+ <p class='toc'>
  <hr />
  <a href='#toc'>Table of Contents</a>
  </p>
@@ -49,27 +55,33 @@
  <center>A centered phrase</center>
  <span style='text-align:right'>A right aligned phrase</span>
  A normal phrase
- <br/>
  <hr />
  <p> A paragraph containing a <em>&lt;blockquote&gt;</em>
  <blockquote>This is blockquoted text. It is rendered in a separate block with margins.</blockquote>The above text should be distinct from the rest of the paragraph.
- <p style='page-break-after:always'>
+ </p>
+ <hr/>
+ <p style='text-indent:30em'>A very indented paragraph</p>
+ <p style='text-indent:0em'>An unindented paragraph</p>
+ <p>A default indented paragraph</p>
+ <p class='toc'>
  <hr />
  <a href='#toc'>Table of Contents</a>
  </p>
 
- <h2 style='page-break-before:always'><a name='images'>Inline images</a></h2>
+ <h2><a name='images'>Inline images</a></h2>
  <p>
  Here I demonstrate the use of inline images in the midst of text. Here is a  small image <img src='small.jpg' /> embedded in a sentence. Now we have a  slightly larger image that is automatically put in its own block  <img src='medium.jpg' /> and finally we have a large image which is  automatically placed on a page by itself and prevented from being  autoscaled when the user changes from S to M to L. Try changing sizes  and see how the different embedding styles behave.  <img src='large.jpg' />
  </p>
-<p style='page-break-after:always'>
+<p class='toc'>
  <hr />
  <a href='#toc'>Table of Contents</a>
  </p>
 
- <h2 style='page-break-before:always'><a name='recursive'>Recursive link following</a></h2>
+ <h2><a name='recursive'>Recursive link following</a></h2>
+ <p>
  <span style='font:monospace'>html2lrf</span> follows links in HTML files that point to other files, recursively. Thus it can be used to convert a whole tree of HTML files into a single LRF file.
-<p style='page-break-after:always'>
+ </p>
+<p class='toc'>
  <hr />
  <a href='#toc'>Table of Contents</a>
  </p>