From 59e0787f681925a26dca3cabcea311203a0b3fca Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 20 Jul 2007 17:34:11 +0000 Subject: [PATCH] Add --ignore-tables option. --- src/libprs500/__init__.py | 2 +- src/libprs500/ebooks/lrf/__init__.py | 2 ++ src/libprs500/ebooks/lrf/html/convert_from.py | 22 ++++++++++++------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/libprs500/__init__.py b/src/libprs500/__init__.py index b9fe6ea059..c8534df2ef 100644 --- a/src/libprs500/__init__.py +++ b/src/libprs500/__init__.py @@ -13,7 +13,7 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ''' E-book management software''' -__version__ = "0.3.77" +__version__ = "0.3.78" __docformat__ = "epytext" __author__ = "Kovid Goyal " __appname__ = 'libprs500' diff --git a/src/libprs500/ebooks/lrf/__init__.py b/src/libprs500/ebooks/lrf/__init__.py index cddd29562d..5e8108dcd9 100644 --- a/src/libprs500/ebooks/lrf/__init__.py +++ b/src/libprs500/ebooks/lrf/__init__.py @@ -91,6 +91,8 @@ def option_parser(usage): profiles=['prs500'] parser.add_option('-o', '--output', action='store', default=None, \ help='Output file name. Default is derived from input filename') + parser.add_option('--ignore-tables', action='store_true', default=False, + help='Render HTML tables as blocks of text instead of actual tables. 
This is necessary if the HTML contains very large or complex tables.') laf = parser.add_option_group('LOOK AND FEEL') laf.add_option('--cover', action='store', dest='cover', default=None, \ help='Path to file containing image to be used as cover') diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index 836ff67ae7..4b88f57dc8 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -260,7 +260,8 @@ class HTMLConverter(object): page_break=re.compile('h[12]', re.IGNORECASE), force_page_break=re.compile('$', re.IGNORECASE), profile=PRS500_PROFILE, - disable_autorotation=False): + disable_autorotation=False, + ignore_tables=False): ''' Convert HTML file at C{path} and add it to C{book}. After creating the object, you must call L{self.process_links} on it to create the links and @@ -342,6 +343,7 @@ class HTMLConverter(object): self.files = {} #: links that point to other files self.links_processed = False #: Whether links_processed has been called on this object self.font_delta = font_delta + self.ignore_tables = ignore_tables # Set by table processing code so that any anchors within the table # point to the previous element self.anchor_to_previous = None @@ -611,7 +613,8 @@ class HTMLConverter(object): link_exclude=self.link_exclude, page_break=self.page_break, force_page_break=self.force_page_break, - disable_autorotation=self.disable_autorotation) + disable_autorotation=self.disable_autorotation, + ignore_tables=self.ignore_tables) HTMLConverter.processed_files[path] = self.files[path] except Exception: print >>sys.stderr, 'Unable to process', path @@ -1189,14 +1192,16 @@ class HTMLConverter(object): self.end_current_para() self.current_block.append(CR()) self.end_current_block() - self.current_page.RuledLine(linelength=int(self.current_page.pageStyle.attrs['textwidth'])) - elif tagname == 'td': # Needed for nested tables - self.current_para.append(" ") + if 
tagname == 'hr': + self.current_page.RuledLine(linelength=int(self.current_page.pageStyle.attrs['textwidth'])) self.process_children(tag, tag_css) - elif tagname == 'table' and not self.in_table: + elif tagname == 'td': # Needed for nested tables + self.current_para.append(' ') + self.process_children(tag, tag_css) + elif tagname == 'table' and not self.ignore_tables and not self.in_table: tag_css = self.tag_css(tag) # Table should not inherit CSS self.process_table(tag, tag_css) - else: + else: self.process_children(tag, tag_css) if end_page: self.end_page() @@ -1291,7 +1296,8 @@ def process_file(path, options): chapter_detection=options.chapter_detection, chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE), link_exclude=re.compile(le), page_break=pb, force_page_break=fpb, - disable_autorotation=options.disable_autorotation) + disable_autorotation=options.disable_autorotation, + ignore_tables=options.ignore_tables) conv.process_links() oname = options.output if not oname: