mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-26 00:02:25 -04:00 
			
		
		
		
	Working initial HTML->LRF converter with CSS support. Next on list is support for <style>, <link> and <img> tags.
This commit is contained in:
		
							parent
							
								
									15014f74fe
								
							
						
					
					
						commit
						d69fad53f4
					
				| @ -5,5 +5,6 @@ | |||||||
| <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property> | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property> | ||||||
| <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH"> | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH"> | ||||||
| <path>/libprs500/src</path> | <path>/libprs500/src</path> | ||||||
|  | <path>/libprs500/libprs500.lrf.txt</path> | ||||||
| </pydev_pathproperty> | </pydev_pathproperty> | ||||||
| </pydev_project> | </pydev_project> | ||||||
|  | |||||||
							
								
								
									
										8
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								setup.py
									
									
									
									
									
								
							| @ -33,7 +33,10 @@ if sys.argv[1] == 'py2exe': | |||||||
|     f.close() |     f.close() | ||||||
|     try: |     try: | ||||||
|         import py2exe |         import py2exe | ||||||
|         console = [{'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500'}] |         console = [{ | ||||||
|  |                     'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500', | ||||||
|  |                     'script' : 'src/libprs500/lrf/html/convert_from.py', 'dest_base':'html2lrf' | ||||||
|  |                   }] | ||||||
|         windows = [{'script' : 'src/libprs500/gui/main.py', 'dest_base':'prs500-gui', |         windows = [{'script' : 'src/libprs500/gui/main.py', 'dest_base':'prs500-gui', | ||||||
|                     'icon_resources':[(1,'icons/library.ico')]}] |                     'icon_resources':[(1,'icons/library.ico')]}] | ||||||
|         excludes = ["Tkconstants", "Tkinter", "tcl", "_imagingtk",  |         excludes = ["Tkconstants", "Tkinter", "tcl", "_imagingtk",  | ||||||
| @ -94,9 +97,8 @@ setup( | |||||||
|                              'prs500 = libprs500.cli.main:main', \ |                              'prs500 = libprs500.cli.main:main', \ | ||||||
|                              'lrf-meta = libprs500.lrf.meta:main', \ |                              'lrf-meta = libprs500.lrf.meta:main', \ | ||||||
|                              'rtf-meta = libprs500.metadata.rtf:main', \ |                              'rtf-meta = libprs500.metadata.rtf:main', \ | ||||||
|                              'makelrf = libprs500.lrf.makelrf:main', \ |  | ||||||
|                              'txt2lrf = libprs500.lrf.makelrf:txt', \ |                              'txt2lrf = libprs500.lrf.makelrf:txt', \ | ||||||
|                              'html2lrf = libprs500.lrf.makelrf:html',\ |                              'html2lrf = libprs500.lrf.html.convert_from:main',\ | ||||||
|                            ],  |                            ],  | ||||||
|         'gui_scripts'    : [ 'prs500-gui = libprs500.gui.main:main'] |         'gui_scripts'    : [ 'prs500-gui = libprs500.gui.main:main'] | ||||||
|       },  |       },  | ||||||
|  | |||||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 1.8 KiB | 
| @ -19,10 +19,10 @@ Code to convert HTML ebooks into LRF ebooks. | |||||||
| """ | """ | ||||||
| import os, re, sys | import os, re, sys | ||||||
| from htmlentitydefs import name2codepoint | from htmlentitydefs import name2codepoint | ||||||
| 
 | from optparse import OptionParser | ||||||
| 
 | 
 | ||||||
| from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, NavigableString | from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, NavigableString | ||||||
| from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR | from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR, Italic | ||||||
| from libprs500.lrf.pylrs.pylrs import Span as _Span | from libprs500.lrf.pylrs.pylrs import Span as _Span | ||||||
| from libprs500.lrf import ConversionError | from libprs500.lrf import ConversionError | ||||||
| 
 | 
 | ||||||
| @ -40,7 +40,7 @@ class Span(_Span): | |||||||
|         (an int) if successful. Otherwise, returns None. |         (an int) if successful. Otherwise, returns None. | ||||||
|         Assumes: 1 pixel is 1/4 mm. One em is 10pts |         Assumes: 1 pixel is 1/4 mm. One em is 10pts | ||||||
|         """ |         """ | ||||||
|         m = re.match("\s*([0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val) |         m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val) | ||||||
|         if m is not None: |         if m is not None: | ||||||
|             unit = float(m.group(1)) |             unit = float(m.group(1)) | ||||||
|             if m.group(2) == '%': |             if m.group(2) == '%': | ||||||
| @ -160,6 +160,10 @@ class Span(_Span): | |||||||
|             src = pat.sub(repl, src) |             src = pat.sub(repl, src) | ||||||
|         if not src: |         if not src: | ||||||
|             raise ConversionError('No point in adding an empty string') |             raise ConversionError('No point in adding an empty string') | ||||||
|  |         if 'font-style' in css.keys(): | ||||||
|  |             fs = css.pop('font-style') | ||||||
|  |             if fs.lower() == 'italic': | ||||||
|  |                 src = Italic(src) | ||||||
|         attrs = Span.translate_attrs(css) |         attrs = Span.translate_attrs(css) | ||||||
|         _Span.__init__(self, text=src, **attrs) |         _Span.__init__(self, text=src, **attrs) | ||||||
|          |          | ||||||
| @ -227,6 +231,13 @@ class HTMLConvertor(object): | |||||||
|         """ |         """ | ||||||
|         Return a dictionary of style properties applicable to Tag tag. |         Return a dictionary of style properties applicable to Tag tag. | ||||||
|         """ |         """ | ||||||
|  |         def merge_parent_css(prop, pcss): | ||||||
|  |             temp = {} | ||||||
|  |             for key in pcss.keys(): | ||||||
|  |                 if key.lower().startswith('font'): | ||||||
|  |                     temp[key] = pcss[key] | ||||||
|  |             prop.update(temp) | ||||||
|  |              | ||||||
|         prop = dict()         |         prop = dict()         | ||||||
|         if tag.has_key("align"): |         if tag.has_key("align"): | ||||||
|             prop["text-align"] = tag["align"] |             prop["text-align"] = tag["align"] | ||||||
| @ -238,7 +249,7 @@ class HTMLConvertor(object): | |||||||
|                 if self.css.has_key(classname): |                 if self.css.has_key(classname): | ||||||
|                     prop.update(self.css[classname]) |                     prop.update(self.css[classname]) | ||||||
|         if parent_css: |         if parent_css: | ||||||
|             prop.update(parent_css) |             merge_parent_css(prop, parent_css) | ||||||
|         if tag.has_key("style"): |         if tag.has_key("style"): | ||||||
|             prop.update(self.parse_style_properties(tag["style"]))     |             prop.update(self.parse_style_properties(tag["style"]))     | ||||||
|         return prop |         return prop | ||||||
| @ -257,21 +268,51 @@ class HTMLConvertor(object): | |||||||
|         if self.current_page: |         if self.current_page: | ||||||
|             self.book.append(self.current_page) |             self.book.append(self.current_page) | ||||||
|              |              | ||||||
|  |     def end_page(self): | ||||||
|  |         self.current_block.append(self.current_para) | ||||||
|  |         self.current_para = Paragraph() | ||||||
|  |         self.current_page.append(self.current_block) | ||||||
|  |         self.current_block = TextBlock() | ||||||
|  |         self.book.append(self.current_page) | ||||||
|  |         self.current_page = Page() | ||||||
|  |          | ||||||
|          |          | ||||||
|     def parse_tag(self, tag, parent_css): |     def parse_tag(self, tag, parent_css): | ||||||
|  |         def sanctify_css(css): | ||||||
|  |             """ Make css safe for use in a SPAM Xylog tag """ | ||||||
|  |             for key in css.keys(): | ||||||
|  |                 test = key.lower() | ||||||
|  |                 if test.startswith('margin') or 'indent' in test or \ | ||||||
|  |                    'padding' in test or 'border' in test or test in \ | ||||||
|  |                    ['color', 'display', 'text-decoration', 'letter-spacing']: | ||||||
|  |                     css.pop(key) | ||||||
|  |             return css | ||||||
|  |                      | ||||||
|         def add_text(tag, css): |         def add_text(tag, css): | ||||||
|             try: |             try: | ||||||
|                 self.current_para.append(Span(tag, css)) |                 self.current_para.append(Span(tag, sanctify_css(css))) | ||||||
|             except ConversionError, err: |             except ConversionError, err: | ||||||
|                 if self.verbose: |                 if self.verbose: | ||||||
|                     print >>sys.stderr, err |                     print >>sys.stderr, err | ||||||
|          |          | ||||||
|  |          | ||||||
|  |                      | ||||||
|         def process_text_tag(tag, pcss): |         def process_text_tag(tag, pcss): | ||||||
|  |             if 'page-break-before' in pcss.keys(): | ||||||
|  |                 if pcss['page-break-before'].lower() != 'avoid': | ||||||
|  |                     self.end_page() | ||||||
|  |                 pcss.pop('page-break-before') | ||||||
|  |             end_page = False | ||||||
|  |             if 'page-break-after' in pcss.keys(): | ||||||
|  |                 end_page = True | ||||||
|  |                 pcss.pop('page-break-after') | ||||||
|             for c in tag.contents: |             for c in tag.contents: | ||||||
|                 if isinstance(tag, NavigableString): |                 if isinstance(tag, NavigableString): | ||||||
|                     add_text(tag, pcss) |                     add_text(tag, pcss) | ||||||
|                 else: |                 else: | ||||||
|                     self.parse_tag(c, pcss) |                     self.parse_tag(c, pcss) | ||||||
|  |             if end_page: | ||||||
|  |                 self.end_page() | ||||||
|              |              | ||||||
|         try: |         try: | ||||||
|             tagname = tag.name.lower() |             tagname = tag.name.lower() | ||||||
| @ -280,8 +321,17 @@ class HTMLConvertor(object): | |||||||
|             return |             return | ||||||
|         if tagname in ["title", "script", "meta"]: |         if tagname in ["title", "script", "meta"]: | ||||||
|             pass |             pass | ||||||
|  |         elif tagname in ['style', 'link']: | ||||||
|  |             # TODO: Append CSS to self.css | ||||||
|  |             pass | ||||||
|         elif tagname == 'p': |         elif tagname == 'p': | ||||||
|             css = self.tag_css(tag, parent_css=parent_css) |             css = self.tag_css(tag, parent_css=parent_css) | ||||||
|  |             indent = css.pop('text-indent', '') | ||||||
|  |             if indent: | ||||||
|  |                 # TODO: If indent is different from current textblock's parindent | ||||||
|  |                 # start a new TextBlock | ||||||
|  |                 pass | ||||||
|  |             self.current_para.CR() # Put a paragraph end              | ||||||
|             self.current_block.append(self.current_para) |             self.current_block.append(self.current_para) | ||||||
|             self.current_para = Paragraph() |             self.current_para = Paragraph() | ||||||
|             process_text_tag(tag, css) |             process_text_tag(tag, css) | ||||||
| @ -302,13 +352,14 @@ class HTMLConvertor(object): | |||||||
|             self.current_para = Paragraph() |             self.current_para = Paragraph() | ||||||
|             self.current_page = Page() |             self.current_page = Page() | ||||||
|         else: |         else: | ||||||
|  |             css = self.tag_css(tag, parent_css=parent_css) | ||||||
|             for c in tag.contents: |             for c in tag.contents: | ||||||
|                 if isinstance(c, Comment): |                 if isinstance(c, Comment): | ||||||
|                     continue |                     continue | ||||||
|                 elif isinstance(c, Tag): |                 elif isinstance(c, Tag): | ||||||
|                     self.parse_tag(c) |                     self.parse_tag(c, css) | ||||||
|                 elif isinstance(c, NavigableString):                     |                 elif isinstance(c, NavigableString):                     | ||||||
|                     add_text(c, parent_css) |                     add_text(c, css) | ||||||
|                      |                      | ||||||
|     def writeto(self, path): |     def writeto(self, path): | ||||||
|         if path.lower().endswith('lrs'): |         if path.lower().endswith('lrs'): | ||||||
| @ -327,8 +378,33 @@ def process_file(path, options): | |||||||
|         book = Book(title=options.title, author=options.author, \ |         book = Book(title=options.title, author=options.author, \ | ||||||
|                     sourceencoding='utf8') |                     sourceencoding='utf8') | ||||||
|         conv = HTMLConvertor(book, soup) |         conv = HTMLConvertor(book, soup) | ||||||
|         name = os.path.splitext(os.path.basename(path))[0]+'.lrs' |         name = os.path.splitext(os.path.basename(path))[0]+'.lrf' | ||||||
|         os.chdir(cwd) |         os.chdir(cwd) | ||||||
|         conv.writeto(name)         |         conv.writeto(name)         | ||||||
|     finally: |     finally: | ||||||
|         os.chdir(cwd) |         os.chdir(cwd) | ||||||
|  |          | ||||||
|  | def main(): | ||||||
|  |     """ CLI for html -> lrf conversions """ | ||||||
|  |     parser = OptionParser(usage=\ | ||||||
|  |         """usage: %prog [options] mybook.txt | ||||||
|  |          | ||||||
|  |         %prog converts mybook.txt to mybook.lrf | ||||||
|  |         """\ | ||||||
|  |         ) | ||||||
|  |     parser.add_option("-t", "--title", action="store", type="string", \ | ||||||
|  |                     dest="title", help="Set the title") | ||||||
|  |     parser.add_option("-a", "--author", action="store", type="string", \ | ||||||
|  |                     dest="author", help="Set the author", default='Unknown') | ||||||
|  |     options, args = parser.parse_args() | ||||||
|  |     if len(args) != 1: | ||||||
|  |         parser.print_help() | ||||||
|  |         sys.exit(1) | ||||||
|  |     src = args[0] | ||||||
|  |     if options.title == None: | ||||||
|  |         options.title = os.path.splitext(os.path.basename(src))[0] | ||||||
|  |     process_file(src, options) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | if __name__ == '__main__': | ||||||
|  |     main() | ||||||
| @ -1,266 +0,0 @@ | |||||||
| ##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net |  | ||||||
| ##    This program is free software; you can redistribute it and/or modify |  | ||||||
| ##    it under the terms of the GNU General Public License as published by |  | ||||||
| ##    the Free Software Foundation; either version 2 of the License, or |  | ||||||
| ##    (at your option) any later version. |  | ||||||
| ## |  | ||||||
| ##    This program is distributed in the hope that it will be useful, |  | ||||||
| ##    but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| ##    GNU General Public License for more details. |  | ||||||
| ## |  | ||||||
| ##    You should have received a copy of the GNU General Public License along |  | ||||||
| ##    with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
| 
 |  | ||||||
| """ |  | ||||||
| Thin ctypes based wrapper around libtidy. Example usage: |  | ||||||
| >>> from libtidy import parseString |  | ||||||
| >>> print parseString('<h1>fowehfow</h2>', \ |  | ||||||
|                        output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0) |  | ||||||
| <?xml version="1.0" encoding="us-ascii"?> |  | ||||||
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |  | ||||||
|     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |  | ||||||
| <html xmlns="http://www.w3.org/1999/xhtml"> |  | ||||||
|   <head> |  | ||||||
|     <title></title> |  | ||||||
|   </head> |  | ||||||
|   <body> |  | ||||||
|     <h1> |  | ||||||
|       fowehfow |  | ||||||
|     </h1> |  | ||||||
|   </body> |  | ||||||
| </html> |  | ||||||
| """ |  | ||||||
| 
 |  | ||||||
| import ctypes |  | ||||||
| from cStringIO import StringIO |  | ||||||
| import weakref |  | ||||||
| 
 |  | ||||||
| class TidyLibError(Exception): |  | ||||||
|     def __init__(self, arg): |  | ||||||
|         self.arg=arg |  | ||||||
| 
 |  | ||||||
| class InvalidOptionError(TidyLibError): |  | ||||||
|     def __str__(self): |  | ||||||
|         return "%s was not a valid Tidy option." % (self.arg) |  | ||||||
|     __repr__=__str__ |  | ||||||
| 
 |  | ||||||
| class OptionArgError(TidyLibError): |  | ||||||
|     def __init__(self, arg): |  | ||||||
|         self.arg=arg |  | ||||||
|     def __str__(self): |  | ||||||
|         return self.arg |  | ||||||
| 
 |  | ||||||
| # search the path for libtidy using the known names;  |  | ||||||
| thelib=None |  | ||||||
| for libname in ('cygtidy-0-99-0', 'libtidy', 'libtidy.so', 'tidylib'): |  | ||||||
|     try: |  | ||||||
|         thelib = getattr(ctypes.cdll, libname) |  | ||||||
|         break |  | ||||||
|     except OSError: |  | ||||||
|         pass |  | ||||||
| if not thelib: |  | ||||||
|     raise OSError("Couldn't find libtidy, please make sure it is installed.") |  | ||||||
| 
 |  | ||||||
| class Loader: |  | ||||||
|     """ |  | ||||||
|     I am a trivial wrapper that eliminates the need for tidy.tidyFoo,  |  | ||||||
|     so you can just access tidy.Foo |  | ||||||
|     """ |  | ||||||
|     def __init__(self): |  | ||||||
|         self.lib = thelib |  | ||||||
|     def __getattr__(self, name): |  | ||||||
|         try: |  | ||||||
|             return getattr(self.lib, "tidy%s" % name) |  | ||||||
|         # current ctypes uses ValueError, future will use AttributeError |  | ||||||
|         except (ValueError, AttributeError): |  | ||||||
|             return getattr(self.lib, name) |  | ||||||
| 
 |  | ||||||
| _tidy=Loader() |  | ||||||
| 
 |  | ||||||
| # define a callback to pass to Tidylib |  | ||||||
| def _putByte(handle, c): |  | ||||||
|     """Lookup sink by handle and call its putByte method""" |  | ||||||
|     sinkfactory[handle].putByte(c) |  | ||||||
|     return 0 |  | ||||||
| 
 |  | ||||||
| PUTBYTEFUNC = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_char)     |  | ||||||
| putByte = PUTBYTEFUNC(_putByte) |  | ||||||
| 
 |  | ||||||
| class _OutputSink(ctypes.Structure): |  | ||||||
|     _fields_ = [("sinkData", ctypes.c_int), |  | ||||||
|               ("putByte", PUTBYTEFUNC), |  | ||||||
|               ] |  | ||||||
| 
 |  | ||||||
| class _Sink: |  | ||||||
|     def __init__(self): |  | ||||||
|         self._data = StringIO() |  | ||||||
|         self.struct = _OutputSink() |  | ||||||
|         self.struct.putByte = putByte |  | ||||||
|          |  | ||||||
|     def putByte(self, c): |  | ||||||
|         self._data.write(c) |  | ||||||
|          |  | ||||||
|     def __str__(self): |  | ||||||
|         return self._data.getvalue() |  | ||||||
| 
 |  | ||||||
| class ReportItem: |  | ||||||
|     def __init__(self, err): |  | ||||||
|         self.err = err |  | ||||||
|         if err.startswith('line'): |  | ||||||
|             tokens = err.split(' ',6) |  | ||||||
|             self.severity = tokens[5][0] # W or E |  | ||||||
|             self.line = int(tokens[1]) |  | ||||||
|             self.col = int(tokens[3]) |  | ||||||
|             self.message = tokens[6] |  | ||||||
|         else: |  | ||||||
|             tokens = err.split(' ',1) |  | ||||||
|             self.severity = tokens[0][0] |  | ||||||
|             self.message = tokens[1] |  | ||||||
|             self.line = None |  | ||||||
|             self.col = None |  | ||||||
|         # TODO - parse emacs mode |  | ||||||
|      |  | ||||||
|     def __str__(self): |  | ||||||
|         severities = dict(W='Warning', E='Error', C='Config') |  | ||||||
|         try: |  | ||||||
|             if self.line: |  | ||||||
|                 return "line %d col %d - %s: %s" % (self.line, self.col, |  | ||||||
|                                                     severities[self.severity], |  | ||||||
|                                                     self.message) |  | ||||||
|              |  | ||||||
|             else: |  | ||||||
|                 return "%s: %s" % (severities[self.severity], self.message) |  | ||||||
|         except KeyError: |  | ||||||
|             return self.err |  | ||||||
| 
 |  | ||||||
|     def __repr__(self): |  | ||||||
|         return "%s('%s')" % (self.__class__.__name__, |  | ||||||
|                              str(self).replace("'", "\\'")) |  | ||||||
|          |  | ||||||
| class FactoryDict(dict): |  | ||||||
|     """I am a dict with a create method and no __setitem__.  This allows |  | ||||||
|     me to control my own keys. |  | ||||||
|     """ |  | ||||||
|     def create(self): |  | ||||||
|         """Subclasses should implement me to generate a new item""" |  | ||||||
|      |  | ||||||
|     def _setitem(self, name, value): |  | ||||||
|         dict.__setitem__(self, name, value) |  | ||||||
|      |  | ||||||
|     def __setitem__(self, name, value): |  | ||||||
|         raise TypeError, "Use create() to get a new object" |  | ||||||
|          |  | ||||||
| 
 |  | ||||||
| class SinkFactory(FactoryDict): |  | ||||||
|     """Mapping for lookup of sinks by handle""" |  | ||||||
|     def __init__(self): |  | ||||||
|         FactoryDict.__init__(self) |  | ||||||
|         self.lastsink = 0 |  | ||||||
|      |  | ||||||
|     def create(self): |  | ||||||
|         sink = _Sink() |  | ||||||
|         sink.struct.sinkData = self.lastsink |  | ||||||
|         FactoryDict._setitem(self, self.lastsink, sink) |  | ||||||
|         self.lastsink = self.lastsink+1 |  | ||||||
|         return sink |  | ||||||
| 
 |  | ||||||
| sinkfactory = SinkFactory() |  | ||||||
| 
 |  | ||||||
| class _Document(object): |  | ||||||
|     def __init__(self): |  | ||||||
|         self.cdoc = _tidy.Create() |  | ||||||
|         self.errsink = sinkfactory.create() |  | ||||||
|         _tidy.SetErrorSink(self.cdoc, ctypes.byref(self.errsink.struct)) |  | ||||||
|      |  | ||||||
|     def write(self, stream): |  | ||||||
|         stream.write(str(self)) |  | ||||||
|      |  | ||||||
|     def get_errors(self): |  | ||||||
|         ret = [] |  | ||||||
|         for line in str(self.errsink).split('\n'): |  | ||||||
|             line = line.strip(' \n\r') |  | ||||||
|             if line: ret.append(ReportItem(line)) |  | ||||||
|         return ret |  | ||||||
|      |  | ||||||
|     errors=property(get_errors) |  | ||||||
|      |  | ||||||
|     def __str__(self): |  | ||||||
|         stlen = ctypes.c_int(8192) |  | ||||||
|         st = ctypes.c_buffer(stlen.value) |  | ||||||
|         rc = _tidy.SaveString(self.cdoc, st, ctypes.byref(stlen)) |  | ||||||
|         if rc==-12: # buffer too small |  | ||||||
|             st = ctypes.c_buffer(stlen.value) |  | ||||||
|             _tidy.SaveString(self.cdoc, st, ctypes.byref(stlen)) |  | ||||||
|         return st.value |  | ||||||
| 
 |  | ||||||
| errors = {'missing or malformed argument for option: ': OptionArgError, |  | ||||||
|           'unknown option: ': InvalidOptionError, |  | ||||||
|           } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class DocumentFactory(FactoryDict): |  | ||||||
|     def _setOptions(self, doc, **options): |  | ||||||
|         for k in options.keys(): |  | ||||||
|              |  | ||||||
|             # this will flush out most argument type errors... |  | ||||||
|             if options[k] is None: options[k] = '' |  | ||||||
|              |  | ||||||
|             _tidy.OptParseValue(doc.cdoc,  |  | ||||||
|                                 k.replace('_', '-'),  |  | ||||||
|                                 str(options[k])) |  | ||||||
|             if doc.errors: |  | ||||||
|                 match=filter(doc.errors[-1].message.startswith, errors.keys()) |  | ||||||
|                 if match: |  | ||||||
|                     raise errors[match[0]](doc.errors[-1].message) |  | ||||||
|      |  | ||||||
|     def load(self, doc, arg, loader): |  | ||||||
|         loader(doc.cdoc, arg) |  | ||||||
|         _tidy.CleanAndRepair(doc.cdoc) |  | ||||||
|      |  | ||||||
|     def loadFile(self, doc, filename): |  | ||||||
|         self.load(doc, filename, _tidy.ParseFile) |  | ||||||
|      |  | ||||||
|     def loadString(self, doc, st): |  | ||||||
|         self.load(doc, st, _tidy.ParseString) |  | ||||||
|      |  | ||||||
|     def _create(self, *args, **kwargs): |  | ||||||
|         doc = _Document() |  | ||||||
|         self._setOptions(doc, **kwargs) |  | ||||||
|         ref = weakref.ref(doc, self.releaseDoc) |  | ||||||
|         FactoryDict._setitem(self, ref, doc.cdoc) |  | ||||||
|         return doc |  | ||||||
|      |  | ||||||
|     def parse(self, filename, *args, **kwargs): |  | ||||||
|         """ |  | ||||||
|         Open and process filename as an HTML file, returning a |  | ||||||
|         processed document object. |  | ||||||
|         @param kwargs: named options to pass to TidyLib for processing |  | ||||||
|         the input file. |  | ||||||
|         @param filename: the name of a file to process |  | ||||||
|         @return: a document object |  | ||||||
|         """ |  | ||||||
|         doc = self._create(**kwargs) |  | ||||||
|         self.loadFile(doc, filename) |  | ||||||
|         return doc |  | ||||||
|      |  | ||||||
|     def parseString(self, st, *args, **kwargs): |  | ||||||
|         """ |  | ||||||
|         Use st as an HTML file, and process it, returning a |  | ||||||
|         document object. |  | ||||||
|         @param kwargs: named options to pass to TidyLib for processing |  | ||||||
|         the input file. |  | ||||||
|         @param st: the string to parse |  | ||||||
|         @return: a document object |  | ||||||
|         """ |  | ||||||
|         doc = self._create(**kwargs) |  | ||||||
|         self.loadString(doc, st) |  | ||||||
|         return doc |  | ||||||
|      |  | ||||||
|     def releaseDoc(self, ref): |  | ||||||
|         _tidy.Release(self[ref]) |  | ||||||
|      |  | ||||||
| docfactory = DocumentFactory() |  | ||||||
| parse = docfactory.parse |  | ||||||
| parseString = docfactory.parseString |  | ||||||
| @ -17,19 +17,14 @@ import shutil | |||||||
| import sys | import sys | ||||||
| import hashlib | import hashlib | ||||||
| import re | import re | ||||||
| import time |  | ||||||
| import pkg_resources | import pkg_resources | ||||||
| import subprocess | import subprocess | ||||||
| from tempfile import mkdtemp | from tempfile import mkdtemp | ||||||
| from optparse import OptionParser | from optparse import OptionParser | ||||||
| import xml.dom.minidom as dom |  | ||||||
| 
 |  | ||||||
| from libprs500.lrf import ConversionError | from libprs500.lrf import ConversionError | ||||||
| from libprs500.lrf.meta import LRFException, LRFMetaFile | from libprs500.lrf.meta import LRFException, LRFMetaFile | ||||||
| from libprs500.ptempfile import PersistentTemporaryFile | from libprs500.ptempfile import PersistentTemporaryFile | ||||||
| 
 | 
 | ||||||
| _bbebook = 'BBeBook-0.2.jar' |  | ||||||
| 
 |  | ||||||
| def generate_thumbnail(path): | def generate_thumbnail(path): | ||||||
|     """ Generate a JPEG thumbnail of size ~ 128x128 (aspect ratio preserved)""" |     """ Generate a JPEG thumbnail of size ~ 128x128 (aspect ratio preserved)""" | ||||||
|     try: |     try: | ||||||
| @ -45,30 +40,6 @@ def generate_thumbnail(path): | |||||||
|     im.save(thumb.name) |     im.save(thumb.name) | ||||||
|     return thumb |     return thumb | ||||||
|      |      | ||||||
| def create_xml(cfg): |  | ||||||
|     doc = dom.getDOMImplementation().createDocument(None, None, None) |  | ||||||
|     def add_field(parent, tag, value): |  | ||||||
|         elem = doc.createElement(tag) |  | ||||||
|         elem.appendChild(doc.createTextNode(value)) |  | ||||||
|         parent.appendChild(elem) |  | ||||||
|      |  | ||||||
|     info = doc.createElement('Info') |  | ||||||
|     info.setAttribute('version', '1.0') |  | ||||||
|     book_info = doc.createElement('BookInfo') |  | ||||||
|     doc_info  = doc.createElement('DocInfo') |  | ||||||
|     info.appendChild(book_info) |  | ||||||
|     info.appendChild(doc_info) |  | ||||||
|     add_field(book_info, 'File', cfg['File']) |  | ||||||
|     add_field(doc_info, 'Output', cfg['Output']) |  | ||||||
|     for field in ['Title', 'Author', 'BookID', 'Publisher', 'Label', \ |  | ||||||
|                   'Category', 'Classification', 'Icon', 'Cover', 'FreeText']: |  | ||||||
|         if cfg.has_key(field): |  | ||||||
|             add_field(book_info, field, cfg[field]) |  | ||||||
|     add_field(doc_info, 'Language', 'en') |  | ||||||
|     add_field(doc_info, 'Creator', _bbebook) |  | ||||||
|     add_field(doc_info, 'CreationDate', time.strftime('%Y-%m-%d', time.gmtime())) |  | ||||||
|     doc.appendChild(info) |  | ||||||
|     return doc.toxml() |  | ||||||
| 
 | 
 | ||||||
| def makelrf(author=None, title=None, \ | def makelrf(author=None, title=None, \ | ||||||
|             thumbnail=None, src=None, odir=".",\ |             thumbnail=None, src=None, odir=".",\ | ||||||
| @ -150,127 +121,3 @@ def makelrf(author=None, title=None, \ | |||||||
|         if dirpath:  |         if dirpath:  | ||||||
|             shutil.rmtree(dirpath, True) |             shutil.rmtree(dirpath, True) | ||||||
| 
 | 
 | ||||||
def txt(args=None):
    """
    CLI for txt -> lrf conversions.

    Expects exactly one positional argument, the path of the text file, and
    accepts the optional C{--title}, C{--author} and C{--encoding} switches.
    When no title is given, the file name of the source without its extension
    is used. The parsed options are handed to L{convert_txt}.

    Exits with status 1 after printing the help text when the number of
    positional arguments is not one, and after printing the error to stderr
    when the conversion raises C{ConversionError}.

    @param args: Argument list to parse (without the program name). When
                 C{None}, the default, C{sys.argv[1:]} is parsed.
    """
    defenc = 'cp1252'
    # The usage text is spelled out line by line so that its whitespace is
    # explicit; the value is the same as the original triple-quoted literal.
    usage = ('usage: %prog [options] mybook.txt\n'
             '        \n'
             '        %prog converts mybook.txt to mybook.lrf\n'
             '        ')
    parser = OptionParser(usage=usage)
    parser.add_option("-t", "--title", action="store", type="string",
                      dest="title", help="Set the title")
    parser.add_option("-a", "--author", action="store", type="string",
                      dest="author", help="Set the author", default='Unknown')
    enchelp = 'Set the encoding used to decode ' + \
              'the text in mybook.txt. Default encoding is ' + defenc
    parser.add_option('-e', '--encoding', action='store', type='string',
                      dest='encoding', help=enchelp, default=defenc)
    options, positional = parser.parse_args(args)
    if len(positional) != 1:
        parser.print_help()
        sys.exit(1)
    src = positional[0]
    if options.title is None:
        # Fall back to the source file name, minus directory and extension.
        options.title = os.path.splitext(os.path.basename(src))[0]
    try:
        convert_txt(src, options)
    except ConversionError:
        # sys.exc_info() is used instead of "except X, err" so that the
        # handler parses on every Python version, including 2.5.
        err = sys.exc_info()[1]
        sys.stderr.write(str(err) + '\n')
        sys.exit(1)
|          |  | ||||||
|      |  | ||||||
def convert_txt(path, options):
    """
    Convert the text file at C{path} into an lrf file.

    Every run of consecutive non-blank lines becomes one paragraph whose
    lines are joined with a single space; a blank line ends the current
    paragraph. Text remaining after the last line is emitted as a final
    paragraph, so the file does not have to end in a blank line.

    @param path: Path of the text file to convert.
    @param options: Object with the following attributes:
                    C{author}, C{title}, C{encoding} (the assumed encoding of
                    the text in C{path}.)
    @return: Absolute path built from the current working directory and the
             base name of C{path} with its extension replaced by C{.lrf};
             this is the name passed to C{book.renderLrf}.
    @raise ConversionError: If rendering raises C{UnicodeDecodeError}, i.e.
                            the text is not encoded in C{options.encoding}.
    """
    import fileinput
    from libprs500.lrf.pylrs.pylrs import Book
    book = Book(title=options.title, author=options.author,
                sourceencoding=options.encoding)
    block = book.Page().TextBlock()
    pending = []  # stripped lines of the paragraph being collected
    try:
        for line in fileinput.input(path):
            line = line.strip()
            if line:
                pending.append(line)
            else:
                # A blank line always flushes, even when nothing is pending,
                # exactly as the original did.
                block.Paragraph(' '.join(pending))
                pending = []
    finally:
        # fileinput keeps module-level state; close it so that a failure in
        # the loop does not leave input() "already active" for the next call.
        fileinput.close()
    if pending:
        # The file ended without a trailing blank line.
        block.Paragraph(' '.join(pending))
    basename = os.path.basename(path)
    name = os.path.splitext(basename)[0] + '.lrf'
    try:
        book.renderLrf(name)
    except UnicodeDecodeError:
        raise ConversionError(path + ' is not encoded in ' +
                              options.encoding + '. Specify the ' +
                              'correct encoding with the -e option.')
    return os.path.abspath(name)
|      |  | ||||||
| 
 |  | ||||||
def html(args=None):
    """
    CLI for html -> lrf conversions.

    Expects exactly one positional argument, the path of the source file, and
    accepts the optional C{--title} and C{--author} switches. When no title
    is given, the file name of the source without its extension is used. The
    source path and the parsed options are handed to C{process_file} from
    C{libprs500.lrf.html.convert}.

    Exits with status 1 after printing the help text when the number of
    positional arguments is not one.

    @param args: Argument list to parse (without the program name). When
                 C{None}, the default, C{sys.argv[1:]} is parsed.
    """
    # The original usage text was copied from the txt CLI and advertised
    # "mybook.txt"; this command converts html.
    usage = ('usage: %prog [options] mybook.html\n'
             '        \n'
             '        %prog converts mybook.html to mybook.lrf\n'
             '        ')
    parser = OptionParser(usage=usage)
    parser.add_option("-t", "--title", action="store", type="string",
                      dest="title", help="Set the title")
    parser.add_option("-a", "--author", action="store", type="string",
                      dest="author", help="Set the author", default='Unknown')
    options, positional = parser.parse_args(args)
    if len(positional) != 1:
        parser.print_help()
        sys.exit(1)
    src = positional[0]
    if options.title is None:
        # Fall back to the source file name, minus directory and extension.
        options.title = os.path.splitext(os.path.basename(src))[0]
    # Imported here, after argument validation, as in the original.
    from libprs500.lrf.html.convert import process_file
    process_file(src, options)
| 
 |  | ||||||
def main(cargs=None):
    """
    CLI that converts an html, pdf or rar file to lrf via L{makelrf}.

    Exits with status 1 after printing the help text when the number of
    positional arguments is not one or when the source does not end in
    C{.html}, C{.pdf} or C{.rar}. On success the value returned by
    L{makelrf} is printed after the text C{LRF generated:}.

    @param cargs: Full argument vector, laid out like C{sys.argv} (the first
                  element is the program name and is not parsed). When empty
                  or C{None}, C{sys.argv} is used.
    """
    usage = ('usage: %prog [options] mybook.[html|pdf|rar]\n'
             '        \n'
             '        %prog converts mybook to mybook.lrf\n'
             '        If you specify a rar file you must have the unrar '
             'command line client\n'
             '        installed. makelrf assumes the rar file is an archive '
             'containing the\n'
             '        html file you want converted.')
    parser = OptionParser(usage=usage)

    parser.add_option("-t", "--title", action="store", type="string",
                      dest="title", help="Set the book title")
    parser.add_option("-a", "--author", action="store", type="string",
                      dest="author", help="Set the author")
    # NOTE(review): store_false means passing -r yields rasterize=False and
    # omitting it yields None, which looks inverted relative to the help
    # text. Left unchanged because makelrf's handling of the flag is not
    # visible here -- confirm before switching to store_true.
    parser.add_option('-r', '--rasterize', action='store_false',
                      dest="rasterize",
                      help="Convert pdfs into image files.")
    parser.add_option('-c', '--cover', action='store', dest='cover',
                      help="Path to a graphic that will be set as the cover. "
                      "If it is specified the thumbnail is automatically "
                      "generated from it")
    parser.add_option("--thumbnail", action="store", type="string",
                      dest="thumbnail",
                      help="Path to a graphic that will be set as the thumbnail")
    if not cargs:
        cargs = sys.argv
    # The original called parse_args() with no arguments, which silently
    # ignored cargs and always parsed sys.argv.
    options, args = parser.parse_args(cargs[1:])
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    src = args[0]
    root, ext = os.path.splitext(src)
    if ext not in ['.html', '.pdf', '.rar']:
        sys.stderr.write("Can only convert files ending in .html|.pdf|.rar\n")
        parser.print_help()
        sys.exit(1)
    name = makelrf(author=options.author, title=options.title,
                   thumbnail=options.thumbnail, src=src, cover=options.cover,
                   rasterize=options.rasterize)
    sys.stdout.write("LRF generated: %s\n" % (name,))
|  | |||||||
							
								
								
									
										14
									
								
								src/libprs500/lrf/txt/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								src/libprs500/lrf/txt/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | |||||||
|  | ##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net | ||||||
|  | ##    This program is free software; you can redistribute it and/or modify | ||||||
|  | ##    it under the terms of the GNU General Public License as published by | ||||||
|  | ##    the Free Software Foundation; either version 2 of the License, or | ||||||
|  | ##    (at your option) any later version. | ||||||
|  | ## | ||||||
|  | ##    This program is distributed in the hope that it will be useful, | ||||||
|  | ##    but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | ##    GNU General Public License for more details. | ||||||
|  | ## | ||||||
|  | ##    You should have received a copy of the GNU General Public License along | ||||||
|  | ##    with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
							
								
								
									
										86
									
								
								src/libprs500/lrf/txt/convert_from.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								src/libprs500/lrf/txt/convert_from.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,86 @@ | |||||||
|  | ##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net | ||||||
|  | ##    This program is free software; you can redistribute it and/or modify | ||||||
|  | ##    it under the terms of the GNU General Public License as published by | ||||||
|  | ##    the Free Software Foundation; either version 2 of the License, or | ||||||
|  | ##    (at your option) any later version. | ||||||
|  | ## | ||||||
|  | ##    This program is distributed in the hope that it will be useful, | ||||||
|  | ##    but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | ##    GNU General Public License for more details. | ||||||
|  | ## | ||||||
|  | ##    You should have received a copy of the GNU General Public License along | ||||||
|  | ##    with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  | """ | ||||||
|  | Convert .txt files to .lrf | ||||||
|  | """ | ||||||
|  | import os, sys | ||||||
|  | from optparse import OptionParser | ||||||
|  | 
 | ||||||
|  | from libprs500.lrf import ConversionError | ||||||
|  | 
 | ||||||
def main(args=None):
    """
    CLI for txt -> lrf conversions.

    Expects exactly one positional argument, the path of the text file, and
    accepts the optional C{--title}, C{--author} and C{--encoding} switches.
    When no title is given, the file name of the source without its extension
    is used. The parsed options are handed to L{convert_txt}.

    Exits with status 1 after printing the help text when the number of
    positional arguments is not one, and after printing the error to stderr
    when the conversion raises C{ConversionError}.

    @param args: Argument list to parse (without the program name). When
                 C{None}, the default, C{sys.argv[1:]} is parsed.
    """
    defenc = 'cp1252'
    # The usage text is spelled out line by line so that its whitespace is
    # explicit; the value is the same as the original triple-quoted literal.
    usage = ('usage: %prog [options] mybook.txt\n'
             '        \n'
             '        %prog converts mybook.txt to mybook.lrf\n'
             '        ')
    parser = OptionParser(usage=usage)
    parser.add_option("-t", "--title", action="store", type="string",
                      dest="title", help="Set the title")
    parser.add_option("-a", "--author", action="store", type="string",
                      dest="author", help="Set the author", default='Unknown')
    enchelp = 'Set the encoding used to decode ' + \
              'the text in mybook.txt. Default encoding is ' + defenc
    parser.add_option('-e', '--encoding', action='store', type='string',
                      dest='encoding', help=enchelp, default=defenc)
    options, positional = parser.parse_args(args)
    if len(positional) != 1:
        parser.print_help()
        sys.exit(1)
    src = positional[0]
    if options.title is None:
        # Fall back to the source file name, minus directory and extension.
        options.title = os.path.splitext(os.path.basename(src))[0]
    try:
        convert_txt(src, options)
    except ConversionError:
        # sys.exc_info() is used instead of "except X, err" so that the
        # handler parses on every Python version, including 2.5.
        err = sys.exc_info()[1]
        sys.stderr.write(str(err) + '\n')
        sys.exit(1)
|  |          | ||||||
|  |      | ||||||
def convert_txt(path, options):
    """
    Convert the text file at C{path} into an lrf file.

    Every run of consecutive non-blank lines becomes one paragraph whose
    lines are joined with a single space; a blank line ends the current
    paragraph. Text remaining after the last line is emitted as a final
    paragraph, so the file does not have to end in a blank line.

    @param path: Path of the text file to convert.
    @param options: Object with the following attributes:
                    C{author}, C{title}, C{encoding} (the assumed encoding of
                    the text in C{path}.)
    @return: Absolute path built from the current working directory and the
             base name of C{path} with its extension replaced by C{.lrf};
             this is the name passed to C{book.renderLrf}.
    @raise ConversionError: If rendering raises C{UnicodeDecodeError}, i.e.
                            the text is not encoded in C{options.encoding}.
    """
    import fileinput
    from libprs500.lrf.pylrs.pylrs import Book
    book = Book(title=options.title, author=options.author,
                sourceencoding=options.encoding)
    block = book.Page().TextBlock()
    pending = []  # stripped lines of the paragraph being collected
    try:
        for line in fileinput.input(path):
            line = line.strip()
            if line:
                pending.append(line)
            else:
                # A blank line always flushes, even when nothing is pending,
                # exactly as the original did.
                block.Paragraph(' '.join(pending))
                pending = []
    finally:
        # fileinput keeps module-level state; close it so that a failure in
        # the loop does not leave input() "already active" for the next call.
        fileinput.close()
    if pending:
        # The file ended without a trailing blank line.
        block.Paragraph(' '.join(pending))
    basename = os.path.basename(path)
    name = os.path.splitext(basename)[0] + '.lrf'
    try:
        book.renderLrf(name)
    except UnicodeDecodeError:
        raise ConversionError(path + ' is not encoded in ' +
                              options.encoding + '. Specify the ' +
                              'correct encoding with the -e option.')
    return os.path.abspath(name)
|  |      | ||||||
|  | 
 | ||||||
|  | # Run the txt -> lrf command line interface when executed as a script. | ||||||
|  | if __name__ == '__main__': | ||||||
|  |     main() | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user