mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-26 08:12:25 -04:00 
			
		
		
		
	FB2 Output: Add cover to FB2 metadata. TXT Input: Support for textile markup
This commit is contained in:
		
						commit
						ff37f2e9fc
					
				| @ -16,7 +16,6 @@ import uuid | |||||||
| 
 | 
 | ||||||
| from lxml import etree | from lxml import etree | ||||||
| 
 | 
 | ||||||
| from calibre import guess_type |  | ||||||
| from calibre import prepare_string_for_xml | from calibre import prepare_string_for_xml | ||||||
| from calibre.constants import __appname__, __version__ | from calibre.constants import __appname__, __version__ | ||||||
| from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace | from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace | ||||||
| @ -102,6 +101,7 @@ class FB2MLizer(object): | |||||||
|         metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) |         metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) | ||||||
|         metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' |         metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' | ||||||
|         metadata['id'] = None |         metadata['id'] = None | ||||||
|  |         metadata['cover'] = self.get_cover() | ||||||
| 
 | 
 | ||||||
|         author_parts = self.oeb_book.metadata.creator[0].value.split(' ') |         author_parts = self.oeb_book.metadata.creator[0].value.split(' ') | ||||||
|         if len(author_parts) == 1: |         if len(author_parts) == 1: | ||||||
| @ -124,6 +124,7 @@ class FB2MLizer(object): | |||||||
|             metadata['id'] = str(uuid.uuid4()) |             metadata['id'] = str(uuid.uuid4()) | ||||||
| 
 | 
 | ||||||
|         for key, value in metadata.items(): |         for key, value in metadata.items(): | ||||||
|  |             if not key == 'cover': | ||||||
|                 metadata[key] = prepare_string_for_xml(value) |                 metadata[key] = prepare_string_for_xml(value) | ||||||
| 
 | 
 | ||||||
|         return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \ |         return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \ | ||||||
| @ -136,6 +137,7 @@ class FB2MLizer(object): | |||||||
|                             '<last-name>%(author_last)s</last-name>' \ |                             '<last-name>%(author_last)s</last-name>' \ | ||||||
|                         '</author>' \ |                         '</author>' \ | ||||||
|                         '<book-title>%(title)s</book-title>' \ |                         '<book-title>%(title)s</book-title>' \ | ||||||
|  |                         '%(cover)s' \ | ||||||
|                         '<lang>%(lang)s</lang>' \ |                         '<lang>%(lang)s</lang>' \ | ||||||
|                     '</title-info>' \ |                     '</title-info>' \ | ||||||
|                     '<document-info>' \ |                     '<document-info>' \ | ||||||
| @ -154,6 +156,41 @@ class FB2MLizer(object): | |||||||
|     def fb2_footer(self): |     def fb2_footer(self): | ||||||
|         return u'</FictionBook>' |         return u'</FictionBook>' | ||||||
| 
 | 
 | ||||||
|  |     def get_cover(self): | ||||||
|  |         cover_href = None | ||||||
|  | 
 | ||||||
|  |         # Get the raster cover if it's available. | ||||||
|  |         if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: | ||||||
|  |             id = unicode(self.oeb_book.metadata.cover[0]) | ||||||
|  |             cover_item = self.oeb_book.manifest.ids[id] | ||||||
|  |             if cover_item.media_type in OEB_RASTER_IMAGES: | ||||||
|  |                 cover_href = cover_item.href | ||||||
|  |             print 1 | ||||||
|  |         else: | ||||||
|  |             # Figure out if we have a title page or a cover page | ||||||
|  |             page_name = '' | ||||||
|  |             if 'titlepage' in self.oeb_book.guide: | ||||||
|  |                 page_name = 'titlepage' | ||||||
|  |             elif 'cover' in self.oeb_book.guide: | ||||||
|  |                 page_name = 'cover' | ||||||
|  | 
 | ||||||
|  |             if page_name: | ||||||
|  |                 cover_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[page_name].href] | ||||||
|  |                 # Get the first image in the page | ||||||
|  |                 for img in cover_item.xpath('//img'): | ||||||
|  |                     cover_href = cover_item.abshref(img.get('src')) | ||||||
|  |                     print cover_href | ||||||
|  |                     break | ||||||
|  | 
 | ||||||
|  |         if cover_href: | ||||||
|  |             # Only write the image tag if it is in the manifest. | ||||||
|  |             if cover_href in self.oeb_book.manifest.hrefs.keys(): | ||||||
|  |                 if cover_href not in self.image_hrefs.keys(): | ||||||
|  |                     self.image_hrefs[cover_href] = '_%s.jpg' % len(self.image_hrefs.keys()) | ||||||
|  |             return u'<coverpage><image xlink:href="#%s" /></coverpage>' % self.image_hrefs[cover_href] | ||||||
|  | 
 | ||||||
|  |         return u'' | ||||||
|  | 
 | ||||||
|     def get_text(self): |     def get_text(self): | ||||||
|         text = ['<body>'] |         text = ['<body>'] | ||||||
| 
 | 
 | ||||||
| @ -162,23 +199,6 @@ class FB2MLizer(object): | |||||||
|             text.append('<section>') |             text.append('<section>') | ||||||
|             self.section_level += 1 |             self.section_level += 1 | ||||||
| 
 | 
 | ||||||
|         # Insert the title page / cover into the spine if it is not already referenced. |  | ||||||
|         title_name = u'' |  | ||||||
|         if 'titlepage' in self.oeb_book.guide: |  | ||||||
|             title_name = 'titlepage' |  | ||||||
|         elif 'cover' in self.oeb_book.guide: |  | ||||||
|             title_name = 'cover' |  | ||||||
|         if title_name: |  | ||||||
|             title_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[title_name].href] |  | ||||||
|             if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml': |  | ||||||
|                 self.oeb_book.spine.insert(0, title_item, True) |  | ||||||
|         # Create xhtml page to reference cover image so it can be used. |  | ||||||
|         if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: |  | ||||||
|             id = unicode(self.oeb_book.metadata.cover[0]) |  | ||||||
|             cover_item = self.oeb_book.manifest.ids[id] |  | ||||||
|             if cover_item.media_type in OEB_RASTER_IMAGES: |  | ||||||
|                 self.insert_image_cover(cover_item.href) |  | ||||||
|          |  | ||||||
|         for item in self.oeb_book.spine: |         for item in self.oeb_book.spine: | ||||||
|             self.log.debug('Converting %s to FictionBook2 XML' % item.href) |             self.log.debug('Converting %s to FictionBook2 XML' % item.href) | ||||||
|             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) |             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) | ||||||
| @ -203,17 +223,6 @@ class FB2MLizer(object): | |||||||
| 
 | 
 | ||||||
|         return ''.join(text) + '</body>' |         return ''.join(text) + '</body>' | ||||||
| 
 | 
 | ||||||
|     def insert_image_cover(self, image_href): |  | ||||||
|         from calibre.ebooks.oeb.base import RECOVER_PARSER |  | ||||||
|         try: |  | ||||||
|             root = etree.fromstring(u'<html xmlns="%s"><body><img src="%s" /></body></html>' % (XHTML_NS, image_href), parser=RECOVER_PARSER) |  | ||||||
|         except: |  | ||||||
|             root = etree.fromstring(u'', parser=RECOVER_PARSER) |  | ||||||
|          |  | ||||||
|         id, href = self.oeb_book.manifest.generate('fb2_cover', 'fb2_cover.xhtml') |  | ||||||
|         item = self.oeb_book.manifest.add(id, href, guess_type(href)[0], data=root) |  | ||||||
|         self.oeb_book.spine.insert(0, item, True) |  | ||||||
| 
 |  | ||||||
|     def fb2mlize_images(self): |     def fb2mlize_images(self): | ||||||
|         ''' |         ''' | ||||||
|         This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function. |         This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function. | ||||||
|  | |||||||
							
								
								
									
										6
									
								
								src/calibre/ebooks/textile/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								src/calibre/ebooks/textile/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,6 @@ | |||||||
# Re-export the Textile entry points from the sibling ``functions`` module
# (implicit relative import).
from functions import textile, textile_restricted, Textile

# This branch never runs; it only references the imported names.
# Presumably it exists to keep static checkers from flagging the imports
# as unused -- confirm before removing.
if False:
    textile, textile_restricted, Textile

# NOTE(review): ``Textile`` is imported above but is not listed here, so
# ``from ... import *`` exposes only the two functions.
__all__ = ['textile', 'textile_restricted']
							
								
								
									
										981
									
								
								src/calibre/ebooks/textile/functions.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										981
									
								
								src/calibre/ebooks/textile/functions.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,981 @@ | |||||||
|  | #!/usr/bin/env python | ||||||
|  | """ | ||||||
|  | PyTextile | ||||||
|  | 
 | ||||||
|  | A Humane Web Text Generator | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | __version__ = '2.1.4' | ||||||
|  | 
 | ||||||
|  | __date__ = '2009/12/04' | ||||||
|  | 
 | ||||||
|  | __copyright__ = """ | ||||||
|  | Copyright (c) 2009, Jason Samsa, http://jsamsa.com/ | ||||||
|  | Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/ | ||||||
|  | Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/ | ||||||
|  | 
 | ||||||
|  | Original PHP Version: | ||||||
|  | Copyright (c) 2003-2004, Dean Allen <dean@textism.com> | ||||||
|  | All rights reserved. | ||||||
|  | 
 | ||||||
|  | Thanks to Carlo Zottmann <carlo@g-blog.net> for refactoring | ||||||
|  | Textile's procedural code into a class framework | ||||||
|  | 
 | ||||||
|  | Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/ | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | __license__ = """ | ||||||
|  | L I C E N S E | ||||||
|  | ============= | ||||||
|  | Redistribution and use in source and binary forms, with or without | ||||||
|  | modification, are permitted provided that the following conditions are met: | ||||||
|  | 
 | ||||||
|  | * Redistributions of source code must retain the above copyright notice, | ||||||
|  |   this list of conditions and the following disclaimer. | ||||||
|  | 
 | ||||||
|  | * Redistributions in binary form must reproduce the above copyright notice, | ||||||
|  |   this list of conditions and the following disclaimer in the documentation | ||||||
|  |   and/or other materials provided with the distribution. | ||||||
|  | 
 | ||||||
|  | * Neither the name Textile nor the names of its contributors may be used to | ||||||
|  |   endorse or promote products derived from this software without specific | ||||||
|  |   prior written permission. | ||||||
|  | 
 | ||||||
|  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||||
|  | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||||
|  | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||||
|  | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||||||
|  | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||||||
|  | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||||||
|  | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||||
|  | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||||||
|  | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||||||
|  | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||||||
|  | POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | import re | ||||||
|  | import uuid | ||||||
|  | from urlparse import urlparse | ||||||
|  | 
 | ||||||
|  | def _normalize_newlines(string): | ||||||
|  |     out = re.sub(r'\r\n', '\n', string) | ||||||
|  |     out = re.sub(r'\n{3,}', '\n\n', out) | ||||||
|  |     out = re.sub(r'\n\s*\n', '\n\n', out) | ||||||
|  |     out = re.sub(r'"$', '" ', out) | ||||||
|  |     return out | ||||||
|  | 
 | ||||||
def getimagesize(url):
    """
    Attempts to determine an image's width and height, and returns a string
    suitable for use in an <img> tag, or None in case of failure.
    Requires that PIL is installed.

    The image is downloaded in 1024-byte chunks and fed to PIL's incremental
    parser; reading stops as soon as the parser has identified the image.
    The connection is always closed before returning.

    >>> getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif")
    ... #doctest: +ELLIPSIS, +SKIP
    'width="..." height="..."'

    """

    try:
        import ImageFile
        import urllib2
    except ImportError:
        return None

    try:
        f = urllib2.urlopen(url)
    except (IOError, ValueError):
        return None

    try:
        p = ImageFile.Parser()
        while True:
            s = f.read(1024)
            if not s:
                break
            p.feed(s)
            if p.image:
                return 'width="%i" height="%i"' % p.image.size
    except (IOError, ValueError):
        return None
    finally:
        # Release the socket whether we returned early, ran out of data or
        # hit an error.
        f.close()

    # Data ended before the parser could identify an image.
    return None
|  | 
 | ||||||
class Textile(object):
    """
    Textile-to-HTML converter.

    The class-level attributes below are regular-expression fragments and
    lookup tables shared by the parsing methods.
    """
    # Horizontal alignment markers (``<``, ``>``, ``<>``, ``=``) and padding
    # parentheses; interpreted by pba().
    hlgn = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
    # Vertical alignment markers (``-``, ``^``, ``~``).
    vlgn = r'[\-^~]'
    # ``(class)`` / ``(class#id)`` attribute.
    clas = r'(?:\([^)]+\))'
    # ``[lang]`` attribute.
    lnge = r'(?:\[[^\]]+\])'
    # ``{style}`` attribute.
    styl = r'(?:\{[^}]+\})'
    # Column span: backslash followed by digits.
    cspn = r'(?:\\\d+)'
    # Row span: slash followed by digits.
    rspn = r'(?:\/\d+)'
    # Any run of alignment markers.
    a = r'(?:%s|%s)*' % (hlgn, vlgn)
    # Any run of span markers.
    s = r'(?:%s|%s)*' % (cspn, rspn)
    # Any run of class / style / lang / horizontal-alignment attributes.
    c = r'(?:%s)*' % '|'.join([clas, styl, lnge, hlgn])

    # Character class of punctuation characters.
    pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
    # urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
    # NOTE: unlike the commented-out variant above, this is NOT a raw string
    # and omits ``!``; do not convert it to a raw string without re-checking
    # the backslash escapes.
    urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'

    # URL schemes listed for this converter.
    url_schemes = ('http', 'https', 'ftp', 'mailto')

    # Block-tag prefixes recognised by block(); the "lite" set is used when
    # the instance was created with lite=True.
    btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
    btag_lite = ('bq', 'bc', 'p')

    # (name, replacement text) pairs for typographic glyphs.
    # NOTE(review): these literals are non-ASCII characters in this copy;
    # they may have been HTML-entity-decoded during extraction (e.g. from
    # numeric character references) -- confirm against the upstream file.
    glyph_defaults = (
        ('txt_quote_single_open',  '‘'),
        ('txt_quote_single_close', '’'),
        ('txt_quote_double_open',  '“'),
        ('txt_quote_double_close', '”'),
        ('txt_apostrophe',         '’'),
        ('txt_prime',              '′'),
        ('txt_prime_double',       '″'),
        ('txt_ellipsis',           '…'),
        ('txt_emdash',             '—'),
        ('txt_endash',             '–'),
        ('txt_dimension',          '×'),
        ('txt_trademark',          '™'),
        ('txt_registered',         '®'),
        ('txt_copyright',          '©'),
    )
|  | 
 | ||||||
|  |     def __init__(self, restricted=False, lite=False, noimage=False): | ||||||
|  |         """docstring for __init__""" | ||||||
|  |         self.restricted = restricted | ||||||
|  |         self.lite = lite | ||||||
|  |         self.noimage = noimage | ||||||
|  |         self.get_sizes = False | ||||||
|  |         self.fn = {} | ||||||
|  |         self.urlrefs = {} | ||||||
|  |         self.shelf = {} | ||||||
|  |         self.rel = '' | ||||||
|  |         self.html_type = 'xhtml' | ||||||
|  | 
 | ||||||
|  |     def textile(self, text, rel=None, head_offset=0, html_type='xhtml'): | ||||||
|  |         """ | ||||||
|  |         >>> import textile | ||||||
|  |         >>> textile.textile('some textile') | ||||||
|  |         u'\\t<p>some textile</p>' | ||||||
|  |         """ | ||||||
|  |         self.html_type = html_type | ||||||
|  | 
 | ||||||
|  |         # text = unicode(text) | ||||||
|  |         text = _normalize_newlines(text) | ||||||
|  | 
 | ||||||
|  |         if self.restricted: | ||||||
|  |             text = self.encode_html(text, quotes=False) | ||||||
|  | 
 | ||||||
|  |         if rel: | ||||||
|  |             self.rel = ' rel="%s"' % rel | ||||||
|  | 
 | ||||||
|  |         text = self.getRefs(text) | ||||||
|  | 
 | ||||||
|  |         text = self.block(text, int(head_offset)) | ||||||
|  | 
 | ||||||
|  |         text = self.retrieve(text) | ||||||
|  | 
 | ||||||
|  |         return text | ||||||
|  | 
 | ||||||
|  |     def pba(self, input, element=None): | ||||||
|  |         """ | ||||||
|  |         Parse block attributes. | ||||||
|  | 
 | ||||||
|  |         >>> t = Textile() | ||||||
|  |         >>> t.pba(r'\3') | ||||||
|  |         '' | ||||||
|  |         >>> t.pba(r'\\3', element='td') | ||||||
|  |         ' colspan="3"' | ||||||
|  |         >>> t.pba(r'/4', element='td') | ||||||
|  |         ' rowspan="4"' | ||||||
|  |         >>> t.pba(r'\\3/4', element='td') | ||||||
|  |         ' colspan="3" rowspan="4"' | ||||||
|  | 
 | ||||||
|  |         >>> t.vAlign('^') | ||||||
|  |         'top' | ||||||
|  | 
 | ||||||
|  |         >>> t.pba('^', element='td') | ||||||
|  |         ' style="vertical-align:top;"' | ||||||
|  | 
 | ||||||
|  |         >>> t.pba('{line-height:18px}') | ||||||
|  |         ' style="line-height:18px;"' | ||||||
|  | 
 | ||||||
|  |         >>> t.pba('(foo-bar)') | ||||||
|  |         ' class="foo-bar"' | ||||||
|  | 
 | ||||||
|  |         >>> t.pba('(#myid)') | ||||||
|  |         ' id="myid"' | ||||||
|  | 
 | ||||||
|  |         >>> t.pba('(foo-bar#myid)') | ||||||
|  |         ' class="foo-bar" id="myid"' | ||||||
|  | 
 | ||||||
|  |         >>> t.pba('((((') | ||||||
|  |         ' style="padding-left:4em;"' | ||||||
|  | 
 | ||||||
|  |         >>> t.pba(')))') | ||||||
|  |         ' style="padding-right:3em;"' | ||||||
|  | 
 | ||||||
|  |         >>> t.pba('[fr]') | ||||||
|  |         ' lang="fr"' | ||||||
|  | 
 | ||||||
|  |         """ | ||||||
|  |         style = [] | ||||||
|  |         aclass = '' | ||||||
|  |         lang = '' | ||||||
|  |         colspan = '' | ||||||
|  |         rowspan = '' | ||||||
|  |         id = '' | ||||||
|  | 
 | ||||||
|  |         if not input: | ||||||
|  |             return '' | ||||||
|  | 
 | ||||||
|  |         matched = input | ||||||
|  |         if element == 'td': | ||||||
|  |             m = re.search(r'\\(\d+)', matched) | ||||||
|  |             if m: | ||||||
|  |                 colspan = m.group(1) | ||||||
|  | 
 | ||||||
|  |             m = re.search(r'/(\d+)', matched) | ||||||
|  |             if m: | ||||||
|  |                 rowspan = m.group(1) | ||||||
|  | 
 | ||||||
|  |         if element == 'td' or element == 'tr': | ||||||
|  |             m = re.search(r'(%s)' % self.vlgn, matched) | ||||||
|  |             if m: | ||||||
|  |                 style.append("vertical-align:%s;" % self.vAlign(m.group(1))) | ||||||
|  | 
 | ||||||
|  |         m = re.search(r'\{([^}]*)\}', matched) | ||||||
|  |         if m: | ||||||
|  |             style.append(m.group(1).rstrip(';') + ';') | ||||||
|  |             matched = matched.replace(m.group(0), '') | ||||||
|  | 
 | ||||||
|  |         m = re.search(r'\[([^\]]+)\]', matched, re.U) | ||||||
|  |         if m: | ||||||
|  |             lang = m.group(1) | ||||||
|  |             matched = matched.replace(m.group(0), '') | ||||||
|  | 
 | ||||||
|  |         m = re.search(r'\(([^()]+)\)', matched, re.U) | ||||||
|  |         if m: | ||||||
|  |             aclass = m.group(1) | ||||||
|  |             matched = matched.replace(m.group(0), '') | ||||||
|  | 
 | ||||||
|  |         m = re.search(r'([(]+)', matched) | ||||||
|  |         if m: | ||||||
|  |             style.append("padding-left:%sem;" % len(m.group(1))) | ||||||
|  |             matched = matched.replace(m.group(0), '') | ||||||
|  | 
 | ||||||
|  |         m = re.search(r'([)]+)', matched) | ||||||
|  |         if m: | ||||||
|  |             style.append("padding-right:%sem;" % len(m.group(1))) | ||||||
|  |             matched = matched.replace(m.group(0), '') | ||||||
|  | 
 | ||||||
|  |         m = re.search(r'(%s)' % self.hlgn, matched) | ||||||
|  |         if m: | ||||||
|  |             style.append("text-align:%s;" % self.hAlign(m.group(1))) | ||||||
|  | 
 | ||||||
|  |         m = re.search(r'^(.*)#(.*)$', aclass) | ||||||
|  |         if m: | ||||||
|  |             id = m.group(2) | ||||||
|  |             aclass = m.group(1) | ||||||
|  | 
 | ||||||
|  |         if self.restricted: | ||||||
|  |             if lang: | ||||||
|  |                 return ' lang="%s"' | ||||||
|  |             else: | ||||||
|  |                 return '' | ||||||
|  | 
 | ||||||
|  |         result = [] | ||||||
|  |         if style: | ||||||
|  |             result.append(' style="%s"' % "".join(style)) | ||||||
|  |         if aclass: | ||||||
|  |             result.append(' class="%s"' % aclass) | ||||||
|  |         if lang: | ||||||
|  |             result.append(' lang="%s"' % lang) | ||||||
|  |         if id: | ||||||
|  |             result.append(' id="%s"' % id) | ||||||
|  |         if colspan: | ||||||
|  |             result.append(' colspan="%s"' % colspan) | ||||||
|  |         if rowspan: | ||||||
|  |             result.append(' rowspan="%s"' % rowspan) | ||||||
|  |         return ''.join(result) | ||||||
|  | 
 | ||||||
|  |     def hasRawText(self, text): | ||||||
|  |         """ | ||||||
|  |         checks whether the text has text not already enclosed by a block tag | ||||||
|  | 
 | ||||||
|  |         >>> t = Textile() | ||||||
|  |         >>> t.hasRawText('<p>foo bar biz baz</p>') | ||||||
|  |         False | ||||||
|  | 
 | ||||||
|  |         >>> t.hasRawText(' why yes, yes it does') | ||||||
|  |         True | ||||||
|  | 
 | ||||||
|  |         """ | ||||||
|  |         r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|pre|h\d)[^>]*?>.*</\1>', re.S).sub('', text.strip()).strip() | ||||||
|  |         r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r) | ||||||
|  |         return '' != r | ||||||
|  | 
 | ||||||
|  |     def table(self, text): | ||||||
|  |         r""" | ||||||
|  |         >>> t = Textile() | ||||||
|  |         >>> t.table('|one|two|three|\n|a|b|c|') | ||||||
|  |         '\t<table>\n\t\t<tr>\n\t\t\t<td>one</td>\n\t\t\t<td>two</td>\n\t\t\t<td>three</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t</tr>\n\t</table>\n\n' | ||||||
|  |         """ | ||||||
|  |         text = text + "\n\n" | ||||||
|  |         pattern = re.compile(r'^(?:table(_?%(s)s%(a)s%(c)s)\. ?\n)?^(%(a)s%(c)s\.? ?\|.*\|)\n\n' % {'s':self.s, 'a':self.a, 'c':self.c}, re.S|re.M|re.U) | ||||||
|  |         return pattern.sub(self.fTable, text) | ||||||
|  | 
 | ||||||
|  |     def fTable(self, match): | ||||||
|  |         tatts = self.pba(match.group(1), 'table') | ||||||
|  |         rows = [] | ||||||
|  |         for row in [ x for x in match.group(2).split('\n') if x]: | ||||||
|  |             rmtch = re.search(r'^(%s%s\. )(.*)' % (self.a, self.c), row.lstrip()) | ||||||
|  |             if rmtch: | ||||||
|  |                 ratts = self.pba(rmtch.group(1), 'tr') | ||||||
|  |                 row = rmtch.group(2) | ||||||
|  |             else: | ||||||
|  |                 ratts = '' | ||||||
|  | 
 | ||||||
|  |             cells = [] | ||||||
|  |             for cell in row.split('|')[1:-1]: | ||||||
|  |                 ctyp = 'd' | ||||||
|  |                 if re.search(r'^_', cell): | ||||||
|  |                     ctyp = "h" | ||||||
|  |                 cmtch = re.search(r'^(_?%s%s%s\. )(.*)' % (self.s, self.a, self.c), cell) | ||||||
|  |                 if cmtch: | ||||||
|  |                     catts = self.pba(cmtch.group(1), 'td') | ||||||
|  |                     cell = cmtch.group(2) | ||||||
|  |                 else: | ||||||
|  |                     catts = '' | ||||||
|  | 
 | ||||||
|  |                 cell = self.graf(self.span(cell)) | ||||||
|  |                 cells.append('\t\t\t<t%s%s>%s</t%s>' % (ctyp, catts, cell, ctyp)) | ||||||
|  |             rows.append("\t\t<tr%s>\n%s\n\t\t</tr>" % (ratts, '\n'.join(cells))) | ||||||
|  |             cells = [] | ||||||
|  |             catts = None | ||||||
|  |         return "\t<table%s>\n%s\n\t</table>\n\n" % (tatts, '\n'.join(rows)) | ||||||
|  | 
 | ||||||
|  |     def lists(self, text): | ||||||
|  |         """ | ||||||
|  |         >>> t = Textile() | ||||||
|  |         >>> t.lists("* one\\n* two\\n* three") | ||||||
|  |         '\\t<ul>\\n\\t\\t<li>one</li>\\n\\t\\t<li>two</li>\\n\\t\\t<li>three</li>\\n\\t</ul>' | ||||||
|  |         """ | ||||||
|  |         pattern = re.compile(r'^([#*]+%s .*)$(?![^#*])' % self.c, re.U|re.M|re.S) | ||||||
|  |         return pattern.sub(self.fList, text) | ||||||
|  | 
 | ||||||
    def fList(self, match):
        """
        Render one list match (see lists()) as nested <ul>/<ol> markup.

        The matched text is processed line by line. ``lists`` acts as a
        stack of the marker strings ("*", "**", "#", ...) of the lists that
        are currently open; the marker of the following line decides which
        lists and items are closed after the current line.
        Lines that are not list items are passed through unchanged.
        """
        text = match.group(0).split("\n")
        result = []
        lists = []
        for i, line in enumerate(text):
            # Look one line ahead; the last line has no successor.
            try:
                nextline = text[i+1]
            except IndexError:
                nextline = ''

            m = re.search(r"^([#*]+)(%s%s) (.*)$" % (self.a, self.c), line, re.S)
            if m:
                # tl: this line's marker, nl: next line's marker ('' if none).
                tl, atts, content = m.groups()
                nl = ''
                nm = re.search(r'^([#*]+)\s.*', nextline)
                if nm:
                    nl = nm.group(1)
                if tl not in lists:
                    # First item at this marker: open a new list. Attributes
                    # are only applied here, on the list element.
                    lists.append(tl)
                    atts = self.pba(atts)
                    line = "\t<%sl%s>\n\t\t<li>%s" % (self.lT(tl), atts, self.graf(content))
                else:
                    line = "\t\t<li>" + self.graf(content)

                # The item stays open only when the next line is nested deeper.
                if len(nl) <= len(tl):
                    line = line + "</li>"
                # Close every open list deeper than the next line, innermost
                # first. Removing the current element while walking the list
                # in reverse does not disturb the entries still to be visited.
                for k in reversed(lists):
                    if len(k) > len(nl):
                        line = line + "\n\t</%sl>" % self.lT(k)
                        if len(k) > 1:
                            # A nested list also ends the item that contains it.
                            line = line + "</li>"
                        lists.remove(k)

            result.append(line)
        return "\n".join(result)
|  | 
 | ||||||
|  |     def lT(self, input): | ||||||
|  |         if re.search(r'^#+', input): | ||||||
|  |             return 'o' | ||||||
|  |         else: | ||||||
|  |             return 'u' | ||||||
|  | 
 | ||||||
|  |     def doPBr(self, in_): | ||||||
|  |         return re.compile(r'<(p)([^>]*?)>(.*)(</\1>)', re.S).sub(self.doBr, in_) | ||||||
|  | 
 | ||||||
|  |     def doBr(self, match): | ||||||
|  |         if self.html_type == 'html': | ||||||
|  |             content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br>', match.group(3)) | ||||||
|  |         else: | ||||||
|  |             content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br />', match.group(3)) | ||||||
|  |         return '<%s%s>%s%s' % (match.group(1), match.group(2), content, match.group(4)) | ||||||
|  | 
 | ||||||
    def block(self, text, head_offset = 0):
        """
        Convert the blank-line separated blocks of *text* to HTML.

        Each chunk is either an explicit block ("tag[atts].[.][:cite] text",
        where the tag comes from btag, or btag_lite in lite mode) or an
        anonymous chunk. A signature ending in ".." starts an extended block
        that swallows the following anonymous chunks until the next explicit
        block. head_offset is added to heading levels, and the result is
        clamped to the range 1-6.

        Returns the rendered chunks joined by blank lines.

        >>> t = Textile()
        >>> t.block('h1. foobar baby')
        '\\t<h1>foobar baby</h1>'
        """
        if not self.lite:
            tre = '|'.join(self.btag)
        else:
            tre = '|'.join(self.btag_lite)
        text = text.split('\n\n')

        # State carried from one chunk to the next; only meaningful while an
        # extended block (ext) is open.
        tag = 'p'
        atts = cite = graf = ext = c1 = ''

        out = []

        # NOTE(review): anon is set to True below but never reset inside the
        # loop, so it stays True for every chunk after the first anonymous
        # one -- confirm this is intended.
        anon = False
        for line in text:
            pattern = r'^(%s)(%s%s)\.(\.?)(?::(\S+))? (.*)$' % (tre, self.a, self.c)
            match = re.search(pattern, line, re.S)
            if match:
                # A new explicit block ends any extended block still open:
                # append its pending outer closing tag to the last output.
                if ext:
                    out.append(out.pop() + c1)

                tag, atts, ext, cite, graf = match.groups()
                h_match = re.search(r'h([1-6])', tag)
                if h_match:
                    head_level, = h_match.groups()
                    tag = 'h%i' % max(1,
                                      min(int(head_level) + head_offset,
                                          6))
                o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
                                                      cite, graf)
                # leave off c1 if this block is extended,
                # we'll close it at the start of the next block

                if ext:
                    line = "%s%s%s%s" % (o1, o2, content, c2)
                else:
                    line = "%s%s%s%s%s" % (o1, o2, content, c2, c1)

            else:
                anon = True
                # Chunks that start with whitespace are only run through
                # graf(), unless an extended block is open.
                if ext or not re.search(r'^\s', line):
                    o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
                                                          cite, line)
                    # skip $o1/$c1 because this is part of a continuing
                    # extended block
                    if tag == 'p' and not self.hasRawText(content):
                        # Content is already wrapped in block markup; do not
                        # add another <p> around it.
                        line = content
                    else:
                        line = "%s%s%s" % (o2, content, c2)
                else:
                    line = self.graf(line)

            line = self.doPBr(line)
            if self.html_type == 'xhtml':
                line = re.sub(r'<br>', '<br />', line)

            # Continuation chunks are glued onto the previous output entry
            # instead of becoming a separate block.
            if ext and anon:
                out.append(out.pop() + "\n" + line)
            else:
                out.append(line)

            # Outside an extended block every chunk starts from the default
            # paragraph state.
            if not ext:
                tag = 'p'
                atts = ''
                cite = ''
                graf = ''

        # Close an extended block that runs to the end of the text.
        if ext:
            out.append(out.pop() + c1)
        return '\n\n'.join(out)
|  | 
 | ||||||
|  |     def fBlock(self, tag, atts, ext, cite, content): | ||||||
|  |         """ | ||||||
|  |         >>> t = Textile() | ||||||
|  |         >>> t.fBlock("bq", "", None, "", "Hello BlockQuote") | ||||||
|  |         ('\\t<blockquote>\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>') | ||||||
|  | 
 | ||||||
|  |         >>> t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote") | ||||||
|  |         ('\\t<blockquote cite="http://google.com">\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>') | ||||||
|  | 
 | ||||||
|  |         >>> t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS | ||||||
|  |         ('<pre>', '<code>', ..., '</code>', '</pre>') | ||||||
|  | 
 | ||||||
|  |         >>> t.fBlock("h1", "", None, "", "foobar") | ||||||
|  |         ('', '\\t<h1>', 'foobar', '</h1>', '') | ||||||
|  |         """ | ||||||
|  |         atts = self.pba(atts) | ||||||
|  |         o1 = o2 = c2 = c1 = '' | ||||||
|  | 
 | ||||||
|  |         m = re.search(r'fn(\d+)', tag) | ||||||
|  |         if m: | ||||||
|  |             tag = 'p' | ||||||
|  |             if m.group(1) in self.fn: | ||||||
|  |                 fnid = self.fn[m.group(1)] | ||||||
|  |             else: | ||||||
|  |                 fnid = m.group(1) | ||||||
|  |             atts = atts + ' id="fn%s"' % fnid | ||||||
|  |             if atts.find('class=') < 0: | ||||||
|  |                 atts = atts + ' class="footnote"' | ||||||
|  |             content = ('<sup>%s</sup>' % m.group(1)) + content | ||||||
|  | 
 | ||||||
|  |         if tag == 'bq': | ||||||
|  |             cite = self.checkRefs(cite) | ||||||
|  |             if cite: | ||||||
|  |                 cite = ' cite="%s"' % cite | ||||||
|  |             else: | ||||||
|  |                 cite = '' | ||||||
|  |             o1 = "\t<blockquote%s%s>\n" % (cite, atts) | ||||||
|  |             o2 = "\t\t<p%s>" % atts | ||||||
|  |             c2 = "</p>" | ||||||
|  |             c1 = "\n\t</blockquote>" | ||||||
|  | 
 | ||||||
|  |         elif tag == 'bc': | ||||||
|  |             o1 = "<pre%s>" % atts | ||||||
|  |             o2 = "<code%s>" % atts | ||||||
|  |             c2 = "</code>" | ||||||
|  |             c1 = "</pre>" | ||||||
|  |             content = self.shelve(self.encode_html(content.rstrip("\n") + "\n")) | ||||||
|  | 
 | ||||||
|  |         elif tag == 'notextile': | ||||||
|  |             content = self.shelve(content) | ||||||
|  |             o1 = o2 = '' | ||||||
|  |             c1 = c2 = '' | ||||||
|  | 
 | ||||||
|  |         elif tag == 'pre': | ||||||
|  |             content = self.shelve(self.encode_html(content.rstrip("\n") + "\n")) | ||||||
|  |             o1 = "<pre%s>" % atts | ||||||
|  |             o2 = c2 = '' | ||||||
|  |             c1 = '</pre>' | ||||||
|  | 
 | ||||||
|  |         else: | ||||||
|  |             o2 = "\t<%s%s>" % (tag, atts) | ||||||
|  |             c2 = "</%s>" % tag | ||||||
|  | 
 | ||||||
|  |         content = self.graf(content) | ||||||
|  |         return o1, o2, content, c2, c1 | ||||||
|  | 
 | ||||||
|  |     def footnoteRef(self, text): | ||||||
|  |         """ | ||||||
|  |         >>> t = Textile() | ||||||
|  |         >>> t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS | ||||||
|  |         'foo<sup class="footnote"><a href="#fn...">1</a></sup> ' | ||||||
|  |         """ | ||||||
|  |         return re.sub(r'\b\[([0-9]+)\](\s)?', self.footnoteID, text) | ||||||
|  | 
 | ||||||
|  |     def footnoteID(self, match): | ||||||
|  |         id, t = match.groups() | ||||||
|  |         if id not in self.fn: | ||||||
|  |             self.fn[id] = str(uuid.uuid4()) | ||||||
|  |         fnid = self.fn[id] | ||||||
|  |         if not t: | ||||||
|  |             t = '' | ||||||
|  |         return '<sup class="footnote"><a href="#fn%s">%s</a></sup>%s' % (fnid, id, t) | ||||||
|  | 
 | ||||||
    def glyphs(self, text):
        """
        Apply typographic substitutions (quotes, dashes, ellipsis, acronyms,
        trademark-style symbols, ...) to the text outside of HTML tags.

        The replacement strings come from ``self.glyph_defaults``, so the
        exact output shown in the examples below depends on that table
        (defined outside this method).

        >>> t = Textile()

        >>> t.glyphs("apostrophe's")
        'apostrophe’s'

        >>> t.glyphs("back in '88")
        'back in ’88'

        >>> t.glyphs('foo ...')
        'foo …'

        >>> t.glyphs('--')
        '—'

        >>> t.glyphs('FooBar[tm]')
        'FooBar™'

        >>> t.glyphs("<p><cite>Cat's Cradle</cite> by Vonnegut</p>")
        '<p><cite>Cat’s Cradle</cite> by Vonnegut</p>'

        """
         # fix: hackish
        # Append a space after a double quote that ends the text.
        text = re.sub(r'"\Z', '\" ', text)

        # Search patterns, applied in this order; entry i pairs with
        # glyph_replace[i] below, so the two sequences must stay aligned.
        glyph_search = (
            re.compile(r"(\w)\'(\w)"),                                      # apostrophe's
            re.compile(r'(\s)\'(\d+\w?)\b(?!\')'),                          # back in '88
            re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'),                       #  single closing
            re.compile(r'\'/'),                                             #  single opening
            re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'),                       #  double closing
            re.compile(r'"'),                                               #  double opening
            re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'),        #  3+ uppercase acronym
            re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'),           #  3+ uppercase
            re.compile(r'\b(\s{0,1})?\.{3}'),                                     #  ellipsis
            re.compile(r'(\s?)--(\s?)'),                                    #  em dash
            re.compile(r'\s-(?:\s|$)'),                                     #  en dash
            re.compile(r'(\d+)( ?)x( ?)(?=\d+)'),                           #  dimension sign
            re.compile(r'\b ?[([]TM[])]', re.I),                            #  trademark
            re.compile(r'\b ?[([]R[])]', re.I),                             #  registered
            re.compile(r'\b ?[([]C[])]', re.I),                             #  copyright
         )

        # Replacement templates; the %(name)s placeholders are filled from
        # self.glyph_defaults, the \N references are regex back-references.
        glyph_replace = [x % dict(self.glyph_defaults) for x in (
            r'\1%(txt_apostrophe)s\2',           # apostrophe's
            r'\1%(txt_apostrophe)s\2',           # back in '88
            r'\1%(txt_quote_single_close)s',     #  single closing
            r'%(txt_quote_single_open)s',         #  single opening
            r'\1%(txt_quote_double_close)s',        #  double closing
            r'%(txt_quote_double_open)s',             #  double opening
            r'<acronym title="\2">\1</acronym>', #  3+ uppercase acronym
            r'<span class="caps">\1</span>',     #  3+ uppercase
            r'\1%(txt_ellipsis)s',                  #  ellipsis
            r'\1%(txt_emdash)s\2',               #  em dash
            r' %(txt_endash)s ',                 #  en dash
            r'\1\2%(txt_dimension)s\3',          #  dimension sign
            r'%(txt_trademark)s',                #  trademark
            r'%(txt_registered)s',                #  registered
            r'%(txt_copyright)s',                #  copyright
        )]

        result = []
        # Splitting on a capturing group keeps the tags in the list, so the
        # pieces alternate between plain text and "<...>" tags.
        for line in re.compile(r'(<.*?>)', re.U).split(text):
            # Pieces that look like a tag are passed through untouched.
            if not re.search(r'<.*>', line):
                for s, r in zip(glyph_search, glyph_replace):
                    line = s.sub(r, line)
            result.append(line)
        return ''.join(result)
|  | 
 | ||||||
|  |     def vAlign(self, input): | ||||||
|  |         d = {'^':'top', '-':'middle', '~':'bottom'} | ||||||
|  |         return d.get(input, '') | ||||||
|  | 
 | ||||||
|  |     def hAlign(self, input): | ||||||
|  |         d = {'<':'left', '=':'center', '>':'right', '<>': 'justify'} | ||||||
|  |         return d.get(input, '') | ||||||
|  | 
 | ||||||
|  |     def getRefs(self, text): | ||||||
|  |         """ | ||||||
|  |         what is this for? | ||||||
|  |         """ | ||||||
|  |         pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U) | ||||||
|  |         text = pattern.sub(self.refs, text) | ||||||
|  |         return text | ||||||
|  | 
 | ||||||
|  |     def refs(self, match): | ||||||
|  |         flag, url = match.groups() | ||||||
|  |         self.urlrefs[flag] = url | ||||||
|  |         return '' | ||||||
|  | 
 | ||||||
|  |     def checkRefs(self, url): | ||||||
|  |         return self.urlrefs.get(url, url) | ||||||
|  | 
 | ||||||
|  |     def isRelURL(self, url): | ||||||
|  |         """ | ||||||
|  |         Identify relative urls. | ||||||
|  | 
 | ||||||
|  |         >>> t = Textile() | ||||||
|  |         >>> t.isRelURL("http://www.google.com/") | ||||||
|  |         False | ||||||
|  |         >>> t.isRelURL("/foo") | ||||||
|  |         True | ||||||
|  | 
 | ||||||
|  |         """ | ||||||
|  |         (scheme, netloc) = urlparse(url)[0:2] | ||||||
|  |         return not scheme and not netloc | ||||||
|  | 
 | ||||||
|  |     def relURL(self, url): | ||||||
|  |         scheme = urlparse(url)[0] | ||||||
|  |         if self.restricted and scheme and scheme not in self.url_schemes: | ||||||
|  |             return '#' | ||||||
|  |         return url | ||||||
|  | 
 | ||||||
|  |     def shelve(self, text): | ||||||
|  |         id = str(uuid.uuid4()) | ||||||
|  |         self.shelf[id] = text | ||||||
|  |         return id | ||||||
|  | 
 | ||||||
|  |     def retrieve(self, text): | ||||||
|  |         """ | ||||||
|  |         >>> t = Textile() | ||||||
|  |         >>> id = t.shelve("foobar") | ||||||
|  |         >>> t.retrieve(id) | ||||||
|  |         'foobar' | ||||||
|  |         """ | ||||||
|  |         while True: | ||||||
|  |             old = text | ||||||
|  |             for k, v in self.shelf.items(): | ||||||
|  |                 text = text.replace(k, v) | ||||||
|  |             if text == old: | ||||||
|  |                 break | ||||||
|  |         return text | ||||||
|  | 
 | ||||||
|  |     def encode_html(self, text, quotes=True): | ||||||
|  |         a = ( | ||||||
|  |             ('&', '&'), | ||||||
|  |             ('<', '<'), | ||||||
|  |             ('>', '>') | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         if quotes: | ||||||
|  |             a = a + ( | ||||||
|  |                 ("'", '''), | ||||||
|  |                 ('"', '"') | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|  |         for k, v in a: | ||||||
|  |             text = text.replace(k, v) | ||||||
|  |         return text | ||||||
|  | 
 | ||||||
|  |     def graf(self, text): | ||||||
|  |         if not self.lite: | ||||||
|  |             text = self.noTextile(text) | ||||||
|  |             text = self.code(text) | ||||||
|  | 
 | ||||||
|  |         text = self.links(text) | ||||||
|  | 
 | ||||||
|  |         if not self.noimage: | ||||||
|  |             text = self.image(text) | ||||||
|  | 
 | ||||||
|  |         if not self.lite: | ||||||
|  |             text = self.lists(text) | ||||||
|  |             text = self.table(text) | ||||||
|  | 
 | ||||||
|  |         text = self.span(text) | ||||||
|  |         text = self.footnoteRef(text) | ||||||
|  |         text = self.glyphs(text) | ||||||
|  | 
 | ||||||
|  |         return text.rstrip('\n') | ||||||
|  | 
 | ||||||
    def links(self, text):
        """
        Find Textile links of the form ``"text(title)":url`` in ``text`` and
        replace each one with the value returned by ``self.fLink``.

        >>> t = Textile()
        >>> t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS
        'fooobar ... and hello world ...'
        """

        # Punctuation characters allowed (one of them) directly before the
        # opening quote; escaped with re.escape when placed in the pattern.
        punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'

        # Verbose (re.X) pattern.  The capture groups, in order, are: pre,
        # atts, text, title (unnamed), url, post -- fLink unpacks them
        # positionally in exactly that order.  self.c supplies the
        # sub-pattern for the attribute block.
        pattern = r'''
            (?P<pre>    [\s\[{(]|[%s]   )?
            "                          # start
            (?P<atts>   %s       )
            (?P<text>   [^"]+?   )
            \s?
            (?:   \(([^)]+?)\)(?=")   )?     # $title
            ":
            (?P<url>    (?:ftp|https?)? (?: :// )? [-A-Za-z0-9+&@#/?=~_()|!:,.;]*[-A-Za-z0-9+&@#/=~_()|]   )
            (?P<post>   [^\w\/;]*?   )
            (?=<|\s|$)
        ''' % (re.escape(punct), self.c)

        text = re.compile(pattern, re.X).sub(self.fLink, text)

        return text
|  | 
 | ||||||
|  |     def fLink(self, match): | ||||||
|  |         pre, atts, text, title, url, post = match.groups() | ||||||
|  | 
 | ||||||
|  |         if pre == None: | ||||||
|  |             pre = '' | ||||||
|  | 
 | ||||||
|  |         # assume ) at the end of the url is not actually part of the url | ||||||
|  |         # unless the url also contains a ( | ||||||
|  |         if url.endswith(')') and not url.find('(') > -1: | ||||||
|  |             post = url[-1] + post | ||||||
|  |             url = url[:-1] | ||||||
|  | 
 | ||||||
|  |         url = self.checkRefs(url) | ||||||
|  | 
 | ||||||
|  |         atts = self.pba(atts) | ||||||
|  |         if title: | ||||||
|  |             atts = atts +  ' title="%s"' % self.encode_html(title) | ||||||
|  | 
 | ||||||
|  |         if not self.noimage: | ||||||
|  |             text = self.image(text) | ||||||
|  | 
 | ||||||
|  |         text = self.span(text) | ||||||
|  |         text = self.glyphs(text) | ||||||
|  | 
 | ||||||
|  |         url = self.relURL(url) | ||||||
|  |         out = '<a href="%s"%s%s>%s</a>' % (self.encode_html(url), atts, self.rel, text) | ||||||
|  |         out = self.shelve(out) | ||||||
|  |         return ''.join([pre, out, post]) | ||||||
|  | 
 | ||||||
    def span(self, text):
        """
        Convert inline phrase modifiers (``*strong*``, ``**bold**``,
        ``_em_``, ``%span%`` ...) in ``text``; every match is rendered by
        ``self.fSpan``.

        >>> t = Textile()
        >>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
        'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
        """
        # Regex-escaped modifier markers.  Two-character markers are listed
        # before their one-character counterparts ('**' before '*', '__'
        # before '_') and the markers are processed in this order.
        qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
        # Punctuation accepted right before the opening marker and right
        # before the closing marker.
        pnct = ".,\"'?!;:"

        for qtag in qtags:
            # Verbose pattern with seven capture groups, unpacked
            # positionally by fSpan: leading bracket, marker, attributes
            # (self.c), cite, content, trailing punctuation, closing bracket.
            pattern = re.compile(r"""
                (?:^|(?<=[\s>%(pnct)s])|([\]}]))
                (%(qtag)s)(?!%(qtag)s)
                (%(c)s)
                (?::(\S+))?
                ([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n])
                ([%(pnct)s]*)
                %(qtag)s
                (?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s))
            """ % {'qtag':qtag, 'c':self.c, 'pnct':pnct,
                   'selfpnct':self.pnct}, re.X)
            text = pattern.sub(self.fSpan, text)
        return text
|  | 
 | ||||||
|  | 
 | ||||||
|  |     def fSpan(self, match): | ||||||
|  |         _, tag, atts, cite, content, end, _ = match.groups() | ||||||
|  | 
 | ||||||
|  |         qtags = { | ||||||
|  |             '*': 'strong', | ||||||
|  |             '**': 'b', | ||||||
|  |             '??': 'cite', | ||||||
|  |             '_' : 'em', | ||||||
|  |             '__': 'i', | ||||||
|  |             '-' : 'del', | ||||||
|  |             '%' : 'span', | ||||||
|  |             '+' : 'ins', | ||||||
|  |             '~' : 'sub', | ||||||
|  |             '^' : 'sup' | ||||||
|  |         } | ||||||
|  |         tag = qtags[tag] | ||||||
|  |         atts = self.pba(atts) | ||||||
|  |         if cite: | ||||||
|  |             atts = atts + 'cite="%s"' % cite | ||||||
|  | 
 | ||||||
|  |         content = self.span(content) | ||||||
|  | 
 | ||||||
|  |         out = "<%s%s>%s%s</%s>" % (tag, atts, content, end, tag) | ||||||
|  |         return out | ||||||
|  | 
 | ||||||
    def image(self, text):
        """
        Find Textile image markup (``!src(title)!:href``) in ``text`` and
        replace each occurrence with the value returned by ``self.fImage``.

        >>> t = Textile()
        >>> t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
        '<a href="http://jsamsa.com" class="img"><img src="/imgs/myphoto.jpg" alt="" /></a>'
        """
        # Verbose pattern with four capture groups, unpacked positionally by
        # fImage: attributes (self.c), src, title, href.
        pattern = re.compile(r"""
            (?:[\[{])?          # pre
            \!                 # opening !
            (%s)               # optional style,class atts
            (?:\. )?           # optional dot-space
            ([^\s(!]+)         # presume this is the src
            \s?                # optional space
            (?:\(([^\)]+)\))?  # optional title
            \!                 # closing
            (?::(\S+))?        # optional href
            (?:[\]}]|(?=\s|$)) # lookahead: space or end of string
        """ % self.c, re.U|re.X)
        return pattern.sub(self.fImage, text)
|  | 
 | ||||||
|  |     def fImage(self, match): | ||||||
|  |         # (None, '', '/imgs/myphoto.jpg', None, None) | ||||||
|  |         atts, url, title, href = match.groups() | ||||||
|  |         atts  = self.pba(atts) | ||||||
|  | 
 | ||||||
|  |         if title: | ||||||
|  |             atts = atts + ' title="%s" alt="%s"' % (title, title) | ||||||
|  |         else: | ||||||
|  |             atts = atts + ' alt=""' | ||||||
|  | 
 | ||||||
|  |         if not self.isRelURL(url) and self.get_sizes: | ||||||
|  |             size = getimagesize(url) | ||||||
|  |             if (size): | ||||||
|  |                 atts += " %s" % size | ||||||
|  | 
 | ||||||
|  |         if href: | ||||||
|  |             href = self.checkRefs(href) | ||||||
|  | 
 | ||||||
|  |         url = self.checkRefs(url) | ||||||
|  |         url = self.relURL(url) | ||||||
|  | 
 | ||||||
|  |         out = [] | ||||||
|  |         if href: | ||||||
|  |             out.append('<a href="%s" class="img">' % href) | ||||||
|  |         if self.html_type == 'html': | ||||||
|  |             out.append('<img src="%s"%s>' % (url, atts)) | ||||||
|  |         else: | ||||||
|  |             out.append('<img src="%s"%s />' % (url, atts)) | ||||||
|  |         if href: | ||||||
|  |             out.append('</a>') | ||||||
|  | 
 | ||||||
|  |         return ''.join(out) | ||||||
|  | 
 | ||||||
|  |     def code(self, text): | ||||||
|  |         text = self.doSpecial(text, '<code>', '</code>', self.fCode) | ||||||
|  |         text = self.doSpecial(text, '@', '@', self.fCode) | ||||||
|  |         text = self.doSpecial(text, '<pre>', '</pre>', self.fPre) | ||||||
|  |         return text | ||||||
|  | 
 | ||||||
|  |     def fCode(self, match): | ||||||
|  |         before, text, after = match.groups() | ||||||
|  |         if after == None: | ||||||
|  |             after = '' | ||||||
|  |         # text needs to be escaped | ||||||
|  |         if not self.restricted: | ||||||
|  |             text = self.encode_html(text) | ||||||
|  |         return ''.join([before, self.shelve('<code>%s</code>' % text), after]) | ||||||
|  | 
 | ||||||
|  |     def fPre(self, match): | ||||||
|  |         before, text, after = match.groups() | ||||||
|  |         if after == None: | ||||||
|  |             after = '' | ||||||
|  |         # text needs to be escapedd | ||||||
|  |         if not self.restricted: | ||||||
|  |             text = self.encode_html(text) | ||||||
|  |         return ''.join([before, '<pre>', self.shelve(text), '</pre>', after]) | ||||||
|  | 
 | ||||||
|  |     def doSpecial(self, text, start, end, method=None): | ||||||
|  |         if method == None: | ||||||
|  |             method = self.fSpecial | ||||||
|  |         pattern = re.compile(r'(^|\s|[\[({>])%s(.*?)%s(\s|$|[\])}])?' % (re.escape(start), re.escape(end)), re.M|re.S) | ||||||
|  |         return pattern.sub(method, text) | ||||||
|  | 
 | ||||||
|  |     def fSpecial(self, match): | ||||||
|  |         """ | ||||||
|  |         special blocks like notextile or code | ||||||
|  |         """ | ||||||
|  |         before, text, after = match.groups() | ||||||
|  |         if after == None: | ||||||
|  |             after = '' | ||||||
|  |         return ''.join([before, self.shelve(self.encode_html(text)), after]) | ||||||
|  | 
 | ||||||
|  |     def noTextile(self, text): | ||||||
|  |         text = self.doSpecial(text, '<notextile>', '</notextile>', self.fTextile) | ||||||
|  |         return self.doSpecial(text, '==', '==', self.fTextile) | ||||||
|  | 
 | ||||||
|  |     def fTextile(self, match): | ||||||
|  |         before, notextile, after = match.groups() | ||||||
|  |         if after == None: | ||||||
|  |             after = '' | ||||||
|  |         return ''.join([before, self.shelve(notextile), after]) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def textile(text, head_offset=0, html_type='xhtml', encoding=None, output=None):
    """
    Convert ``text`` with a default-constructed ``Textile`` instance and
    return the result of its ``textile`` method.

    this function takes additional parameters:
    head_offset - offset to apply to heading levels (default: 0)
    html_type - 'xhtml' or 'html' style tags (default: 'xhtml')

    ``encoding`` and ``output`` are accepted but are not used by this
    function.
    """
    converter = Textile()
    return converter.textile(text, head_offset=head_offset,
                             html_type=html_type)
|  | 
 | ||||||
def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
    """
    Restricted version of Textile designed for weblog comments and other
    untrusted input.

    The converter is created with ``restricted=True`` and the conversion is
    run with ``rel='nofollow'``:

    Raw HTML is escaped.
    Style attributes are disabled.
    rel='nofollow' is added to external links.

    When lite=True is set (the default):
    Block tags are restricted to p, bq, and bc.
    Lists and tables are disabled.

    When noimage=True is set (the default):
    Image tags are disabled.

    """
    converter = Textile(restricted=True, lite=lite, noimage=noimage)
    return converter.textile(text, rel='nofollow', html_type=html_type)
|  | 
 | ||||||
| @ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect | |||||||
| from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ | from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ | ||||||
|     separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ |     separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ | ||||||
|     preserve_spaces, detect_paragraph_type, detect_formatting_type, \ |     preserve_spaces, detect_paragraph_type, detect_formatting_type, \ | ||||||
|     convert_heuristic, normalize_line_endings |     convert_heuristic, normalize_line_endings, convert_textile | ||||||
| from calibre import _ent_pat, xml_entity_to_unicode | from calibre import _ent_pat, xml_entity_to_unicode | ||||||
| 
 | 
 | ||||||
| class TXTInput(InputFormatPlugin): | class TXTInput(InputFormatPlugin): | ||||||
| @ -41,6 +41,7 @@ class TXTInput(InputFormatPlugin): | |||||||
|                    'paragraph and no styling is applied.\n' |                    'paragraph and no styling is applied.\n' | ||||||
|                    '* heuristic: Process using heuristics to determine formatting such ' |                    '* heuristic: Process using heuristics to determine formatting such ' | ||||||
|                    'as chapter headings and italic text.\n' |                    'as chapter headings and italic text.\n' | ||||||
|  |                    '* textile: Processing using textile formatting.\n' | ||||||
|                    '* markdown: Processing using markdown formatting. ' |                    '* markdown: Processing using markdown formatting. ' | ||||||
|                    'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), |                    'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), | ||||||
|         OptionRecommendation(name='preserve_spaces', recommended_value=False, |         OptionRecommendation(name='preserve_spaces', recommended_value=False, | ||||||
| @ -91,6 +92,9 @@ class TXTInput(InputFormatPlugin): | |||||||
|             except RuntimeError: |             except RuntimeError: | ||||||
|                 raise ValueError('This txt file has malformed markup, it cannot be' |                 raise ValueError('This txt file has malformed markup, it cannot be' | ||||||
|                     ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') |                     ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') | ||||||
|  |         elif options.formatting_type == 'textile': | ||||||
|  |             log.debug('Running text though textile conversion...') | ||||||
|  |             html = convert_textile(txt) | ||||||
|         else: |         else: | ||||||
|             # Determine the paragraph type of the document. |             # Determine the paragraph type of the document. | ||||||
|             if options.paragraph_type == 'auto': |             if options.paragraph_type == 'auto': | ||||||
|  | |||||||
| @ -7,7 +7,6 @@ Read content from txt file. | |||||||
| import os, re | import os, re | ||||||
| 
 | 
 | ||||||
| from calibre import prepare_string_for_xml, isbytestring | from calibre import prepare_string_for_xml, isbytestring | ||||||
| from calibre.ebooks.markdown import markdown |  | ||||||
| from calibre.ebooks.metadata.opf2 import OPFCreator | from calibre.ebooks.metadata.opf2 import OPFCreator | ||||||
| from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor | from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor | ||||||
| from calibre.ebooks.conversion.preprocess import DocAnalysis | from calibre.ebooks.conversion.preprocess import DocAnalysis | ||||||
| @ -74,12 +73,18 @@ def convert_heuristic(txt, title='', epub_split_size_kb=0): | |||||||
|     return tp.convert(txt, title, epub_split_size_kb) |     return tp.convert(txt, title, epub_split_size_kb) | ||||||
| 
 | 
 | ||||||
def convert_markdown(txt, title='', disable_toc=False):
    """
    Convert Markdown text to an HTML document.

    The text is rendered with the footnotes, tables and toc extensions
    (``disable_toc`` is forwarded to the toc extension) and the result is
    inserted, together with ``title``, into ``HTML_TEMPLATE``.
    """
    # Imported here so the markdown package is only loaded when needed.
    from calibre.ebooks.markdown import markdown
    toc_options = {"toc": {"disable_toc": disable_toc}}
    converter = markdown.Markdown(
          extensions=['footnotes', 'tables', 'toc'],
          extension_configs=toc_options,
          safe_mode=False)
    body = converter.convert(txt)
    return HTML_TEMPLATE % (title, body)
| 
 | 
 | ||||||
def convert_textile(txt, title=''):
    """
    Convert Textile text to an HTML document: the rendered markup is
    inserted, together with ``title``, into ``HTML_TEMPLATE``.
    """
    # Imported here so the textile package is only loaded when needed.
    from calibre.ebooks.textile import textile
    body = textile(txt, encoding='utf-8')
    return HTML_TEMPLATE % (title, body)
|  | 
 | ||||||
| def normalize_line_endings(txt): | def normalize_line_endings(txt): | ||||||
|     txt = txt.replace('\r\n', '\n') |     txt = txt.replace('\r\n', '\n') | ||||||
|     txt = txt.replace('\r', '\n') |     txt = txt.replace('\r', '\n') | ||||||
| @ -157,24 +162,33 @@ def detect_paragraph_type(txt): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def detect_formatting_type(txt):
    """
    Guess which markup ``txt`` is written in.

    Occurrences of Markdown constructs and of Textile constructs are
    counted separately.  If either count exceeds 5 the markup with the
    higher count wins (Textile wins a tie); otherwise the text is treated
    as plain text.

    Returns ``'markdown'``, ``'textile'`` or ``'heuristic'``.
    """
    def occurrences(pattern):
        return len(re.findall(pattern, txt))

    # All patterns are raw strings: sequences such as "\[" and "\(" are
    # invalid escapes in ordinary string literals and trigger warnings on
    # current Python versions.  The compiled patterns are unchanged.
    markdown_patterns = (
        r'(?mu)^#+',                                # headings: leading #
        r'(?mu)^=+$',                               # headings: line of =
        r'(?mu)^-+$',                               # headings: line of -
        r'(?u)!\[.*?\]\(.+?\)',                     # images
        r'(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)',   # links
    )
    textile_patterns = (
        r'(?mu)^h[1-6]\.',                          # headings
        r'(?mu)^bq\.',                              # block quote
        r'\![^\s]+(:[^\s]+)*',                      # images
        r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+',    # links
    )

    markdown_count = sum(occurrences(p) for p in markdown_patterns)
    textile_count = sum(occurrences(p) for p in textile_patterns)

    if markdown_count > 5 or textile_count > 5:
        # Markdown has to be strictly ahead; a tie goes to textile.
        if markdown_count > textile_count:
            return 'markdown'
        return 'textile'

    return 'heuristic'
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user