TXTZ Import plugin.

2025-07-09 03:04:10 -04:00 · 2011-02-14 20:24:21 -05:00 · 2011-02-14 20:24:21 -05:00 · 647c24b706
commit 647c24b706
parent 13f9cb5ea7
1 changed files with 64 additions and 2 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -2,11 +2,13 @@ import os.path
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-import textwrap, os, glob, functools
+import textwrap, os, glob, functools, re
+from calibre import guess_type
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
    MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
+from calibre.ebooks.oeb.base import OEB_IMAGES

 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
@ -82,6 +84,66 @@ class PML2PMLZ(FileTypePlugin):

        return of.name

+class TXT2TXTZ(FileTypePlugin):
+    name = 'TXT to TXTZ'
+    author = 'John Schember'
+    description = _('Create a TXTZ archive when a TXT file is imported '
+        'containing Markdown or Textile references to images. The referenced '
+        'images as well as the TXT file are added to the archive.')
+    version = numeric_version
+    file_types = set(['txt'])
+    supported_platforms = ['windows', 'osx', 'linux']
+    on_import = True
+    
+    def _get_image_references(self, txt, base_dir):
+        images = []
+        
+        # Textile
+        for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
+            path = m.group('path')
+            if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
+                images.append(path)
+                
+        # Markdown inline        
+        for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt):
+            path = m.group('path')
+            if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
+                images.append(path)
+        
+        # Markdown reference
+        refs = {}
+        for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
+            if m.group('id') and m.group('path'):
+                refs[m.group('id')] = m.group('path')
+        for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]', txt):
+            path = refs.get(m.group('id'), None)
+            if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
+                images.append(path)
+
+        # Remove duplicates
+        return list(set(images))
+    
+    def run(self, path_to_ebook):
+        with open(path_to_ebook, 'rb') as ebf:
+            txt = ebf.read()
+        base_dir = os.path.dirname(path_to_ebook)
+        images = self._get_image_references(txt, base_dir)
+        
+        if images:
+            # Create TXTZ and put file plus images inside of it.
+            import zipfile
+            of = self.temporary_file('_plugin_txt2txtz.txtz')
+            txtz = zipfile.ZipFile(of.name, 'w')
+            txtz.write(path_to_ebook, os.path.basename(path_to_ebook), zipfile.ZIP_DEFLATED)
+            for image in images:
+                txtz.write(os.path.join(base_dir, image), image)
+            txtz.close()
+
+            return of.name
+        else:
+            # No images so just import the TXT file.
+            return path_to_ebook
+
 # }}}

 # Metadata reader plugins {{{
@ -516,7 +578,7 @@ from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck

-plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
+plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
        Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
        NiceBooksCovers]