import textwrap, os, glob, functools, re
from calibre import guess_type
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
    MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.oeb.base import OEB_IMAGES


class TXT2TXTZ(FileTypePlugin):
    '''
    Import-time plugin that bundles a TXT file with the local images it
    references (Textile or Markdown syntax) into a single TXTZ (zip) archive.

    If the text references no usable images the TXT file is imported as is.
    '''

    name = 'TXT to TXTZ'
    author = 'John Schember'
    description = _('Create a TXTZ archive when a TXT file is imported '
        'containing Markdown or Textile references to images. The referenced '
        'images as well as the TXT file are added to the archive.')
    version = numeric_version
    file_types = set(['txt'])
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    # All patterns below are ASCII-only, so plain raw strings are used and
    # each pattern is compiled once instead of on every import.

    # Textile image: !path! with optional "(title)" and ":link" parts.
    _TEXTILE_IMAGE = re.compile(
        r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!'
        r'(?::(\S+))?(?:[\]}]|(?=\s|$))')

    # Markdown alt text: the part between "![" and "]", allowing up to six
    # levels of nested square brackets.
    _MD_ALT_TEXT = (
        r'([^\]\[]*'
        r'(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*'
        r'\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*'
        r'[^\]\[]*)')
    _MD_IMAGE_PREFIX = r'(?mu)\!\[' + _MD_ALT_TEXT + r'\]\s*'

    # Markdown inline image: ![alt](path)
    _MD_INLINE_IMAGE = re.compile(_MD_IMAGE_PREFIX + r'\((?P<path>[^\)]*)\)')
    # Markdown reference image: ![alt][id]
    _MD_REFERENCE_IMAGE = re.compile(_MD_IMAGE_PREFIX + r'\[(?P<id>[^\]]*)\]')
    # Markdown reference definition: [id]: path
    _MD_REFERENCE_DEF = re.compile(
        r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$')

    def _is_bundlable_image(self, path, base_dir):
        '''
        Return True if `path` (as written in the text) names an image file
        that may be added to the archive.

        :param path: Image reference taken from the text, or None.
        :param base_dir: Directory containing the TXT file; relative
                         references are resolved against it.
        '''
        if not path or os.path.isabs(path):
            return False
        # The reference comes from the imported document and is used both to
        # read a file and as the archive member name. Refuse anything that
        # would escape base_dir (".." components) or is bound to a drive.
        if os.path.splitdrive(path)[0]:
            return False
        normalized = os.path.normpath(path)
        if normalized == os.pardir or normalized.startswith(os.pardir + os.sep):
            return False
        if guess_type(path)[0] not in OEB_IMAGES:
            return False
        # isfile rather than exists: a directory whose name looks like an
        # image must not be picked up.
        return os.path.isfile(os.path.join(base_dir, path))

    def _get_image_references(self, txt, base_dir):
        '''
        Find the images referenced by `txt` that exist below `base_dir`.

        Textile images, Markdown inline images and Markdown reference style
        images are recognised.

        :param txt: Contents of the TXT file.
        :param base_dir: Directory containing the TXT file.
        :return: List of image paths exactly as written in the text, without
                 duplicates, in order of first appearance.
        '''
        candidates = []

        # Textile
        candidates.extend(m.group('path')
                for m in self._TEXTILE_IMAGE.finditer(txt))

        # Markdown inline
        candidates.extend(m.group('path')
                for m in self._MD_INLINE_IMAGE.finditer(txt))

        # Markdown reference: collect the "[id]: path" definitions first,
        # then resolve every "![alt][id]" usage against them.
        refs = {}
        for m in self._MD_REFERENCE_DEF.finditer(txt):
            if m.group('id') and m.group('path'):
                refs[m.group('id')] = m.group('path')
        candidates.extend(refs.get(m.group('id'), None)
                for m in self._MD_REFERENCE_IMAGE.finditer(txt))

        # Remove duplicates while keeping first-seen order, so that every
        # candidate is checked on disk only once and the archive contents
        # are deterministic.
        images = []
        seen = set()
        for path in candidates:
            if path in seen:
                continue
            seen.add(path)
            if self._is_bundlable_image(path, base_dir):
                images.append(path)
        return images

    def run(self, path_to_ebook):
        '''
        Convert the TXT file being imported into a TXTZ archive when it
        references images.

        :param path_to_ebook: Path of the TXT file being imported.
        :return: Path of the created TXTZ archive, or `path_to_ebook`
                 unchanged when no referenced images were found.
        '''
        with open(path_to_ebook, 'rb') as ebf:
            txt = ebf.read()
        base_dir = os.path.dirname(path_to_ebook)
        images = self._get_image_references(txt, base_dir)

        if not images:
            # No images so just import the TXT file.
            return path_to_ebook

        # Create TXTZ and put file plus images inside of it.
        import zipfile
        of = self.temporary_file('_plugin_txt2txtz.txtz')
        txtz = zipfile.ZipFile(of.name, 'w')
        try:
            txtz.write(path_to_ebook, os.path.basename(path_to_ebook),
                    zipfile.ZIP_DEFLATED)
            for image in images:
                txtz.write(os.path.join(base_dir, image), image)
        finally:
            # Always close the archive, even if adding a file fails.
            txtz.close()

        return of.name