diff --git a/resources/recipes/danas.recipe b/resources/recipes/danas.recipe index 4de308a57d..081c46a5d2 100644 --- a/resources/recipes/danas.recipe +++ b/resources/recipes/danas.recipe @@ -1,64 +1,63 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' danas.rs ''' + import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Danas(BasicNewsRecipe): title = 'Danas' __author__ = 'Darko Miletic' - description = 'Vesti' + description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.' publisher = 'Danas d.o.o.' category = 'news, politics, Serbia' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = False use_embedded_content = False + encoding = 'utf-8' + masthead_url = 'http://www.danas.rs/images/basic/danas.gif' language = 'sr' - lang = 'sr-Latn-RS' - direction = 'ltr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher , 'language' : language - , 'pretty_print' : True } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] keep_only_tags = [dict(name='div', attrs={'id':'left'})] remove_tags = [ dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']}) ,dict(name='div', attrs={'id':'comments'}) - ,dict(name=['object','link']) + ,dict(name=['object','link','iframe']) ] - feeds = [ - (u'Vesti' , u'http://www.danas.rs/rss/rss.asp' ) - ,(u'Periskop', u'http://www.danas.rs/rss/rss.asp?column_id=4') + feeds = [ + (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27') + ,(u'Hronika' , u'http://www.danas.rs/rss/rss.asp?column_id=2' ) + ,(u'Drustvo' , u'http://www.danas.rs/rss/rss.asp?column_id=24') + ,(u'Dijalog' , u'http://www.danas.rs/rss/rss.asp?column_id=1' ) + ,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' ) + ,(u'Svet' , u'http://www.danas.rs/rss/rss.asp?column_id=25') + ,(u'Srbija' , u'http://www.danas.rs/rss/rss.asp?column_id=28') + ,(u'Kultura' , u'http://www.danas.rs/rss/rss.asp?column_id=5' ) + ,(u'Sport' , u'http://www.danas.rs/rss/rss.asp?column_id=13') + ,(u'Scena' , u'http://www.danas.rs/rss/rss.asp?column_id=42') + ,(u'Feljton' , u'http://www.danas.rs/rss/rss.asp?column_id=19') + ,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' ) ] def preprocess_html(self, soup): - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - soup.head.insert(0,mlang) - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] + for item in soup.findAll(style=True): + del item['style'] return soup + + def print_version(self, url): + return url + '&action=print' + diff --git a/resources/recipes/dilbert.recipe b/resources/recipes/dilbert.recipe index ddca52b40a..82966b1d15 100644 --- a/resources/recipes/dilbert.recipe +++ b/resources/recipes/dilbert.recipe @@ -3,6 +3,7 @@ __copyright__ = '2009, Darko Miletic ' ''' http://www.dilbert.com ''' +import re from calibre.web.feeds.recipes import BasicNewsRecipe @@ -28,6 +29,12 @@ class DosisDiarias(BasicNewsRecipe): feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )] + preprocess_regexps = [ + (re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE), + lambda match: 'strip.zoom.gif') + ] + + def get_article_url(self, article): return article.get('feedburner_origlink', None) diff --git a/setup/extensions.py b/setup/extensions.py index 8db7b32a5e..1f51de67f2 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -399,7 +399,7 @@ class BuildPDF2XML(Command): objects.append(obj) if self.newer(dest, objects): - cmd = ['g++', '-g', '-o', dest]+objects+['-lpoppler', '-lMagickWand', + cmd = ['g++', '-ggdb', '-o', dest]+objects+['-lpoppler', '-lMagickWand', '-lpng', '-lpthread'] if iswindows: cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split() diff --git a/setup/install.py b/setup/install.py index 56546cd7d4..8424280e95 100644 --- a/setup/install.py +++ b/setup/install.py @@ -137,8 +137,20 @@ class Develop(Command): self.setup_mount_helper() self.install_files() self.run_postinstall() + self.install_env_module() self.success() + def install_env_module(self): + import distutils.sysconfig as s + libdir = s.get_python_lib(prefix=self.opts.staging_root) + if os.path.exists(libdir): + path = os.path.join(libdir, 'init_calibre.py') + self.info('Installing calibre environment module: '+path) + with open(path, 'wb') as f: + f.write(HEADER.format(**self.template_args())) + else: + self.warn('Cannot install calibre environment module to: '+libdir) + def setup_mount_helper(self): def warn(): self.warn('Failed to compile mount helper. Auto mounting of', @@ -180,13 +192,20 @@ class Develop(Command): functions[typ]): self.write_template(name, mod, func) + def template_args(self): + return { + 'path':self.libdir, + 'resources':self.sharedir, + 'executables':self.bindir, + 'extensions':self.j(self.libdir, 'calibre', 'plugins') + } + def write_template(self, name, mod, func): template = COMPLETE_TEMPLATE if name == 'calibre-complete' else TEMPLATE - script = template.format( - module=mod, func=func, - path=self.libdir, resources=self.sharedir, - executables=self.bindir, - extensions=self.j(self.libdir, 'calibre', 'plugins')) + args = self.template_args() + args['module'] = mod + args['func'] = func + script = template.format(**args) path = self.j(self.staging_bindir, name) if not os.path.exists(self.staging_bindir): os.makedirs(self.staging_bindir) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 56c7b892ad..9d4b0694cc 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -7,6 +7,7 @@ import os import glob from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin from calibre.constants import numeric_version +from calibre.ebooks.metadata.archive import ArchiveExtract class HTML2ZIP(FileTypePlugin): name = 'HTML to ZIP' @@ -423,7 +424,7 @@ from calibre.devices.hanvon.driver import N516 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon from calibre.library.catalog import CSV_XML, EPUB_MOBI -plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI] +plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI] plugins += [ ComicInput, EPUBInput, diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 8d33023e43..e15454207f 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -111,7 +111,7 @@ class HTMLFile(object): raise IOError(msg) raise IgnoreFile(msg, err.errno) - self.is_binary = not bool(self.HTML_PAT.search(src[:4096])) + self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096])) if not self.is_binary: if encoding is None: encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1] diff --git a/src/calibre/ebooks/metadata/archive.py b/src/calibre/ebooks/metadata/archive.py new file mode 100644 index 0000000000..6b71f41a88 --- /dev/null +++ b/src/calibre/ebooks/metadata/archive.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import textwrap, os +from contextlib import closing + +from calibre.customize import FileTypePlugin + +class ArchiveExtract(FileTypePlugin): + name = 'Archive Extract' + author = 'Kovid Goyal' + description = textwrap.dedent(_('''\ + Extract common e-book formats from archives (zip/rar) files. + ''')) + file_types = set(['zip', 'rar']) + supported_platforms = ['windows', 'osx', 'linux'] + on_import = True + + def run(self, archive): + is_rar = archive.lower().endswith('.rar') + if is_rar: + from calibre.libunrar import extract_member, names + else: + from calibre.utils.zipfile import ZipFile + zf = ZipFile(archive, 'r') + + if is_rar: + fnames = names(archive) + else: + fnames = zf.namelist() + + fnames = [x for x in fnames if '.' in x] + if len(fnames) > 1 or not fnames: + return archive + fname = fnames[0] + ext = os.path.splitext(fname)[1][1:] + if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', + 'mp3'): + return archive + + of = self.temporary_file('_archive_extract.'+ext) + with closing(of): + if is_rar: + data = extract_member(archive, match=None, name=fname)[1] + of.write(data) + else: + of.write(zf.read(fname)) + return of.name + diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 7c2efe20ef..b885f08962 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -851,8 +851,10 @@ class Manifest(object): self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href) nroot = etree.fromstring('') parent = nroot[0] - for child in list(data): - child.getparent().remove(child) + for child in list(data.iter()): + oparent = child.getparent() + if oparent is not None: + oparent.remove(child) parent.append(child) data = nroot diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index 372baf0959..8f8668b215 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -120,7 +120,10 @@ class EbookIterator(object): bad_map = {} font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)') for csspath in css_files: - css = open(csspath, 'rb').read().decode('utf-8', 'replace') + try: + css = open(csspath, 'rb').read().decode('utf-8', 'replace') + except: + continue for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css): block = match.group(1) family = font_family_pat.search(block) diff --git a/src/calibre/ebooks/pdf/main.cpp b/src/calibre/ebooks/pdf/main.cpp index c0033190fa..d3efb5f1f9 100644 --- a/src/calibre/ebooks/pdf/main.cpp +++ b/src/calibre/ebooks/pdf/main.cpp @@ -169,6 +169,8 @@ int main(int argc, char **argv) { char *memblock; ifstream::pos_type size; int ret = 0; + map info; + Reflow *reflow = NULL; if (argc != 2) { @@ -189,9 +191,13 @@ int main(int argc, char **argv) { } try { - Reflow reflow(memblock, size); - reflow.render(); - vector *data = reflow.render_first_page(); + reflow = new Reflow(memblock, size); + info = reflow->get_info(); + for (map::const_iterator it = info.begin() ; it != info.end(); it++ ) { + cout << (*it).first << " : " << (*it).second << endl; + } + //reflow->render(); + vector *data = reflow->render_first_page(); ofstream file("cover.png", ios::binary); file.write(&((*data)[0]), data->size()); delete data; @@ -200,7 +206,7 @@ int main(int argc, char **argv) { cerr << e.what() << endl; ret = 1; } - + delete reflow; delete[] memblock; return ret; } diff --git a/src/calibre/gui2/convert/page_setup.ui b/src/calibre/gui2/convert/page_setup.ui index 0aa2a97e70..0ea2093697 100644 --- a/src/calibre/gui2/convert/page_setup.ui +++ b/src/calibre/gui2/convert/page_setup.ui @@ -115,6 +115,9 @@ 1 + + 200.000000000000000 + @@ -135,6 +138,9 @@ 1 + + 200.000000000000000 + @@ -155,6 +161,9 @@ 1 + + 200.000000000000000 + @@ -175,6 +184,9 @@ 1 + + 200.000000000000000 + diff --git a/src/calibre/gui2/dialogs/choose_format.py b/src/calibre/gui2/dialogs/choose_format.py index e0fcb0868b..e1f32e3d26 100644 --- a/src/calibre/gui2/dialogs/choose_format.py +++ b/src/calibre/gui2/dialogs/choose_format.py @@ -12,7 +12,8 @@ class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog): QDialog.__init__(self, window) Ui_ChooseFormatDialog.__init__(self) self.setupUi(self) - self.connect(self.formats, SIGNAL('activated(QModelIndex)'), lambda i: self.accept()) + self.connect(self.formats, SIGNAL('activated(QModelIndex)'), + self.activated_slot) self.msg.setText(msg) for format in formats: @@ -20,6 +21,15 @@ class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog): format.upper())) self._formats = formats self.formats.setCurrentRow(0) + self._format = None + + def activated_slot(self, *args): + self.accept() def format(self): - return self._formats[self.formats.currentRow()] + return self._format + + def accept(self): + self._format = self._formats[self.formats.currentRow()] + return QDialog.accept(self) + diff --git a/src/calibre/gui2/lrf_renderer/text.py b/src/calibre/gui2/lrf_renderer/text.py index e612a1af5a..05e87532f4 100644 --- a/src/calibre/gui2/lrf_renderer/text.py +++ b/src/calibre/gui2/lrf_renderer/text.py @@ -481,9 +481,10 @@ class Line(QGraphicsItem): painter.restore() painter.save() painter.setPen(QPen(Qt.NoPen)) - for c in self.children(): - painter.setBrush(c.brush) - painter.drawRect(c.boundingRect()) + if hasattr(self, 'children'): + for c in self.children(): + painter.setBrush(c.brush) + painter.drawRect(c.boundingRect()) painter.restore() painter.save() for tok in self.tokens: diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 6a3c637340..2a43f39f43 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -337,7 +337,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): QObject.connect(self.view_menu.actions()[0], SIGNAL("triggered(bool)"), self.view_book) QObject.connect(self.view_menu.actions()[1], - SIGNAL("triggered(bool)"), self.view_specific_format) + SIGNAL("triggered(bool)"), self.view_specific_format, + Qt.QueuedConnection) self.connect(self.action_open_containing_folder, SIGNAL('triggered(bool)'), self.view_folder) self.delete_menu.actions()[0].triggered.connect(self.delete_books) @@ -1642,12 +1643,9 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): row = rows[0].row() formats = self.library_view.model().db.formats(row).upper().split(',') d = ChooseFormatDialog(self, _('Choose the format to view'), formats) - d.exec_() - if d.result() == QDialog.Accepted: + if d.exec_() == QDialog.Accepted: format = d.format() self.view_format(row, format) - else: - return def view_folder(self, *args): rows = self.current_view().selectionModel().selectedRows() diff --git a/src/calibre/manual/develop.rst b/src/calibre/manual/develop.rst index b9cebcab3d..5f359ad713 100644 --- a/src/calibre/manual/develop.rst +++ b/src/calibre/manual/develop.rst @@ -219,3 +219,30 @@ is great for testing a little snippet of code on the command line. It works in t can be used to execute your own python script. It works in the same way as passing the script to the python interpreter, except that the calibre environment is fully initialized, so you can use all the calibre code in your script. + +Using calibre in your projects +---------------------------------------- + +It is possible to directly use calibre functions/code in your python project. Two ways exist to do this: + +Binary install of calibre +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you have a binary install of calibre, you can use the python interpreter bundled with calibre, like this:: + + calibre-debug -e /path/to/your/python/script.py + +Source install on linux +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In addition to using the above technique, if you do a source install on linux, +you can also directly import calibre, as follows:: + + import init_calibre + import calibre + + print calibre.__version__ + +It is essential that you import the init_calibre module before any other calibre modules/packages as +it sets up the interpreter to run calibre code. + diff --git a/src/calibre/utils/zipfile.py b/src/calibre/utils/zipfile.py index 56e96ee8eb..6bf6f2f83c 100644 --- a/src/calibre/utils/zipfile.py +++ b/src/calibre/utils/zipfile.py @@ -3,10 +3,14 @@ Read and write ZIP files. Modified by Kovid Goyal to support replacing files in a zip archive. """ from __future__ import with_statement -from calibre.ptempfile import TemporaryDirectory -from calibre import sanitize_file_name import struct, os, time, sys, shutil import binascii, cStringIO +from contextlib import closing + +from calibre.ptempfile import TemporaryDirectory +from calibre import sanitize_file_name +from calibre.constants import filesystem_encoding +from calibre.ebooks.chardet import detect try: import zlib # We may need its compression method @@ -132,6 +136,16 @@ _CD64_NUMBER_ENTRIES_TOTAL = 7 _CD64_DIRECTORY_SIZE = 8 _CD64_OFFSET_START_CENTDIR = 9 +def decode_arcname(name): + if not isinstance(name, unicode): + encoding = detect(name)['encoding'] + try: + name = name.decode(encoding) + except: + name = name.decode('utf-8', 'replace') + return name.encode(filesystem_encoding, 'replace') + + def is_zipfile(filename): """Quickly see if file is a ZIP file by checking the magic number.""" try: @@ -222,7 +236,8 @@ def _EndRecData(fpin): endrec = list(struct.unpack(structEndArchive, recData)) comment = data[start+sizeEndCentDir:] # check that comment length is correct - if endrec[_ECD_COMMENT_SIZE] == len(comment): + # Kovid: Added == 0 check as some zip files apparently dont set this + if endrec[_ECD_COMMENT_SIZE] == 0 or endrec[_ECD_COMMENT_SIZE] == len(comment): # Append the archive comment and start offset endrec.append(comment) endrec.append(maxCommentStart + start) @@ -675,6 +690,7 @@ class ZipFile: self.debug = 0 # Level of printing: 0 through 3 self.NameToInfo = {} # Find file info given name self.filelist = [] # List of ZipInfo instances for archive + self.extract_mapping = {} self.compression = compression # Method of compression self.mode = key = mode.replace('b', '')[0] self.pwd = None @@ -1023,10 +1039,10 @@ class ZipFile: targetpath = targetpath[:-1] # don't include leading "/" from file name if present - if os.path.isabs(member.filename): - targetpath = os.path.join(targetpath, member.filename[1:]) - else: - targetpath = os.path.join(targetpath, member.filename) + fname = decode_arcname(member.filename) + if fname.startswith('/'): + fname = fname[1:] + targetpath = os.path.join(targetpath, fname) targetpath = os.path.normpath(targetpath) @@ -1037,17 +1053,16 @@ class ZipFile: if upperdirs and not os.path.exists(upperdirs): os.makedirs(upperdirs) - source = self.open(member, pwd=pwd) if not os.path.exists(targetpath): # Could be a previously automatically created directory - try: - target = open(targetpath, "wb") - except IOError: - targetpath = sanitize_file_name(targetpath) - target = open(targetpath, "wb") - shutil.copyfileobj(source, target) - source.close() - target.close() - + with closing(self.open(member, pwd=pwd)) as source: + try: + with open(targetpath, 'wb') as target: + shutil.copyfileobj(source, target) + except: + targetpath = sanitize_file_name(targetpath) + with open(targetpath, 'wb') as target: + shutil.copyfileobj(source, target) + self.extract_mapping[member.filename] = targetpath return targetpath def _writecheck(self, zinfo): @@ -1328,18 +1343,18 @@ def safe_replace(zipstream, name, datastream): names = z.infolist() with TemporaryDirectory('_zipfile_replace') as tdir: z.extractall(path=tdir) - zipstream.seek(0) - zipstream.truncate() - z = ZipFile(zipstream, 'w') + mapping = z.extract_mapping path = os.path.join(tdir, *name.split('/')) shutil.copyfileobj(datastream, open(path, 'wb')) - for info in names: - current = os.path.join(tdir, *info.filename.split('/')) - if os.path.isdir(current): - z.writestr(info.filename+'/', '', 0700) - else: - z.write(current, info.filename, compress_type=info.compress_type) - z.close() + zipstream.seek(0) + zipstream.truncate() + with closing(ZipFile(zipstream, 'w')) as z: + for info in names: + current = mapping[info.filename] + if os.path.isdir(current): + z.writestr(info.filename+'/', '', 0700) + else: + z.write(current, info.filename, compress_type=info.compress_type) class PyZipFile(ZipFile): """Class to create ZIP archives with Python library files and packages."""