From 9466f30981113219e045f302071340de402309c0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 8 Apr 2009 20:21:45 -0700 Subject: [PATCH 01/10] Fix #2240 (New Yorker download has issues) --- src/calibre/web/feeds/recipes/recipe_new_yorker.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/calibre/web/feeds/recipes/recipe_new_yorker.py b/src/calibre/web/feeds/recipes/recipe_new_yorker.py index 628919ce71..3ebc275d99 100644 --- a/src/calibre/web/feeds/recipes/recipe_new_yorker.py +++ b/src/calibre/web/feeds/recipes/recipe_new_yorker.py @@ -42,3 +42,12 @@ class NewYorker(BasicNewsRecipe): def print_version(self, url): return url + '?printable=true' + + def postprocess_html(self, soup, x): + body = soup.find('body') + if body: + html = soup.find('html') + if html: + body.extract() + html.insert(-1, body) + return soup From 3e4ed73d300d781dd5f511867a256a3d57f2ec32 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 8 Apr 2009 20:23:06 -0700 Subject: [PATCH 02/10] version 0.5.6 --- src/calibre/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 04284ab8b3..00276f6970 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.5.5' +__version__ = '0.5.6' __author__ = "Kovid Goyal " ''' Various run time constants. From 27b9a2c6c48d61e3cce9ffe1227778128479fd2f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 8 Apr 2009 20:24:57 -0700 Subject: [PATCH 03/10] IGN:Tag release From 74cf23ddc8e26404f5b7faadd7d3630de99e17d7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 9 Apr 2009 12:16:39 -0700 Subject: [PATCH 04/10] IGN:... --- src/calibre/manual/faq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 7ca4b1b876..265fbf3ad0 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -105,7 +105,7 @@ Device Integration What devices does |app| support? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1 and 2 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. +At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1/2, Netronix EB600 and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 596b52afac5365d3f7c48b017160fc09530e9c95 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 9 Apr 2009 12:47:26 -0700 Subject: [PATCH 05/10] Fix #2255 (calibre does not handle images with comma in their name) --- src/calibre/web/fetch/simple.py | 127 ++++++++++++++++---------------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 4da3f4019c..90262056bb 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -28,10 +28,10 @@ class closing(object): def __init__(self, thing): self.thing = thing - + def __enter__(self): return self.thing - + def __exit__(self, *exc_info): try: self.thing.close() @@ -55,43 +55,43 @@ def save_soup(soup, target): for meta in metas: if 'charset' in meta.get('content', '').lower(): meta.replaceWith(nm) - + selfdir = os.path.dirname(target) - + for tag in soup.findAll(['img', 'link', 'a']): for key in ('src', 'href'): path = tag.get(key, None) if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path): tag[key] = unicode_path(relpath(path, selfdir).replace(os.sep, '/')) - + html = unicode(soup) with open(target, 'wb') as f: f.write(html.encode('utf-8')) - + class response(str): - + def __new__(cls, *args): obj = super(response, cls).__new__(cls, *args) obj.newurl = None return obj - + class DummyLock(object): - + def __enter__(self, *args): return self def __exit__(self, *args): pass class RecursiveFetcher(object, LoggingInterface): - LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in + LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in ('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$')) #ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in # ( - # + # # ) # ) CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE) default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__ DUMMY_LOCK = DummyLock() - + def __init__(self, options, logger, image_map={}, css_map={}, job_info=None): LoggingInterface.__init__(self, logger) self.base_dir = os.path.abspath(os.path.expanduser(options.dir)) @@ -123,19 +123,19 @@ class RecursiveFetcher(object, LoggingInterface): self.remove_tags_after = getattr(options, 'remove_tags_after', None) self.remove_tags_before = getattr(options, 'remove_tags_before', None) self.keep_only_tags = getattr(options, 'keep_only_tags', []) - self.preprocess_html_ext = getattr(options, 'preprocess_html', lambda soup: soup) + self.preprocess_html_ext = getattr(options, 'preprocess_html', lambda soup: soup) self.postprocess_html_ext= getattr(options, 'postprocess_html', None) self.download_stylesheets = not options.no_stylesheets self.show_progress = True self.failed_links = [] self.job_info = job_info - + def get_soup(self, src): nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) nmassage.extend(self.preprocess_regexps) nmassage += [(re.compile(r'', re.DOTALL), lambda m: '')] # Some websites have buggy doctype declarations that mess up beautifulsoup soup = BeautifulSoup(xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage) - + if self.keep_only_tags: body = Tag(soup, 'body') try: @@ -147,7 +147,7 @@ class RecursiveFetcher(object, LoggingInterface): soup.find('body').replaceWith(body) except AttributeError: # soup has no body element pass - + def remove_beyond(tag, next): while tag is not None and tag.name != 'body': after = getattr(tag, next) @@ -156,31 +156,34 @@ class RecursiveFetcher(object, LoggingInterface): after.extract() after = ns tag = tag.parent - + if self.remove_tags_after is not None: rt = [self.remove_tags_after] if isinstance(self.remove_tags_after, dict) else self.remove_tags_after for spec in rt: tag = soup.find(**spec) remove_beyond(tag, 'nextSibling') - + if self.remove_tags_before is not None: tag = soup.find(**self.remove_tags_before) remove_beyond(tag, 'previousSibling') - + for kwds in self.remove_tags: for tag in soup.findAll(**kwds): tag.extract() return self.preprocess_html_ext(soup) - - + + def fetch_url(self, url): data = None self.log_debug('Fetching %s', url) - delta = time.time() - self.last_fetch_at + delta = time.time() - self.last_fetch_at if delta < self.delay: time.sleep(delta) - if re.search(r'\s+', url) is not None: - url = quote(url) + if re.search(r'\s+|,', url) is not None: + purl = list(urlparse.urlparse(url)) + for i in range(2, 6): + purl[i] = quote(purl[i]) + url = urlparse.urlunparse(purl) with self.browser_lock: try: with closing(self.browser.open(url)) as f: @@ -196,38 +199,38 @@ class RecursiveFetcher(object, LoggingInterface): with closing(self.browser.open(url)) as f: data = response(f.read()+f.read()) data.newurl = f.geturl() - else: + else: raise err finally: self.last_fetch_at = time.time() return data - + def start_fetch(self, url): soup = BeautifulSoup(u'') self.log_info('Downloading') res = self.process_links(soup, url, 0, into_dir='') self.log_info('%s saved to %s', url, res) return res - + def is_link_ok(self, url): for i in self.__class__.LINK_FILTER: if i.search(url): return False return True - + def is_link_wanted(self, url): if self.filter_regexps: for f in self.filter_regexps: if f.search(url): - return False + return False if self.match_regexps: for m in self.match_regexps: if m.search(url): return True return False return True - + def process_stylesheets(self, soup, baseurl): diskpath = unicode_path(os.path.join(self.current_dir, 'stylesheets')) if not os.path.exists(diskpath): @@ -254,7 +257,7 @@ class RecursiveFetcher(object, LoggingInterface): x.write(data) tag['href'] = stylepath else: - for ns in tag.findAll(text=True): + for ns in tag.findAll(text=True): src = str(ns) m = self.__class__.CSS_IMPORT_PATTERN.search(src) if m: @@ -278,9 +281,9 @@ class RecursiveFetcher(object, LoggingInterface): with open(stylepath, 'wb') as x: x.write(data) ns.replaceWith(src.replace(m.group(1), stylepath)) - - - + + + def process_images(self, soup, baseurl): diskpath = unicode_path(os.path.join(self.current_dir, 'images')) if not os.path.exists(diskpath): @@ -323,7 +326,7 @@ class RecursiveFetcher(object, LoggingInterface): traceback.print_exc() continue - def absurl(self, baseurl, tag, key, filter=True): + def absurl(self, baseurl, tag, key, filter=True): iurl = tag[key] parts = urlparse.urlsplit(iurl) if not parts.netloc and not parts.path: @@ -337,26 +340,26 @@ class RecursiveFetcher(object, LoggingInterface): self.log_debug('Filtered link: '+iurl) return None return iurl - + def normurl(self, url): parts = list(urlparse.urlsplit(url)) parts[4] = '' return urlparse.urlunsplit(parts) - + def localize_link(self, tag, key, path): parts = urlparse.urlsplit(tag[key]) suffix = '#'+parts.fragment if parts.fragment else '' tag[key] = path+suffix - + def process_return_links(self, soup, baseurl): for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')): - iurl = self.absurl(baseurl, tag, 'href') + iurl = self.absurl(baseurl, tag, 'href') if not iurl: continue nurl = self.normurl(iurl) if self.filemap.has_key(nurl): self.localize_link(tag, 'href', self.filemap[nurl]) - + def process_links(self, soup, baseurl, recursion_level, into_dir='links'): res = '' diskpath = os.path.join(self.current_dir, into_dir) @@ -366,7 +369,7 @@ class RecursiveFetcher(object, LoggingInterface): try: self.current_dir = diskpath tags = list(soup.findAll('a', href=True)) - + for c, tag in enumerate(tags): if self.show_progress: print '.', @@ -396,9 +399,9 @@ class RecursiveFetcher(object, LoggingInterface): dsrc = dsrc.decode(self.encoding, 'ignore') else: dsrc = xml_to_unicode(dsrc, self.verbose)[0] - + soup = self.get_soup(dsrc) - + base = soup.find('base', href=True) if base is not None: newbaseurl = base['href'] @@ -406,7 +409,7 @@ class RecursiveFetcher(object, LoggingInterface): self.process_images(soup, newbaseurl) if self.download_stylesheets: self.process_stylesheets(soup, newbaseurl) - + _fname = basename(iurl) if not isinstance(_fname, unicode): _fname.decode('latin1', 'replace') @@ -420,17 +423,17 @@ class RecursiveFetcher(object, LoggingInterface): self.log_debug('Processing links...') self.process_links(soup, newbaseurl, recursion_level+1) else: - self.process_return_links(soup, newbaseurl) + self.process_return_links(soup, newbaseurl) self.log_debug('Recursion limit reached. Skipping links in %s', iurl) - + if callable(self.postprocess_html_ext): - soup = self.postprocess_html_ext(soup, + soup = self.postprocess_html_ext(soup, c==0 and recursion_level==0 and not getattr(self, 'called_first', False), self.job_info) - + if c==0 and recursion_level == 0: self.called_first = True - + save_soup(soup, res) self.localize_link(tag, 'href', res) except Exception, err: @@ -439,34 +442,34 @@ class RecursiveFetcher(object, LoggingInterface): self.log_debug('Error: %s', str(err), exc_info=True) finally: self.current_dir = diskpath - self.files += 1 + self.files += 1 finally: self.current_dir = prev_dir if self.show_progress: print return res - + def __del__(self): dt = getattr(self, 'default_timeout', None) if dt is not None: socket.setdefaulttimeout(dt) - + def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.com')): parser = OptionParser(usage=usage) - parser.add_option('-d', '--base-dir', + parser.add_option('-d', '--base-dir', help=_('Base directory into which URL is saved. Default is %default'), default='.', type='string', dest='dir') - parser.add_option('-t', '--timeout', + parser.add_option('-t', '--timeout', help=_('Timeout in seconds to wait for a response from the server. Default: %default s'), default=10.0, type='float', dest='timeout') - parser.add_option('-r', '--max-recursions', default=1, + parser.add_option('-r', '--max-recursions', default=1, help=_('Maximum number of levels to recurse i.e. depth of links to follow. Default %default'), type='int', dest='max_recursions') parser.add_option('-n', '--max-files', default=sys.maxint, type='int', dest='max_files', help=_('The maximum number of files to download. This only applies to files from tags. Default is %default')) parser.add_option('--delay', default=0, dest='delay', type='int', help=_('Minimum interval in seconds between consecutive fetches. Default is %default s')) - parser.add_option('--encoding', default=None, + parser.add_option('--encoding', default=None, help=_('The character encoding for the websites you are trying to download. The default is to try and guess the encoding.')) parser.add_option('--match-regexp', default=[], action='append', dest='match_regexps', help=_('Only links that match this regular expression will be followed. This option can be specified multiple times, in which case as long as a link matches any one regexp, it will be followed. By default all links are followed.')) @@ -487,15 +490,15 @@ def create_fetcher(options, logger=None, image_map={}): return RecursiveFetcher(options, logger, image_map={}) def main(args=sys.argv): - parser = option_parser() + parser = option_parser() options, args = parser.parse_args(args) if len(args) != 2: parser.print_help() return 1 - - fetcher = create_fetcher(options) - fetcher.start_fetch(args[1]) - -if __name__ == '__main__': + fetcher = create_fetcher(options) + fetcher.start_fetch(args[1]) + + +if __name__ == '__main__': sys.exit(main()) From 3e12e32721433b8effb9a35243dd4378d5398c65 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 9 Apr 2009 15:01:40 -0700 Subject: [PATCH 06/10] Fix #2252 (--profile should resize pictures too) --- src/calibre/ebooks/epub/from_html.py | 33 ++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index 318cf5cc02..3fd7b082f9 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -36,7 +36,7 @@ import os, sys, cStringIO, logging, re, functools, shutil from lxml.etree import XPath from lxml import html, etree -from PyQt4.Qt import QApplication, QPixmap +from PyQt4.Qt import QApplication, QPixmap, Qt from calibre.ebooks.html import Processor, merge_metadata, get_filelist,\ opf_traverse, create_metadata, rebase_toc, Link, parser @@ -50,7 +50,7 @@ from calibre.ebooks.epub.pages import add_page_map from calibre.ebooks.epub.fonts import Rationalizer from calibre.constants import preferred_encoding from calibre.customize.ui import run_plugins_on_postprocess -from calibre import walk, CurrentDir, to_unicode +from calibre import walk, CurrentDir, to_unicode, fit_image content = functools.partial(os.path.join, u'content') @@ -112,6 +112,31 @@ def find_html_index(files): return f, os.path.splitext(f)[1].lower()[1:] return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:] +def rescale_images(imgdir, screen_size, log): + pwidth, pheight = screen_size + if QApplication.instance() is None: + QApplication([]) + for f in os.listdir(imgdir): + path = os.path.join(imgdir, f) + if os.path.splitext(f)[1] in ('.css', '.js'): + continue + + p = QPixmap() + p.load(path) + if p.isNull(): + continue + width, height = p.width(), p.height() + scaled, new_width, new_height = fit_image(width, height, pwidth, + pheight) + if scaled: + log.info('Rescaling image: '+f) + p.scaled(new_width, new_height, Qt.IgnoreAspectRatio, + Qt.SmoothTransformation).save(path, 'JPEG') + + + + + class HTMLProcessor(Processor, Rationalizer): def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets): @@ -482,6 +507,10 @@ def convert(htmlfile, opts, notification=None, create_epub=True, if os.stat(ncx_path).st_size > opts.profile.flow_size: logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size) + if opts.profile.screen_size is not None: + rescale_images(os.path.join(tdir, 'content', 'resources'), + opts.profile.screen_size, logger) + if create_epub: epub = initialize_container(opts.output) epub.add_dir(tdir) From 66809fb86d68650423ea3e854fb465be068b845c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 10 Apr 2009 08:41:42 -0700 Subject: [PATCH 07/10] Fix #2256 (Interresting bug with epub format) --- src/calibre/web/feeds/news.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 8f4739c197..a3642b0a33 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1034,6 +1034,28 @@ class BasicNewsRecipe(object, LoggingInterface): nmassage.extend(entity_replace) return BeautifulSoup(raw, markupMassage=nmassage) + @classmethod + def adeify_images(cls, soup): + ''' + If your recipe when converted to EPUB has problems with images when + viewed in Adobe Digital Editions, call this method from within + :method:`postprocess_html`. + ''' + for item in soup.findAll('img'): + for attrib in ['height','width','border','align','style']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + + class CustomIndexRecipe(BasicNewsRecipe): def custom_index(self): From 382dbb3a6dc3efb28f7f4e0ffcae0cbd82719b29 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 10 Apr 2009 08:45:16 -0700 Subject: [PATCH 08/10] Fix #2257 (Updated recipes with fixes for epub image problem) --- .../web/feeds/recipes/recipe_24sata_rs.py | 21 ++++++++++++-- src/calibre/web/feeds/recipes/recipe_blic.py | 29 +++++++++++++++---- src/calibre/web/feeds/recipes/recipe_nspm.py | 22 ++++++++++++-- .../web/feeds/recipes/recipe_tomshardware.py | 13 ++++++++- 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/src/calibre/web/feeds/recipes/recipe_24sata_rs.py b/src/calibre/web/feeds/recipes/recipe_24sata_rs.py index ac4ee2b860..88860bf493 100644 --- a/src/calibre/web/feeds/recipes/recipe_24sata_rs.py +++ b/src/calibre/web/feeds/recipes/recipe_24sata_rs.py @@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic ' import re from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Ser24Sata(BasicNewsRecipe): title = '24 Sata - Sr' @@ -39,14 +40,30 @@ class Ser24Sata(BasicNewsRecipe): feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')] + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + def preprocess_html(self, soup): soup.html['xml:lang'] = 'sr-Latn-RS' soup.html['lang'] = 'sr-Latn-RS' mtag = '\n' soup.head.insert(0,mtag) - return soup + return self.cleanup_image_tags(soup) def print_version(self, url): article, sep, rest = url.partition('#') - return article.replace('/show.php','/_print.php') + article_base, sep2, article_id = article.partition('id=') + return 'http://www.24sata.co.rs/_print.php?id=' + article_id diff --git a/src/calibre/web/feeds/recipes/recipe_blic.py b/src/calibre/web/feeds/recipes/recipe_blic.py index 05d4e43865..e4e4987dec 100644 --- a/src/calibre/web/feeds/recipes/recipe_blic.py +++ b/src/calibre/web/feeds/recipes/recipe_blic.py @@ -8,11 +8,12 @@ blic.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Blic(BasicNewsRecipe): - title = u'Blic' - __author__ = u'Darko Miletic' - description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' + title = 'Blic' + __author__ = 'Darko Miletic' + description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' publisher = 'RINGIER d.o.o.' category = 'news, politics, Serbia' oldest_article = 2 @@ -21,7 +22,7 @@ class Blic(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False language = _('Serbian') - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} ' html2lrf_options = [ '--comment' , description @@ -30,7 +31,7 @@ class Blic(BasicNewsRecipe): , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -44,10 +45,26 @@ class Blic(BasicNewsRecipe): start_url, question, rest_url = url.partition('?') return u'http://www.blic.rs/_print.php?' + rest_url + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + + def preprocess_html(self, soup): mtag = '' soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] - return soup + return self.cleanup_image_tags(soup) \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_nspm.py b/src/calibre/web/feeds/recipes/recipe_nspm.py index 0ff80b8a93..360fb35c35 100644 --- a/src/calibre/web/feeds/recipes/recipe_nspm.py +++ b/src/calibre/web/feeds/recipes/recipe_nspm.py @@ -8,9 +8,10 @@ nspm.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Nspm(BasicNewsRecipe): - title = u'Nova srpska politicka misao' + title = 'Nova srpska politicka misao' __author__ = 'Darko Miletic' description = 'Casopis za politicku teoriju i drustvena istrazivanja' publisher = 'NSPM' @@ -36,7 +37,7 @@ class Nspm(BasicNewsRecipe): preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] remove_tags = [ - dict(name=['a','img','link','object','embed']) + dict(name=['link','object','embed']) ,dict(name='td', attrs={'class':'buttonheading'}) ] @@ -50,6 +51,21 @@ class Nspm(BasicNewsRecipe): def print_version(self, url): return url.replace('.html','/stampa.html') + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + def preprocess_html(self, soup): lng = 'sr-Latn-RS' soup.html['xml:lang'] = lng @@ -59,4 +75,4 @@ class Nspm(BasicNewsRecipe): ftag['content'] = lng for item in soup.findAll(style=True): del item['style'] - return soup + return self.cleanup_image_tags(soup) diff --git a/src/calibre/web/feeds/recipes/recipe_tomshardware.py b/src/calibre/web/feeds/recipes/recipe_tomshardware.py index 13d164d0e6..af080ccbb7 100644 --- a/src/calibre/web/feeds/recipes/recipe_tomshardware.py +++ b/src/calibre/web/feeds/recipes/recipe_tomshardware.py @@ -63,6 +63,17 @@ class Tomshardware(BasicNewsRecipe): rind = 'http://www.tomshardware.com/review_print.php?p1=' return rind + article_id + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + return soup + def preprocess_html(self, soup): del(soup.body['onload']) - return soup + for item in soup.findAll(style=True): + del item['style'] + for it in soup.findAll('span'): + it.name="div" + return self.cleanup_image_tags(soup) From 59328d799935a901761745a1855136d20c95896d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 10 Apr 2009 11:09:19 -0700 Subject: [PATCH 09/10] Implement #2096 (Move deleted books in the trash folder) --- installer/windows/freeze.py | 15 +- src/calibre/library/database2.py | 24 +- src/calibre/utils/winshell.py | 400 +++++++++++++++++++++++++++++++ 3 files changed, 435 insertions(+), 4 deletions(-) create mode 100644 src/calibre/utils/winshell.py diff --git a/installer/windows/freeze.py b/installer/windows/freeze.py index aca1481fa8..0829907d01 100644 --- a/installer/windows/freeze.py +++ b/installer/windows/freeze.py @@ -14,7 +14,20 @@ IMAGEMAGICK_DIR = 'C:\\ImageMagick' FONTCONFIG_DIR = 'C:\\fontconfig' VC90 = r'C:\VC90.CRT' -import sys, os, py2exe, shutil, zipfile, glob, re +# ModuleFinder can't handle runtime changes to __path__, but win32com uses them +import sys +import py2exe.mf as modulefinder +import win32com +for p in win32com.__path__[1:]: + modulefinder.AddPackagePath("win32com", p) +for extra in ["win32com.shell"]: #,"win32com.mapi" + __import__(extra) + m = sys.modules[extra] + for p in m.__path__[1:]: + modulefinder.AddPackagePath(extra, p) + + +import os, py2exe, shutil, zipfile, glob, re from distutils.core import setup BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) sys.path.insert(0, BASE_DIR) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 999a242986..5683619816 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -31,6 +31,21 @@ from calibre.customize.ui import run_plugins_on_import from calibre import sanitize_file_name from calibre.ebooks import BOOK_EXTENSIONS +if iswindows: + import calibre.utils.winshell as winshell + +def delete_file(path): + try: + winshell.delete_file(path, silent=True, no_confirm=True) + except: + os.remove(path) + +def delete_tree(path): + try: + winshell.delete_file(path, silent=True, no_confirm=True) + except: + shutil.rmtree(path) + copyfile = os.link if hasattr(os, 'link') else shutil.copyfile FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5, @@ -499,7 +514,7 @@ class LibraryDatabase2(LibraryDatabase): def rmtree(self, path): if not self.normpath(self.library_path).startswith(self.normpath(path)): - shutil.rmtree(path) + delete_tree(path) def normpath(self, path): path = os.path.abspath(os.path.realpath(path)) @@ -745,7 +760,10 @@ class LibraryDatabase2(LibraryDatabase): path = os.path.join(self.library_path, self.path(id, index_is_id=True)) self.data.remove(id) if os.path.exists(path): - self.rmtree(path) + if iswindows: + winshell.delete_file(path, no_confirm=True, silent=True) + else: + self.rmtree(path) parent = os.path.dirname(path) if len(os.listdir(parent)) == 0: self.rmtree(parent) @@ -764,7 +782,7 @@ class LibraryDatabase2(LibraryDatabase): ext = ('.' + format.lower()) if format else '' path = os.path.join(path, name+ext) try: - os.remove(path) + delete_file(path) except: traceback.print_exc() self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper())) diff --git a/src/calibre/utils/winshell.py b/src/calibre/utils/winshell.py new file mode 100644 index 0000000000..9769f52e4a --- /dev/null +++ b/src/calibre/utils/winshell.py @@ -0,0 +1,400 @@ +"""winshell - convenience functions to access Windows shell functionality + +Certain aspects of the Windows user interface are grouped by + Microsoft as Shell functions. These include the Desktop, shortcut + icons, special folders (such as My Documents) and a few other things. + +These are mostly available via the shell module of the win32all + extensions, but whenever I need to use them, I've forgotten the + various constants and so on. + +Several of the shell items have two variants: personal and common, + or User and All Users. These refer to systems with profiles in use: + anything from NT upwards, and 9x with Profiles turned on. Where + relevant, the Personal/User version refers to that owned by the + logged-on user and visible only to that user; the Common/All Users + version refers to that maintained by an Administrator and visible + to all users of the system. + +(c) Tim Golden 25th November 2003 +Licensed under the (GPL-compatible) MIT License: +http://www.opensource.org/licenses/mit-license.php + +9th Nov 2005 0.2 . License changed to MIT + . Added functionality using SHFileOperation +25th Nov 2003 0.1 . Initial release by Tim Golden +""" + +__VERSION__ = "0.2" + +import os +from win32com import storagecon +from win32com.shell import shell, shellcon +import pythoncom + +class x_winshell (Exception): + pass + +# +# Although this can be done in one call, Win9x didn't +# support it, so I added this workaround. +# +def get_path (folder_id): + return shell.SHGetPathFromIDList (shell.SHGetSpecialFolderLocation (0, folder_id)) + +def desktop (common=0): + "What folder is equivalent to the current desktop?" + return get_path ((shellcon.CSIDL_DESKTOP, shellcon.CSIDL_COMMON_DESKTOPDIRECTORY)[common]) + +def common_desktop (): +# +# Only here because already used in code +# + return desktop (common=1) + +def application_data (common=0): + "What folder holds application configuration files?" + return get_path ((shellcon.CSIDL_APPDATA, shellcon.CSIDL_COMMON_APPDATA)[common]) + +def favourites (common=0): + "What folder holds the Explorer favourites shortcuts?" + return get_path ((shellcon.CSIDL_FAVORITES, shellcon.CSIDL_COMMON_FAVORITES)[common]) +bookmarks = favourites + +def start_menu (common=0): + "What folder holds the Start Menu shortcuts?" + return get_path ((shellcon.CSIDL_STARTMENU, shellcon.CSIDL_COMMON_STARTMENU)[common]) + +def programs (common=0): + "What folder holds the Programs shortcuts (from the Start Menu)?" + return get_path ((shellcon.CSIDL_PROGRAMS, shellcon.CSIDL_COMMON_PROGRAMS)[common]) + +def startup (common=0): + "What folder holds the Startup shortcuts (from the Start Menu)?" + return get_path ((shellcon.CSIDL_STARTUP, shellcon.CSIDL_COMMON_STARTUP)[common]) + +def personal_folder (): + "What folder holds the My Documents files?" + return get_path (shellcon.CSIDL_PERSONAL) +my_documents = personal_folder + +def recent (): + "What folder holds the Documents shortcuts (from the Start Menu)?" + return get_path (shellcon.CSIDL_RECENT) + +def sendto (): + "What folder holds the SendTo shortcuts (from the Context Menu)?" + return get_path (shellcon.CSIDL_SENDTO) + +# +# Internally abstracted function to handle one +# of several shell-based file manipulation +# routines. Not all the possible parameters +# are covered which might be passed to the +# underlying SHFileOperation API call, but +# only those which seemed useful to me at +# the time. +# +def _file_operation ( + operation, + source_path, + target_path=None, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + # + # At present the Python wrapper around SHFileOperation doesn't + # allow lists of files. Hopefully it will at some point, so + # take account of it here. + # If you pass this shell function a "/"-separated path with + # a wildcard, eg c:/temp/*.tmp, it gets confused. It's ok + # with a backslash, so convert here. + # + source_path = source_path or "" + if isinstance (source_path, basestring): + source_path = os.path.abspath (source_path) + else: + source_path = [os.path.abspath (i) for i in source_path] + + target_path = target_path or "" + if isinstance (target_path, basestring): + target_path = os.path.abspath (target_path) + else: + target_path = [os.path.abspath (i) for i in target_path] + + flags = 0 + if allow_undo: flags |= shellcon.FOF_ALLOWUNDO + if no_confirm: flags |= shellcon.FOF_NOCONFIRMATION + if rename_on_collision: flags |= shellcon.FOF_RENAMEONCOLLISION + if silent: flags |= shellcon.FOF_SILENT + + result, n_aborted = shell.SHFileOperation ( + (hWnd or 0, operation, source_path, target_path, flags, None, None) + ) + if result <> 0: + raise x_winshell, result + elif n_aborted: + raise x_winshell, "%d operations were aborted by the user" % n_aborted + +def copy_file ( + source_path, + target_path, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + """Perform a shell-based file copy. Copying in + this way allows the possibility of undo, auto-renaming, + and showing the "flying file" animation during the copy. + + The default options allow for undo, don't automatically + clobber on a name clash, automatically rename on collision + and display the animation. + """ + _file_operation ( + shellcon.FO_COPY, + source_path, + target_path, + allow_undo, + no_confirm, + rename_on_collision, + silent, + hWnd + ) + +def move_file ( + source_path, + target_path, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + """Perform a shell-based file move. Moving in + this way allows the possibility of undo, auto-renaming, + and showing the "flying file" animation during the copy. + + The default options allow for undo, don't automatically + clobber on a name clash, automatically rename on collision + and display the animation. + """ + _file_operation ( + shellcon.FO_MOVE, + source_path, + target_path, + allow_undo, + no_confirm, + rename_on_collision, + silent, + hWnd + ) + +def rename_file ( + source_path, + target_path, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + """Perform a shell-based file rename. Renaming in + this way allows the possibility of undo, auto-renaming, + and showing the "flying file" animation during the copy. + + The default options allow for undo, don't automatically + clobber on a name clash, automatically rename on collision + and display the animation. + """ + _file_operation ( + shellcon.FO_RENAME, + source_path, + target_path, + allow_undo, + no_confirm, + rename_on_collision, + silent, + hWnd + ) + +def delete_file ( + source_path, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + """Perform a shell-based file delete. Deleting in + this way uses the system recycle bin, allows the + possibility of undo, and showing the "flying file" + animation during the delete. + + The default options allow for undo, don't automatically + clobber on a name clash, automatically rename on collision + and display the animation. + """ + _file_operation ( + shellcon.FO_DELETE, + source_path, + None, + allow_undo, + no_confirm, + rename_on_collision, + silent, + hWnd + ) + +def CreateShortcut (Path, Target, Arguments = "", StartIn = "", Icon = ("",0), Description = ""): + """Create a Windows shortcut: + + Path - As what file should the shortcut be created? + Target - What command should the desktop use? + Arguments - What arguments should be supplied to the command? + StartIn - What folder should the command start in? + Icon - (filename, index) What icon should be used for the shortcut? + Description - What description should the shortcut be given? + + eg + CreateShortcut ( + Path=os.path.join (desktop (), "PythonI.lnk"), + Target=r"c:\python\python.exe", + Icon=(r"c:\python\python.exe", 0), + Description="Python Interpreter" + ) + """ + sh = pythoncom.CoCreateInstance ( + shell.CLSID_ShellLink, + None, + pythoncom.CLSCTX_INPROC_SERVER, + shell.IID_IShellLink + ) + + sh.SetPath (Target) + sh.SetDescription (Description) + sh.SetArguments (Arguments) + sh.SetWorkingDirectory (StartIn) + sh.SetIconLocation (Icon[0], Icon[1]) + + persist = sh.QueryInterface (pythoncom.IID_IPersistFile) + persist.Save (Path, 1) + +# +# Constants for structured storage +# +# These come from ObjIdl.h +FMTID_USER_DEFINED_PROPERTIES = "{F29F85E0-4FF9-1068-AB91-08002B27B3D9}" +FMTID_CUSTOM_DEFINED_PROPERTIES = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" + +PIDSI_TITLE = 0x00000002 +PIDSI_SUBJECT = 0x00000003 +PIDSI_AUTHOR = 0x00000004 +PIDSI_CREATE_DTM = 0x0000000c +PIDSI_KEYWORDS = 0x00000005 +PIDSI_COMMENTS = 0x00000006 +PIDSI_TEMPLATE = 0x00000007 +PIDSI_LASTAUTHOR = 0x00000008 +PIDSI_REVNUMBER = 0x00000009 +PIDSI_EDITTIME = 0x0000000a +PIDSI_LASTPRINTED = 0x0000000b +PIDSI_LASTSAVE_DTM = 0x0000000d +PIDSI_PAGECOUNT = 0x0000000e +PIDSI_WORDCOUNT = 0x0000000f +PIDSI_CHARCOUNT = 0x00000010 +PIDSI_THUMBNAIL = 0x00000011 +PIDSI_APPNAME = 0x00000012 +PROPERTIES = ( + PIDSI_TITLE, + PIDSI_SUBJECT, + PIDSI_AUTHOR, + PIDSI_CREATE_DTM, + PIDSI_KEYWORDS, + PIDSI_COMMENTS, + PIDSI_TEMPLATE, + PIDSI_LASTAUTHOR, + PIDSI_EDITTIME, + PIDSI_LASTPRINTED, + PIDSI_LASTSAVE_DTM, + PIDSI_PAGECOUNT, + PIDSI_WORDCOUNT, + PIDSI_CHARCOUNT, + PIDSI_APPNAME +) + +# +# This was taken from someone else's example, +# but I can't find where. If you know, please +# tell me so I can give due credit. +# +def structured_storage (filename): + """Pick out info from MS documents with embedded + structured storage (typically MS Word docs etc.) + + Returns a dictionary of information found + """ + + if not pythoncom.StgIsStorageFile (filename): + return {} + + flags = storagecon.STGM_READ | storagecon.STGM_SHARE_EXCLUSIVE + storage = pythoncom.StgOpenStorage (filename, None, flags) + try: + properties_storage = storage.QueryInterface (pythoncom.IID_IPropertySetStorage) + except pythoncom.com_error: + return {} + + property_sheet = properties_storage.Open (FMTID_USER_DEFINED_PROPERTIES) + try: + data = property_sheet.ReadMultiple (PROPERTIES) + finally: + property_sheet = None + + title, subject, author, created_on, keywords, comments, template_used, \ + updated_by, edited_on, printed_on, saved_on, \ + n_pages, n_words, n_characters, \ + application = data + + result = {} + if title: result['title'] = title + if subject: result['subject'] = subject + if author: result['author'] = author + if created_on: result['created_on'] = created_on + if keywords: result['keywords'] = keywords + if comments: result['comments'] = comments + if template_used: result['template_used'] = template_used + if updated_by: result['updated_by'] = updated_by + if edited_on: result['edited_on'] = edited_on + if printed_on: result['printed_on'] = printed_on + if saved_on: result['saved_on'] = saved_on + if n_pages: result['n_pages'] = n_pages + if n_words: result['n_words'] = n_words + if n_characters: result['n_characters'] = n_characters + if application: result['application'] = application + return result + +if __name__ == '__main__': + try: + print 'Desktop =>', desktop () + print 'Common Desktop =>', desktop (1) + print 'Application Data =>', application_data () + print 'Common Application Data =>', application_data (1) + print 'Bookmarks =>', bookmarks () + print 'Common Bookmarks =>', bookmarks (1) + print 'Start Menu =>', start_menu () + print 'Common Start Menu =>', start_menu (1) + print 'Programs =>', programs () + print 'Common Programs =>', programs (1) + print 'Startup =>', startup () + print 'Common Startup =>', startup (1) + print 'My Documents =>', my_documents () + print 'Recent =>', recent () + print 'SendTo =>', sendto () + finally: + raw_input ("Press enter...") + From 9041567410382ba80d6b36c24662455a8e7506ff Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 10 Apr 2009 12:04:35 -0700 Subject: [PATCH 10/10] Fix #2258 (cancel button not wide enough) --- src/calibre/gui2/lrf_renderer/config.ui | 77 +++++++++++++------------ 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/src/calibre/gui2/lrf_renderer/config.ui b/src/calibre/gui2/lrf_renderer/config.ui index 47956fe003..cad538090a 100644 --- a/src/calibre/gui2/lrf_renderer/config.ui +++ b/src/calibre/gui2/lrf_renderer/config.ui @@ -1,72 +1,73 @@ - + + ViewerConfig - - + + 0 0 - 281 - 214 + 373 + 264 - + Configure Viewer - - + + :/images/config.svg:/images/config.svg - - - - + + + + Use white background - - - + + + Hyphenate - + true - - - - Qt::Horizontal - - - QDialogButtonBox::Cancel|QDialogButtonBox::Ok - - - - - - + + + QFrame::Box - + <b>Changes will only take effect after a restart.</b> - + Qt::RichText - + Qt::AlignCenter - + true + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + - + @@ -75,11 +76,11 @@ ViewerConfig accept() - + 248 254 - + 157 274 @@ -91,11 +92,11 @@ ViewerConfig reject() - + 316 260 - + 286 274