diff --git a/recipes/alternet.recipe b/recipes/alternet.recipe index f885225d31..e58376cc42 100644 --- a/recipes/alternet.recipe +++ b/recipes/alternet.recipe @@ -10,11 +10,11 @@ class Alternet(BasicNewsRecipe): category = 'News, Magazine' description = 'News magazine and online community' feeds = [ - (u'Front Page', u'http://feeds.feedblitz.com/alternet'), - (u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'), - (u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'), - (u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage') - ] + (u'Front Page', u'http://feeds.feedblitz.com/alternet'), + (u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'), + (u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'), + (u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage') + ] remove_attributes = ['width', 'align','cellspacing'] remove_javascript = True use_embedded_content = False @@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe): self.temp_files[-1].write(html) self.temp_files[-1].close() return self.temp_files[-1].name + + conversion_options = {'linearize_tables': True} diff --git a/recipes/goal.recipe b/recipes/goal.recipe new file mode 100644 index 0000000000..23ccbb4744 --- /dev/null +++ b/recipes/goal.recipe @@ -0,0 +1,13 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1325677767(BasicNewsRecipe): + title = u'Goal' + oldest_article = 1 + language = 'it' + max_articles_per_feed = 100 + auto_cleanup = True + remove_tags_after = [dict(id='article_content')] + feeds = [(u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss')] + __author__ = 'faber1971' + description = 'Sports news from Italy' + diff --git a/recipes/kopalniawiedzy.recipe b/recipes/kopalniawiedzy.recipe index 628dc1b2d2..a7b932f618 100644 --- a/recipes/kopalniawiedzy.recipe +++ b/recipes/kopalniawiedzy.recipe @@ -1,79 +1,79 @@ __license__ = 'GPL v3' -__copyright__ = '2011, Attis ' +__copyright__ = '2011 Attis , 2012 Tomasz Długosz ' __version__ = 'v. 0.1' import re from calibre.web.feeds.recipes import BasicNewsRecipe class KopalniaWiedzy(BasicNewsRecipe): - title = u'Kopalnia Wiedzy' - publisher = u'Kopalnia Wiedzy' - description = u'Ciekawostki ze świata nauki i techniki' - encoding = 'utf-8' - __author__ = 'Attis' - language = 'pl' - oldest_article = 7 - max_articles_per_feed = 100 - INDEX = u'http://kopalniawiedzy.pl/' - remove_javascript = True - no_stylesheets = True - - remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}] - remove_tags_after = dict(attrs={'class':'ad-square'}) - keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})] - extra_css = '.topimage {margin-top: 30px}' - - preprocess_regexps = [ - (re.compile(u''), - lambda match: '' ), - (re.compile(u'

'), - lambda match: '') - ] - - feeds = [ - (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'), - (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'), - (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'), - (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'), - (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'), - (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss') - ] - - def is_link_wanted(self, url, tag): - return tag['class'] == 'next' - - def remove_beyond(self, tag, next): - while tag is not None and getattr(tag, 'name', None) != 'body': - after = getattr(tag, next) - while after is not None: - ns = getattr(tag, next) - after.extract() - after = ns - tag = tag.parent - - def append_page(self, soup, appendtag, position): - pager = soup.find('a',attrs={'class':'next'}) - if pager: - nexturl = self.INDEX + pager['href'] - soup2 = self.index_to_soup(nexturl) - texttag = soup2.find('div', attrs={'id':'articleContent'}) - - tag = texttag.find(attrs={'class':'pages'}) - self.remove_beyond(tag, 'nextSibling') - - newpos = len(texttag.contents) - self.append_page(soup2,texttag,newpos) + title = u'Kopalnia Wiedzy' + publisher = u'Kopalnia Wiedzy' + description = u'Ciekawostki ze świata nauki i techniki' + encoding = 'utf-8' + __author__ = 'Attis & Tomasz Długosz' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + INDEX = u'http://kopalniawiedzy.pl/' + remove_javascript = True + no_stylesheets = True - appendtag.insert(position,texttag) + remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}] + remove_tags_after = dict(attrs={'class':'ad-square'}) + keep_only_tags = [dict(name="div", attrs={'class':'article-text text-small'})] + extra_css = '.topimage {margin-top: 30px}' + + preprocess_regexps = [ + (re.compile(u''), + lambda match: '' ), + (re.compile(u'

'), + lambda match: '') + ] + + feeds = [ + (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'), + (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'), + (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'), + (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'), + (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'), + (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss') + ] + + def is_link_wanted(self, url, tag): + return tag['class'] == 'next' + + def remove_beyond(self, tag, next): + while tag is not None and getattr(tag, 'name', None) != 'body': + after = getattr(tag, next) + while after is not None: + ns = getattr(tag, next) + after.extract() + after = ns + tag = tag.parent + + def append_page(self, soup, appendtag, position): + pager = soup.find('a',attrs={'class':'next'}) + if pager: + nexturl = self.INDEX + pager['href'] + soup2 = self.index_to_soup(nexturl) + texttag = soup2.find('div', attrs={'id':'articleContent'}) + + tag = texttag.find(attrs={'class':'pages'}) + self.remove_beyond(tag, 'nextSibling') + + newpos = len(texttag.contents) + self.append_page(soup2,texttag,newpos) + + appendtag.insert(position,texttag) - def preprocess_html(self, soup): - self.append_page(soup, soup.body, 3) - - for item in soup.findAll('div',attrs={'class':'pages'}): - item.extract() - - for item in soup.findAll('p', attrs={'class':'wykop'}): - item.extract() - - return soup + def preprocess_html(self, soup): + self.append_page(soup, soup.body, 3) + + for item in soup.findAll('div',attrs={'class':'pages'}): + item.extract() + + for item in soup.findAll('p', attrs={'class':'wykop'}): + item.extract() + + return soup diff --git a/recipes/macity.recipe b/recipes/macity.recipe new file mode 100644 index 0000000000..b79fa972cf --- /dev/null +++ b/recipes/macity.recipe @@ -0,0 +1,23 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1325766771(BasicNewsRecipe): + title = u'Macity' + language = 'it' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + + def get_article_url(self, article): + link = BasicNewsRecipe.get_article_url(self, article) + if link.split('/')[-1]=="story01.htm": + link=link.split('/')[-2] + a=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L' , 'N' , 'S' ] + b=['0', '.', '/', '?', '-', '=', '&', '_', 'http://', '.com', 'www.'] + for i in range(0,len(a)): + link=link.replace('0'+a[-i],b[-i]) + return link + + feeds = [(u'Macity', u'http://www.macitynet.it.feedsportal.com/c/33714/f/599513/index.rss')] + __author__ = 'faber1971' + description = 'Apple and hi-tech news' + diff --git a/recipes/money_pl.recipe b/recipes/money_pl.recipe new file mode 100644 index 0000000000..075264f8f7 --- /dev/null +++ b/recipes/money_pl.recipe @@ -0,0 +1,76 @@ +import re + +from calibre.web.feeds.news import BasicNewsRecipe + +class FocusRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = u'intromatyk ' + language = 'pl' + version = 1 + + title = u'Money.pl' + category = u'News' + description = u'Informacje finansowe z kraju i ze świata. Aktualne i archiwalne: notowania giełdowe, kursy walut, wskaźniki gospodarcze.' + remove_empty_feeds= True + no_stylesheets=True + oldest_article = 1 + max_articles_per_feed = 100000 + recursions = 0 + + no_stylesheets = True + remove_javascript = True + + simultaneous_downloads = 2 + + r = re.compile('.*(?Phttp:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*') + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'artykul'})) + remove_tags = [dict(name='ul', attrs={'class':'socialStuff'})] + + extra_css = ''' + body {font-family: Arial,Helvetica,sans-serif ;} + h1{text-align: left;} + h2{font-size: medium; font-weight: bold;} + p.lead {font-weight: bold; text-align: left;} + .authordate {font-size: small; color: #696969;} + .fot{font-size: x-small; color: #666666;} + ''' + + feeds = [ + ('Wiadomosci z kraju', 'http://money.pl.feedsportal.com/c/33900/f/612847/index.rss'), + ('Wiadomosci ze swiata', 'http://money.pl.feedsportal.com/c/33900/f/612848/index.rss'), + ('Gospodarka', 'http://money.pl.feedsportal.com/c/33900/f/612849/index.rss'), + ('Waluty', 'http://money.pl.feedsportal.com/c/33900/f/612850/index.rss'), + ('Gielda', 'http://money.pl.feedsportal.com/c/33900/f/612851/index.rss'), + ('Banki', 'http://money.pl.feedsportal.com/c/33900/f/612852/index.rss'), + ('Fundusze', 'http://money.pl.feedsportal.com/c/33900/f/612853/index.rss'), + ('Emerytury', 'http://money.pl.feedsportal.com/c/33900/f/612854/index.rss'), + ('Podatki', 'http://money.pl.feedsportal.com/c/33900/f/612855/index.rss'), + ('Ubezpieczenia', 'http://money.pl.feedsportal.com/c/33900/f/612856/index.rss'), + ('Poradniki', 'http://money.pl.feedsportal.com/c/33900/f/612857/index.rss'), + ('Raporty', 'http://money.pl.feedsportal.com/c/33900/f/612858/index.rss'), + ('Motoryzacja', 'http://money.pl.feedsportal.com/c/33900/f/612859/index.rss'), + ('Manager', 'http://money.pl.feedsportal.com/c/33900/f/612860/index.rss'), + ('Dla firm', 'http://money.pl.feedsportal.com/c/33900/f/612861/index.rss'), + ('Prawo', 'http://money.pl.feedsportal.com/c/33900/f/612862/index.rss'), + ('Nieruchomosci', 'http://money.pl.feedsportal.com/c/33900/f/612863/index.rss'), + ('Praca', 'http://money.pl.feedsportal.com/c/33900/f/612864/index.rss'), + + + ] + + def print_version(self, url): + if url.count ('money.pl.feedsportal.com'): + u = url.find('0Cartykul0C') + u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[u + 21:] + u = u.replace('0C', '/') + u = u.replace('A', '') + u = u.replace ('0E','-') + u = u.replace ('0P',';') + u = u.replace ('0H',',') + u = u.replace ('0B','.') + u = u.replace (',0,',',-1,') + u = u.replace('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', '') + else: + u = url.replace('/nc/1','/do-druku/1') + return u diff --git a/recipes/wired_it.recipe b/recipes/wired_it.recipe new file mode 100644 index 0000000000..2c1f8a172d --- /dev/null +++ b/recipes/wired_it.recipe @@ -0,0 +1,12 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1325758162(BasicNewsRecipe): + title = u'Wired' + language = 'it' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + remove_tags_after = [dict(name='div', attrs={'class':'article_content'})] + feeds = [(u'Wired', u'http://www.wired.it/rss.xml')] + __author__ = 'faber1971' + description = 'An American magazine that reports on how new technology affects culture, the economy, and politics' diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 0b773a51d8..7f2695b5c4 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -1410,19 +1410,22 @@ class MOBIFile(object): # {{{ self.mobi_header.extra_data_flags, decompress) for r in xrange(1, min(len(self.records), ntr+1))] self.image_records, self.binary_records = [], [] + image_index = 0 for i in xrange(fntbr, len(self.records)): if i in self.indexing_record_nums or i in self.huffman_record_nums: continue + image_index += 1 r = self.records[i] fmt = None - if i >= fii and r.raw[:4] not in (b'FLIS', b'FCIS', b'SRCS', - b'\xe9\x8e\r\n'): + if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS', + b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP', + b'AUDI', b'VIDE'}: try: width, height, fmt = identify_data(r.raw) except: pass if fmt is not None: - self.image_records.append(ImageRecord(len(self.image_records)+1, r, fmt)) + self.image_records.append(ImageRecord(image_index, r, fmt)) else: self.binary_records.append(BinaryRecord(i, r)) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index e58b492cef..c0a1687eaf 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -974,12 +974,13 @@ class MobiReader(object): continue processed_records.append(i) data = self.sections[i][0] + image_index += 1 if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}: - # A FLIS, FCIS, SRCS or EOF record, ignore + # This record is a known non image type, not need to try to + # load the image continue buf = cStringIO.StringIO(data) - image_index += 1 try: im = PILImage.open(buf) im = im.convert('RGB') diff --git a/src/calibre/ebooks/oeb/display/cfi.coffee b/src/calibre/ebooks/oeb/display/cfi.coffee index 9c6e18ea06..5020174664 100644 --- a/src/calibre/ebooks/oeb/display/cfi.coffee +++ b/src/calibre/ebooks/oeb/display/cfi.coffee @@ -4,15 +4,24 @@ ### Copyright 2011, Kovid Goyal Released under the GPLv3 License - Based on code originally written by Peter Sorotkin (epubcfi.js) + Based on code originally written by Peter Sorotkin + (http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js) + Improvements with respect to that code: + 1. Works on all browsers (WebKit, Firefox and IE >= 8) + 2. Works if the point is after the last text character in an element + 3. Works for elements that are scrollable (i.e. have their own scrollbars) + + To check if this script is compatible with the current browser, call + window.cfi.is_compatible() it will throw an exception if not compatible. ### -# -log = (error) -> + +log = (error) -> # {{{ if error if window?.console?.log window.console.log(error) else if process?.stdout?.write process.stdout.write(error + '\n') +# }}} # CFI escaping {{{ escape_for_cfi = (raw) -> @@ -51,12 +60,111 @@ fstr = (d) -> # {{{ ans # }}} +get_current_time = (target) -> # {{{ + ans = 0 + if target.currentTime != undefined + ans = target.currentTime + fstr(ans) +# }}} + +viewport_to_document = (x, y, doc) -> # {{{ + win = doc.defaultView + x += win.scrollX + y += win.scrollY + if doc != window.document + # We are in a frame + node = win.frameElement + rect = node.getBoundingClientRect() + return viewport_to_document(rect.left, rect.top, node.ownerDocument) + return [x + win.scrollX, y + win.scrollY] +# }}} + +# Equivalent for caretRangeFromPoint for non WebKit browsers {{{ +range_has_point = (range, x, y) -> + for rect in range.getClientRects() + if (rect.left <= x <= rect.right) and (rect.top <= y <= rect.bottom) + return true + return false + +offset_in_text_node = (node, range, x, y) -> + limits = [0, node.nodeValue.length] + while limits[0] != limits[1] + pivot = Math.floor( (limits[0] + limits[1]) / 2 ) + lr = [limits[0], pivot] + rr = [pivot+1, limits[1]] + range.setStart(node, pivot) + range.setEnd(node, pivot+1) + if range_has_point(range, x, y) + return pivot + range.setStart(node, rr[0]) + range.setEnd(node, rr[1]) + if range_has_point(range, x, y) + limits = rr + continue + range.setStart(node, lr[0]) + range.setEnd(node, lr[1]) + if range_has_point(range, x, y) + limits = lr + continue + break + return limits[0] + +find_offset_for_point = (x, y, node, cdoc) -> + range = cdoc.createRange() + child = node.firstChild + last_child = null + while child + if child.nodeType in [3, 4, 5, 6] and child.nodeValue?.length + range.setStart(child, 0) + range.setEnd(child, child.nodeValue.length) + if range_has_point(range, x, y) + return [child, offset_in_text_node(child, range, x, y)] + last_child = child + child = child.nextSibling + + if not last_child + throw "#{node} has no children" + # The point must be after the last bit of text + pos = 0 + return [last_child, last_child.nodeValue.length] + +# }}} + class CanonicalFragmentIdentifier # This class is a namespace to expose CFI functions via the window.cfi # object - constructor: () -> + constructor: () -> # {{{ + this.CREATE_RANGE_ERR = "Your browser does not support the createRange function. Update it to a newer version." + this.IE_ERR = "Your browser is too old. You need Internet Explorer version 8 or newer." + # }}} + + is_compatible: () -> # {{{ + if not window.document.createRange + throw this.CREATE_RANGE_ERR + # Check if Internet Explorer >= 8 as getClientRects returns physical + # rather than logical pixels on older IE + div = document.createElement('div') + ver = 3 + while true + div.innerHTML = "" + if div.getElementsByTagName('i').length == 0 + break + if ver > 4 and ver < 8 + # We have IE < 8 + throw this.IE_ERR + # }}} + + set_current_time: (target, val) -> # {{{ + if target.currentTime == undefined + return + if target.readyState == 4 or target.readyState == "complete" + target.currentTime = val + else + fn = -> target.currentTime = val + target.addEventListener("canplay", fn, false) + #}}} encode: (doc, node, offset, tail) -> # {{{ cfi = tail or "" @@ -64,7 +172,7 @@ class CanonicalFragmentIdentifier # Handle the offset, if any switch node.nodeType when 1 # Element node - if typeoff(offset) == 'number' + if typeof(offset) == 'number' node = node.childNodes.item(offset) when 3, 4, 5, 6 # Text/entity/CDATA node offset or= 0 @@ -89,12 +197,12 @@ class CanonicalFragmentIdentifier cfi = "!" + cfi continue break - # Increase index by the length of all previous sibling text nodes + # Find position of node in parent index = 0 child = p.firstChild while true - index |= 1 - if child.nodeType in [1, 7] + index |= 1 # Increment index by 1 if it is even + if child.nodeType == 1 index++ if child == node break @@ -117,8 +225,8 @@ class CanonicalFragmentIdentifier error = null node = doc - until cfi.length <= 0 or error - if ( (r = cfi.match(simple_node_regex)) is not null ) # Path step + until cfi.length < 1 or error + if (r = cfi.match(simple_node_regex)) # Path step target = parseInt(r[1]) assertion = r[2] if assertion @@ -136,11 +244,18 @@ class CanonicalFragmentIdentifier error = "No matching child found for CFI: " + cfi break index |= 1 # Increment index by 1 if it is even - if child.nodeType in [1, 7] # We have an element or a PI + if child.nodeType == 1 index++ if ( index == target ) cfi = cfi.substr(r[0].length) node = child + if assertion and node.id != assertion + # The found child does not match the id assertion, + # trust the id assertion if an element with that id + # exists + child = doc.getElementById(assertion) + if child + node = child break child = child.nextSibling @@ -198,7 +313,9 @@ class CanonicalFragmentIdentifier next = false while true nn = node.nextSibling - if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata + if not nn + break + if nn.nodeType in [3, 4, 5, 6] and nn.nodeValue?.length # Text node, entity, cdata next = nn break if not next @@ -253,7 +370,7 @@ class CanonicalFragmentIdentifier (if target.parentNode then target.parentNode else target).normalize() if name in ['audio', 'video'] - tail = "~" + fstr target.currentTime + tail = "~" + get_current_time(target) if name in ['img', 'video'] px = ((x + cwin.scrollX - target.offsetLeft)*100)/target.offsetWidth @@ -265,9 +382,12 @@ class CanonicalFragmentIdentifier if range target = range.startContainer offset = range.startOffset + else + throw "Failed to find range from point (#{ x }, #{ y })" + else if cdoc.createRange + [target, offset] = find_offset_for_point(x, y, target, cdoc) else - # TODO: implement a span bisection algorithm for UAs - # without caretRangeFromPoint (Gecko, IE) + throw this.CREATE_RANGE_ERR this.encode(doc, target, offset, tail) # }}} @@ -285,52 +405,102 @@ class CanonicalFragmentIdentifier nwin = ndoc.defaultView x = null y = null + range = null if typeof(r.offset) == "number" # Character offset + if not ndoc.createRange + throw this.CREATE_RANGE_ERR range = ndoc.createRange() if r.forward try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}] else try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}] - k = 0 a = null rects = null node_len = node.nodeValue.length - until rects or rects.length or k >= try_list.length - t = try_list[k++] - start_offset = r.offset + t.start - end_offset = r.offset + t.end - a = t.a - if start_offset < 0 or end_offset >= node_len - continue - range.setStart(node, start_offset) - range.setEnd(node, end_offset) - rects = range.getClientRects() + offset = r.offset + for i in [0, 1] + # Try reducing the offset by 1 if we get no match as if it refers to the position after the + # last character we wont get a match with getClientRects + offset = r.offset - i + if offset < 0 + offset = 0 + k = 0 + until rects?.length or k >= try_list.length + t = try_list[k++] + start_offset = offset + t.start + end_offset = offset + t.end + a = t.a + if start_offset < 0 or end_offset >= node_len + continue + range.setStart(node, start_offset) + range.setEnd(node, end_offset) + rects = range.getClientRects() + if rects?.length + break - if not rects or not rects.length + + if not rects?.length log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }") return null - rect = rects[0] - x = (a*rect.left + (1-a)*rect.right) - y = (rect.top + rect.bottom)/2 else - x = node.offsetLeft - nwin.scrollX - y = node.offsetTop - nwin.scrollY - if typeof(r.x) == "number" and node.offsetWidth - x += (r.x*node.offsetWidth)/100 - y += (r.y*node.offsetHeight)/100 + [x, y] = [r.x, r.y] - until ndoc == doc - node = nwin.frameElement + {x:x, y:y, node:r.node, time:r.time, range:range, a:a} + + # }}} + + scroll_to: (cfi, callback=false, doc=window?.document) -> # {{{ + point = this.point(cfi, doc) + if not point + log("No point found for cfi: #{ cfi }") + return + if typeof point.time == 'number' + this.set_current_time(point.node, point.time) + + if point.range != null + r = point.range + node = r.startContainer ndoc = node.ownerDocument nwin = ndoc.defaultView - x += node.offsetLeft - nwin.scrollX - y += node.offsetTop - nwin.scrollY + span = ndoc.createElement('span') + span.setAttribute('style', 'border-width: 0; padding: 0; margin: 0') + r.surroundContents(span) + span.scrollIntoView() + fn = -> + rect = span.getBoundingClientRect() + x = (point.a*rect.left + (1-point.a)*rect.right) + y = (rect.top + rect.bottom)/2 + [x, y] = viewport_to_document(x, y, ndoc) + tn = if span.firstChild then span.firstChild.nodeValue else '' + tn = ndoc.createTextNode(tn) + p = span.parentNode + p.insertBefore(tn, span) + p.removeChild(span) + p.normalize() + if callback + callback(x, y) + else + node = point.node + nwin = node.ownerDocument.defaultView + node.scrollIntoView() - {x:x, y:y, node:r.node, time:r.time} + fn = -> + rect = node.getBoundingClientRect() + [x, y] = viewport_to_document(rect.left, rect.top, node.ownerDocument) + if typeof(point.x) == 'number' and node.offsetWidth + x += (r.x*node.offsetWidth)/100 + if typeof(point.y) == 'number' and node.offsetHeight + y += (r.y*node.offsetHeight)/100 + scrollTo(x, y) + if callback + callback(x, y) + setTimeout(fn, 10) + + null # }}} if window? diff --git a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee index 056d24b396..ab82c32df4 100644 --- a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee +++ b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee @@ -6,19 +6,53 @@ Released under the GPLv3 License ### -viewport_top = (node) -> - $(node).offset().top - window.pageYOffset +log = (error) -> + if error + if window?.console?.log + window.console.log(error) + else if process?.stdout?.write + process.stdout.write(error + '\n') -viewport_left = (node) -> - $(node).offset().left - window.pageXOffset +show_cfi = () -> + if window.current_cfi + fn = (x, y) -> + ms = document.getElementById("marker").style + ms.display = 'block' + ms.top = y - 30 + 'px' + ms.left = x - 1 + 'px' + + window.cfi.scroll_to(window.current_cfi, fn) + null + +mark_and_reload = (evt) -> + # Remove image in case the click was on the image itself, we want the cfi to + # be on the underlying element + ms = document.getElementById("marker") + ms.parentNode.removeChild(ms) + + fn = () -> + window.current_cfi = window.cfi.at(evt.clientX, evt.clientY) + if window.current_cfi + epubcfi = "#epubcfi(#{ window.current_cfi })" + newloc = window.location.href.replace(/#.*$/, '') + epubcfi + window.location.replace(newloc) + document.getElementById('current-cfi').innerHTML = window.current_cfi + window.location.reload() + + setTimeout(fn, 1) + null window.onload = -> - h1 = document.getElementsByTagName('h1')[0] - x = h1.scrollLeft + 150 - y = viewport_top(h1) + h1.offsetHeight/2 - e = document.elementFromPoint x, y - if e.getAttribute('id') != 'first-h1' - alert 'Failed to find top h1' + try + window.cfi.is_compatible() + catch error + alert(error) return - alert window.cfi.at x, y + document.onclick = mark_and_reload + r = location.hash.match(/#epubcfi\((.+)\)$/) + if r + window.current_cfi = r[1] + document.getElementById('current-cfi').innerHTML = window.current_cfi + setTimeout(show_cfi, 100) + null diff --git a/src/calibre/ebooks/oeb/display/test/index.html b/src/calibre/ebooks/oeb/display/test/index.html new file mode 100644 index 0000000000..afeae055a4 --- /dev/null +++ b/src/calibre/ebooks/oeb/display/test/index.html @@ -0,0 +1,65 @@ + + + + Testing CFI functionality + + + + + +
+

Testing EPUB CFI

+
Current CFI: None
+

A div with scrollbars

+
But I must explain to you how all this mistaken + idea of denouncing pleasure and praising pain was born and I + will give you a complete account of the system, and expound the + actual teachings of the great explorer of the truth, the + master-builder of human happiness. No one rejects, dislikes, or + avoids pleasure itself, because it is pleasure, but because + those who do not know how to pursue pleasure rationally + encounter consequences that are extremely painful. Nor again is + there anyone who loves or pursues or desires to obtain pain of + itself, because it is pain, but because occasionally + circumstances occur in which toil and pain can procure him some + great pleasure. To take a trivial example, which of us ever + undertakes laborious physical exercise, except to obtain some + advantage from it? But who has any right to find fault with a + man who chooses to enjoy a pleasure that has no annoying + consequences, or one who avoids a pain that produces no + resultant pleasure? On the other hand, we denounce with + righteous indignation and dislike men who are so beguiled and + demoralized by the charms of pleasure of the moment, so blinded + by desire, that they cannot foresee +
+
+ + + + + diff --git a/src/calibre/ebooks/oeb/display/test/marker.png b/src/calibre/ebooks/oeb/display/test/marker.png new file mode 100644 index 0000000000..6dcc1fb7ba Binary files /dev/null and b/src/calibre/ebooks/oeb/display/test/marker.png differ diff --git a/src/calibre/ebooks/oeb/display/test/test.html b/src/calibre/ebooks/oeb/display/test/test.html deleted file mode 100644 index 3dbda451c0..0000000000 --- a/src/calibre/ebooks/oeb/display/test/test.html +++ /dev/null @@ -1,14 +0,0 @@ - - - - Testing CFI functionality - - - - - -

Testing CFI functionality

- - - - diff --git a/src/calibre/ebooks/oeb/display/test/test.py b/src/calibre/ebooks/oeb/display/test/test.py index e217027312..a09f45dcf0 100644 --- a/src/calibre/ebooks/oeb/display/test/test.py +++ b/src/calibre/ebooks/oeb/display/test/test.py @@ -16,10 +16,9 @@ except ImportError: if False: init_calibre, serve from calibre.utils.coffeescript import serve - def run_devel_server(): os.chdir(os.path.dirname(os.path.abspath(__file__))) - serve() + serve(resources={'cfi.coffee':'../cfi.coffee'}) if __name__ == '__main__': run_devel_server() diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 7bb23946ca..d4f409d3cc 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -80,7 +80,7 @@ class PML_HTMLizer(object): 'b': ('', ''), 'l': ('', ''), 'k': ('', ''), - 'FN': ('

', '

<return
'), + 'FN': ('

', '

return
'), 'SB': ('

', '

return
'), } @@ -143,7 +143,7 @@ class PML_HTMLizer(object): 'd', 'b', ] - + NEW_LINE_EXCHANGE_STATES = { 'h1': 'h1c', } @@ -230,7 +230,7 @@ class PML_HTMLizer(object): div = [] span = [] other = [] - + for key, val in state.items(): if key in self.NEW_LINE_EXCHANGE_STATES and val[0]: state[self.NEW_LINE_EXCHANGE_STATES[key]] = val @@ -644,7 +644,7 @@ class PML_HTMLizer(object): empty_count = 0 text = self.end_line() parsed.append(text) - + # Basic indent will be set if the \t starts the line or # if we are in a continuing \t block. if basic_indent: @@ -666,7 +666,7 @@ class PML_HTMLizer(object): parsed.append(self.STATES_TAGS['T'][1]) indent_state['T'] = False adv_indent_val = '' - + output.append(u''.join(parsed)) line.close() @@ -677,7 +677,7 @@ class PML_HTMLizer(object): def get_toc(self): ''' Toc can have up to 5 levels, 0 - 4 inclusive. - + This function will add items to their appropriate depth in the TOC tree. If the specified depth is invalid (item would not have a valid parent) add diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 2fc14c8238..7cdac3b845 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -14,7 +14,7 @@ from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata import MetaInformation from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG from calibre.utils.config import prefs -from calibre import prints +from calibre import prints, force_unicode, as_unicode single_shot = partial(QTimer.singleShot, 75) @@ -66,7 +66,8 @@ class RecursiveFind(QThread): # {{{ if self.canceled: return self.update.emit( - _('Searching in')+' '+dirpath[0]) + _('Searching in')+' '+force_unicode(dirpath[0], + filesystem_encoding)) self.books += list(self.db.find_books_in_directory(dirpath[0], self.single_book_per_directory)) @@ -82,10 +83,7 @@ class RecursiveFind(QThread): # {{{ except Exception as err: import traceback traceback.print_exc() - try: - msg = unicode(err) - except: - msg = repr(err) + msg = as_unicode(err) self.found.emit(msg) return diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 4e8d59f61d..70da33b7f2 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -12,14 +12,13 @@ from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, QPainter, QPalette, QBrush, QFontDatabase, QDialog, QColor, QPoint, QImage, QRegion, QVariant, QIcon, QFont, pyqtSignature, QAction, QByteArray, QMenu, - pyqtSignal) + pyqtSignal, QSwipeGesture) from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings from calibre.utils.config import Config, StringConfig from calibre.utils.localization import get_language from calibre.gui2.viewer.config_ui import Ui_Dialog from calibre.gui2.viewer.flip import SlideFlip -from calibre.gui2.viewer.gestures import Gestures from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig from calibre.constants import iswindows from calibre import prints, guess_type @@ -514,7 +513,6 @@ class DocumentView(QWebView): # {{{ def __init__(self, *args): QWebView.__init__(self, *args) self.flipper = SlideFlip(self) - self.gestures = Gestures() self.is_auto_repeat_event = False self.debug_javascript = False self.shortcuts = Shortcuts(SHORTCUTS, 'shortcuts/viewer') @@ -582,6 +580,7 @@ class DocumentView(QWebView): # {{{ else: m.addAction(name, a[key], self.shortcuts.get_sequences(key)[0]) self.goto_location_action.setMenu(self.goto_location_menu) + self.grabGesture(Qt.SwipeGesture) def goto_next_section(self, *args): if self.manager is not None: @@ -1047,28 +1046,24 @@ class DocumentView(QWebView): # {{{ self.manager.viewport_resized(self.scroll_fraction) def event(self, ev): - typ = ev.type() - if typ == ev.TouchBegin: - try: - self.gestures.start_gesture('touch', ev) - except: - import traceback - traceback.print_exc() - elif typ == ev.TouchEnd: - try: - gesture = self.gestures.end_gesture('touch', ev, self.rect()) - except: - import traceback - traceback.print_exc() - if gesture is not None: - ev.accept() - if gesture == 'lineleft': - self.next_page() - elif gesture == 'lineright': - self.previous_page() + if ev.type() == ev.Gesture: + swipe = ev.gesture(Qt.SwipeGesture) + if swipe is not None: + self.handle_swipe(swipe) return True return QWebView.event(self, ev) + def handle_swipe(self, swipe): + if swipe.state() == Qt.GestureFinished: + if swipe.horizontalDirection() == QSwipeGesture.Left: + self.previous_page() + elif swipe.horizontalDirection() == QSwipeGesture.Right: + self.next_page() + elif swipe.verticalDirection() == QSwipeGesture.Up: + self.goto_previous_section() + elif swipe.horizontalDirection() == QSwipeGesture.Down: + self.goto_next_section() + def mouseReleaseEvent(self, ev): opos = self.document.ypos ret = QWebView.mouseReleaseEvent(self, ev) diff --git a/src/calibre/gui2/viewer/gestures.py b/src/calibre/gui2/viewer/gestures.py deleted file mode 100644 index 86d2f842b9..0000000000 --- a/src/calibre/gui2/viewer/gestures.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -import time - -class Gestures(object): - - def __init__(self): - self.in_progress = {} - - def get_boundary_point(self, event): - t = time.time() - id_ = None - if hasattr(event, 'touchPoints'): - tps = list(event.touchPoints()) - tp = None - for t in tps: - if t.isPrimary(): - tp = t - break - if tp is None: - tp = tps[0] - gp, p = tp.screenPos(), tp.pos() - id_ = tp.id() - else: - gp, p = event.globalPos(), event.pos() - return (t, gp, p, id_) - - def start_gesture(self, typ, event): - self.in_progress[typ] = self.get_boundary_point(event) - - def is_in_progress(self, typ): - return typ in self.in_progress - - def end_gesture(self, typ, event, widget_rect): - if not self.is_in_progress(typ): - return - start = self.in_progress[typ] - end = self.get_boundary_point(event) - if start[3] != end[3]: - return - timespan = end[0] - start[0] - start_pos, end_pos = start[1], end[1] - xspan = end_pos.x() - start_pos.x() - yspan = end_pos.y() - start_pos.y() - - width = widget_rect.width() - - if timespan < 1.1 and abs(xspan) >= width/5. and \ - abs(yspan) < abs(xspan)/5.: - # Quick horizontal gesture - return 'line'+('left' if xspan < 0 else 'right') - - return None - - - diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py index 430ced9fdd..de21158ed7 100644 --- a/src/calibre/utils/browser.py +++ b/src/calibre/utils/browser.py @@ -11,7 +11,11 @@ from cookielib import CookieJar from mechanize import Browser as B class Browser(B): - 'A cloneable mechanize browser' + ''' + A cloneable mechanize browser. Useful for multithreading. The idea is that + each thread has a browser clone. Every clone uses the same thread safe + cookie jar. All clones share the same browser configuration. + ''' def __init__(self): self._clone_actions = {} diff --git a/src/calibre/utils/coffeescript.py b/src/calibre/utils/coffeescript.py index 6178685bbb..057cfeef17 100644 --- a/src/calibre/utils/coffeescript.py +++ b/src/calibre/utils/coffeescript.py @@ -11,16 +11,32 @@ __docformat__ = 'restructuredtext en' Utilities to help with developing coffeescript based apps ''' import time, SimpleHTTPServer, SocketServer, os, subprocess +from io import BytesIO class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler): - generated_files = set() + special_resources = {} + compiled_cs = {} + + def send_head(self): + path = self.path + if path.endswith('.coffee'): + path = path[1:] if path.startswith('/') else path + path = self.special_resources.get(path, path) + raw, mtime = self.compile_coffeescript(path) + self.send_response(200) + self.send_header("Content-type", b'text/javascript') + self.send_header("Content-Length", bytes(len(raw))) + self.send_header("Last-Modified", self.date_time_string(int(mtime))) + self.end_headers() + return BytesIO(raw) + + return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self) def translate_path(self, path): - if path.endswith('jquery.js'): + path = self.special_resources.get(path, path) + if path.endswith('/jquery.js'): return P('content_server/jquery.js') - if path.endswith('.coffee'): - return self.compile_coffeescript(path[1:]) return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(self, path) @@ -31,36 +47,33 @@ class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler): except: time.sleep(0.01) sstat = os.stat(src) - return (not os.access(dest, os.R_OK) or sstat.st_mtime > - os.stat(dest).st_mtime) + return sstat.st_mtime > dest def compile_coffeescript(self, src): - dest = os.path.splitext(src)[0] + '.js' - self.generated_files.add(dest) - if self.newer(src, dest): - with open(dest, 'wb') as f: - try: - subprocess.check_call(['coffee', '-c', '-p', src], stdout=f) - except: - print('Compilation of %s failed'%src) - f.seek(0) - f.truncate() - f.write('// Compilation of coffeescript failed') - f.write('alert("Compilation of %s failed");'%src) - return dest + raw, mtime = self.compiled_cs.get(src, (None, 0)) + if self.newer(src, mtime): + mtime = time.time() + try: + raw = subprocess.check_output(['coffee', '-c', '-p', src]) + except: + print('Compilation of %s failed'%src) + cs = ''' + // Compilation of coffeescript failed + alert("Compilation of %s failed"); + '''%src + raw = cs.encode('utf-8') + self.compiled_cs[src] = (raw, mtime) + return raw, mtime -def serve(port=8000): - httpd = SocketServer.TCPServer(('localhost', port), Handler) +class HTTPD(SocketServer.TCPServer): + allow_reuse_address = True + +def serve(resources={}, port=8000): + Handler.special_resources = resources + httpd = HTTPD(('0.0.0.0', port), Handler) print('serving at localhost:%d'%port) try: - try: - httpd.serve_forever() - except KeyboardInterrupt: - raise SystemExit(0) - finally: - for x in Handler.generated_files: - try: - os.remove(x) - except: - pass + httpd.serve_forever() + except KeyboardInterrupt: + raise SystemExit(0)