From 05312e63890526bfad0c5a557623b9aefba43401 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Jan 2012 09:33:23 +0530 Subject: [PATCH 01/19] ... --- src/calibre/utils/browser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py index 430ced9fdd..de21158ed7 100644 --- a/src/calibre/utils/browser.py +++ b/src/calibre/utils/browser.py @@ -11,7 +11,11 @@ from cookielib import CookieJar from mechanize import Browser as B class Browser(B): - 'A cloneable mechanize browser' + ''' + A cloneable mechanize browser. Useful for multithreading. The idea is that + each thread has a browser clone. Every clone uses the same thread safe + cookie jar. All clones share the same browser configuration. + ''' def __init__(self): self._clone_actions = {} From bb94a8dbe2936d138256f44a12785ba270a4fe7e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Jan 2012 10:51:37 +0530 Subject: [PATCH 02/19] MOBI Input: Fix regression that caused a mixup of images when the MOBI file header contains an incorrect first image index pointer. Fixes #911243 (Private bug) --- src/calibre/ebooks/mobi/debug.py | 4 +++- src/calibre/ebooks/mobi/reader.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 0b773a51d8..7b04ee57a8 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -1410,9 +1410,11 @@ class MOBIFile(object): # {{{ self.mobi_header.extra_data_flags, decompress) for r in xrange(1, min(len(self.records), ntr+1))] self.image_records, self.binary_records = [], [] + image_index = 0 for i in xrange(fntbr, len(self.records)): if i in self.indexing_record_nums or i in self.huffman_record_nums: continue + image_index += 1 r = self.records[i] fmt = None if i >= fii and r.raw[:4] not in (b'FLIS', b'FCIS', b'SRCS', @@ -1422,7 +1424,7 @@ class MOBIFile(object): # {{{ except: pass if fmt is not None: - self.image_records.append(ImageRecord(len(self.image_records)+1, r, fmt)) + self.image_records.append(ImageRecord(image_index, r, fmt)) else: self.binary_records.append(BinaryRecord(i, r)) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index e58b492cef..c0a1687eaf 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -974,12 +974,13 @@ class MobiReader(object): continue processed_records.append(i) data = self.sections[i][0] + image_index += 1 if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}: - # A FLIS, FCIS, SRCS or EOF record, ignore + # This record is a known non image type, not need to try to + # load the image continue buf = cStringIO.StringIO(data) - image_index += 1 try: im = PILImage.open(buf) im = im.convert('RGB') From 896c451aef46c23bc18f152479e6ca53c93db642 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Jan 2012 11:02:27 +0530 Subject: [PATCH 03/19] ... --- src/calibre/ebooks/mobi/debug.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 7b04ee57a8..7f2695b5c4 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -1417,8 +1417,9 @@ class MOBIFile(object): # {{{ image_index += 1 r = self.records[i] fmt = None - if i >= fii and r.raw[:4] not in (b'FLIS', b'FCIS', b'SRCS', - b'\xe9\x8e\r\n'): + if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS', + b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP', + b'AUDI', b'VIDE'}: try: width, height, fmt = identify_data(r.raw) except: From 99b5a7569122f8236bf49b0028ca7b37078f56b1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Jan 2012 13:35:31 +0530 Subject: [PATCH 04/19] ... --- src/calibre/ebooks/oeb/display/cfi.coffee | 27 ++++++++++++++++--- .../display/test/{test.html => index.html} | 2 +- src/calibre/ebooks/oeb/display/test/test.py | 3 +-- src/calibre/utils/coffeescript.py | 13 ++++++--- 4 files changed, 35 insertions(+), 10 deletions(-) rename src/calibre/ebooks/oeb/display/test/{test.html => index.html} (83%) diff --git a/src/calibre/ebooks/oeb/display/cfi.coffee b/src/calibre/ebooks/oeb/display/cfi.coffee index 9c6e18ea06..a6cf638eed 100644 --- a/src/calibre/ebooks/oeb/display/cfi.coffee +++ b/src/calibre/ebooks/oeb/display/cfi.coffee @@ -4,7 +4,7 @@ ### Copyright 2011, Kovid Goyal Released under the GPLv3 License - Based on code originally written by Peter Sorotkin (epubcfi.js) + Based on code originally written by Peter Sorotkin (http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js) ### # log = (error) -> @@ -51,6 +51,24 @@ fstr = (d) -> # {{{ ans # }}} +get_current_time = (target) -> # {{{ + ans = 0 + if target.currentTime != undefined + ans = target.currentTime + fstr(ans) +# }}} + +set_current_time = (target, val) -> # {{{ + if target.currentTime == undefined + return + if target.readyState == 4 or target.readyState == "complete" + target.currentTime = val + else + fn = -> target.currentTime = val + target.addEventListener("canplay", fn, false) + +#}}} + class CanonicalFragmentIdentifier # This class is a namespace to expose CFI functions via the window.cfi @@ -102,7 +120,8 @@ class CanonicalFragmentIdentifier # Add id assertions for robustness where possible id = node.getAttribute?('id') - idspec = if id then "[#{ escape_for_cfi(id) }]" else '' + idok = id and id.match(/^[-a-zA-Z_0-9.\u007F-\uFFFF]+$/) + idspec = if idok then "[#{ escape_for_cfi(id) }]" else '' cfi = '/' + index + idspec + cfi node = p @@ -117,7 +136,7 @@ class CanonicalFragmentIdentifier error = null node = doc - until cfi.length <= 0 or error + until cfi.length < 1 or error if ( (r = cfi.match(simple_node_regex)) is not null ) # Path step target = parseInt(r[1]) assertion = r[2] @@ -253,7 +272,7 @@ class CanonicalFragmentIdentifier (if target.parentNode then target.parentNode else target).normalize() if name in ['audio', 'video'] - tail = "~" + fstr target.currentTime + tail = "~" + get_current_time(target) if name in ['img', 'video'] px = ((x + cwin.scrollX - target.offsetLeft)*100)/target.offsetWidth diff --git a/src/calibre/ebooks/oeb/display/test/test.html b/src/calibre/ebooks/oeb/display/test/index.html similarity index 83% rename from src/calibre/ebooks/oeb/display/test/test.html rename to src/calibre/ebooks/oeb/display/test/index.html index 3dbda451c0..1b93bb9739 100644 --- a/src/calibre/ebooks/oeb/display/test/test.html +++ b/src/calibre/ebooks/oeb/display/test/index.html @@ -2,7 +2,7 @@ Testing CFI functionality - + diff --git a/src/calibre/ebooks/oeb/display/test/test.py b/src/calibre/ebooks/oeb/display/test/test.py index e217027312..28a1becaeb 100644 --- a/src/calibre/ebooks/oeb/display/test/test.py +++ b/src/calibre/ebooks/oeb/display/test/test.py @@ -16,10 +16,9 @@ except ImportError: if False: init_calibre, serve from calibre.utils.coffeescript import serve - def run_devel_server(): os.chdir(os.path.dirname(os.path.abspath(__file__))) - serve() + serve(resources={'/cfi.coffee':'../cfi.coffee'}) if __name__ == '__main__': run_devel_server() diff --git a/src/calibre/utils/coffeescript.py b/src/calibre/utils/coffeescript.py index 6178685bbb..b80532be3b 100644 --- a/src/calibre/utils/coffeescript.py +++ b/src/calibre/utils/coffeescript.py @@ -15,12 +15,15 @@ import time, SimpleHTTPServer, SocketServer, os, subprocess class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler): generated_files = set() + special_resources = {} def translate_path(self, path): + path = self.special_resources.get(path, path) if path.endswith('jquery.js'): return P('content_server/jquery.js') if path.endswith('.coffee'): - return self.compile_coffeescript(path[1:]) + path = path[1:] if path.startswith('/') else path + return self.compile_coffeescript(path) return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(self, path) @@ -49,8 +52,12 @@ class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler): f.write('alert("Compilation of %s failed");'%src) return dest -def serve(port=8000): - httpd = SocketServer.TCPServer(('localhost', port), Handler) +class HTTPD(SocketServer.TCPServer): + allow_reuse_address = True + +def serve(resources={}, port=8000): + Handler.special_resources = resources + httpd = HTTPD(('localhost', port), Handler) print('serving at localhost:%d'%port) try: try: From 4d2b798cbc8ca1cf7652a7ca65421737ee622bc7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Jan 2012 16:01:44 +0530 Subject: [PATCH 05/19] ... --- src/calibre/ebooks/oeb/display/cfi.coffee | 18 ++++-- src/calibre/ebooks/oeb/display/test/test.py | 2 +- src/calibre/utils/coffeescript.py | 67 +++++++++++---------- 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/src/calibre/ebooks/oeb/display/cfi.coffee b/src/calibre/ebooks/oeb/display/cfi.coffee index a6cf638eed..dea5b2fa4b 100644 --- a/src/calibre/ebooks/oeb/display/cfi.coffee +++ b/src/calibre/ebooks/oeb/display/cfi.coffee @@ -107,12 +107,12 @@ class CanonicalFragmentIdentifier cfi = "!" + cfi continue break - # Increase index by the length of all previous sibling text nodes + # Find position of node in parent index = 0 child = p.firstChild while true - index |= 1 - if child.nodeType in [1, 7] + index |= 1 # Increment index by 1 if it is even + if child.nodeType == 1 index++ if child == node break @@ -120,8 +120,7 @@ class CanonicalFragmentIdentifier # Add id assertions for robustness where possible id = node.getAttribute?('id') - idok = id and id.match(/^[-a-zA-Z_0-9.\u007F-\uFFFF]+$/) - idspec = if idok then "[#{ escape_for_cfi(id) }]" else '' + idspec = if id then "[#{ escape_for_cfi(id) }]" else '' cfi = '/' + index + idspec + cfi node = p @@ -155,11 +154,18 @@ class CanonicalFragmentIdentifier error = "No matching child found for CFI: " + cfi break index |= 1 # Increment index by 1 if it is even - if child.nodeType in [1, 7] # We have an element or a PI + if child.nodeType == 1 index++ if ( index == target ) cfi = cfi.substr(r[0].length) node = child + if assertion and node.id != assertion + # The found child does not match the id assertion, + # trust the id assertion if an element with that id + # exists + child = doc.getElementById(assertion) + if child + node = child break child = child.nextSibling diff --git a/src/calibre/ebooks/oeb/display/test/test.py b/src/calibre/ebooks/oeb/display/test/test.py index 28a1becaeb..a09f45dcf0 100644 --- a/src/calibre/ebooks/oeb/display/test/test.py +++ b/src/calibre/ebooks/oeb/display/test/test.py @@ -18,7 +18,7 @@ except ImportError: def run_devel_server(): os.chdir(os.path.dirname(os.path.abspath(__file__))) - serve(resources={'/cfi.coffee':'../cfi.coffee'}) + serve(resources={'cfi.coffee':'../cfi.coffee'}) if __name__ == '__main__': run_devel_server() diff --git a/src/calibre/utils/coffeescript.py b/src/calibre/utils/coffeescript.py index b80532be3b..302acb388e 100644 --- a/src/calibre/utils/coffeescript.py +++ b/src/calibre/utils/coffeescript.py @@ -10,20 +10,32 @@ __docformat__ = 'restructuredtext en' ''' Utilities to help with developing coffeescript based apps ''' -import time, SimpleHTTPServer, SocketServer, os, subprocess +import time, SimpleHTTPServer, SocketServer, os, subprocess, cStringIO class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler): - generated_files = set() special_resources = {} + compiled_cs = {} + + def send_head(self): + path = self.path + if path.endswith('.coffee'): + path = path[1:] if path.startswith('/') else path + path = self.special_resources.get(path, path) + raw, mtime = self.compile_coffeescript(path) + self.send_response(200) + self.send_header("Content-type", b'text/javascript') + self.send_header("Content-Length", bytes(len(raw))) + self.send_header("Last-Modified", self.date_time_string(int(mtime))) + self.end_headers() + return cStringIO.StringIO(raw) + + return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self) def translate_path(self, path): path = self.special_resources.get(path, path) - if path.endswith('jquery.js'): + if path.endswith('/jquery.js'): return P('content_server/jquery.js') - if path.endswith('.coffee'): - path = path[1:] if path.startswith('/') else path - return self.compile_coffeescript(path) return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(self, path) @@ -34,23 +46,23 @@ class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler): except: time.sleep(0.01) sstat = os.stat(src) - return (not os.access(dest, os.R_OK) or sstat.st_mtime > - os.stat(dest).st_mtime) + return sstat.st_mtime > dest def compile_coffeescript(self, src): - dest = os.path.splitext(src)[0] + '.js' - self.generated_files.add(dest) - if self.newer(src, dest): - with open(dest, 'wb') as f: - try: - subprocess.check_call(['coffee', '-c', '-p', src], stdout=f) - except: - print('Compilation of %s failed'%src) - f.seek(0) - f.truncate() - f.write('// Compilation of coffeescript failed') - f.write('alert("Compilation of %s failed");'%src) - return dest + raw, mtime = self.compiled_cs.get(src, (None, 0)) + if self.newer(src, mtime): + mtime = time.time() + try: + raw = subprocess.check_output(['coffee', '-c', '-p', src]) + except: + print('Compilation of %s failed'%src) + cs = ''' + // Compilation of coffeescript failed + alert("Compilation of %s failed"); + '''%src + raw = cs.encode('utf-8') + self.compiled_cs[src] = (raw, mtime) + return raw, mtime class HTTPD(SocketServer.TCPServer): allow_reuse_address = True @@ -60,14 +72,7 @@ def serve(resources={}, port=8000): httpd = HTTPD(('localhost', port), Handler) print('serving at localhost:%d'%port) try: - try: - httpd.serve_forever() - except KeyboardInterrupt: - raise SystemExit(0) - finally: - for x in Handler.generated_files: - try: - os.remove(x) - except: - pass + httpd.serve_forever() + except KeyboardInterrupt: + raise SystemExit(0) From c5c5ad2366bb61734f64d9d17c111c5e3c7c955c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Jan 2012 18:13:55 +0530 Subject: [PATCH 06/19] ... --- src/calibre/ebooks/pml/pmlconverter.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 7bb23946ca..d4f409d3cc 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -80,7 +80,7 @@ class PML_HTMLizer(object): 'b': ('', ''), 'l': ('', ''), 'k': ('', ''), - 'FN': ('

', '

<return
'), + 'FN': ('

', '

return
'), 'SB': ('

', '

return
'), } @@ -143,7 +143,7 @@ class PML_HTMLizer(object): 'd', 'b', ] - + NEW_LINE_EXCHANGE_STATES = { 'h1': 'h1c', } @@ -230,7 +230,7 @@ class PML_HTMLizer(object): div = [] span = [] other = [] - + for key, val in state.items(): if key in self.NEW_LINE_EXCHANGE_STATES and val[0]: state[self.NEW_LINE_EXCHANGE_STATES[key]] = val @@ -644,7 +644,7 @@ class PML_HTMLizer(object): empty_count = 0 text = self.end_line() parsed.append(text) - + # Basic indent will be set if the \t starts the line or # if we are in a continuing \t block. if basic_indent: @@ -666,7 +666,7 @@ class PML_HTMLizer(object): parsed.append(self.STATES_TAGS['T'][1]) indent_state['T'] = False adv_indent_val = '' - + output.append(u''.join(parsed)) line.close() @@ -677,7 +677,7 @@ class PML_HTMLizer(object): def get_toc(self): ''' Toc can have up to 5 levels, 0 - 4 inclusive. - + This function will add items to their appropriate depth in the TOC tree. If the specified depth is invalid (item would not have a valid parent) add From 6f1824c2eb90b4ab41855b61fc53cc4252469848 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Jan 2012 19:31:37 +0530 Subject: [PATCH 07/19] Goal by faber1971 --- recipes/goal.recipe | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 recipes/goal.recipe diff --git a/recipes/goal.recipe b/recipes/goal.recipe new file mode 100644 index 0000000000..23ccbb4744 --- /dev/null +++ b/recipes/goal.recipe @@ -0,0 +1,13 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1325677767(BasicNewsRecipe): + title = u'Goal' + oldest_article = 1 + language = 'it' + max_articles_per_feed = 100 + auto_cleanup = True + remove_tags_after = [dict(id='article_content')] + feeds = [(u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss')] + __author__ = 'faber1971' + description = 'Sports news from Italy' + From 5fb8a6d833ca83df9ce91ab28c0b0e55dfb7b767 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Jan 2012 21:50:52 +0530 Subject: [PATCH 08/19] EPUB CFI basically works. Now for the more comprehensive testing. --- src/calibre/ebooks/oeb/display/cfi.coffee | 57 +++++++++-------- .../ebooks/oeb/display/test/cfi-test.coffee | 58 +++++++++++++++--- .../ebooks/oeb/display/test/index.html | 2 + .../ebooks/oeb/display/test/marker.png | Bin 0 -> 751 bytes src/calibre/utils/coffeescript.py | 5 +- 5 files changed, 87 insertions(+), 35 deletions(-) create mode 100644 src/calibre/ebooks/oeb/display/test/marker.png diff --git a/src/calibre/ebooks/oeb/display/cfi.coffee b/src/calibre/ebooks/oeb/display/cfi.coffee index dea5b2fa4b..e76886575d 100644 --- a/src/calibre/ebooks/oeb/display/cfi.coffee +++ b/src/calibre/ebooks/oeb/display/cfi.coffee @@ -58,16 +58,6 @@ get_current_time = (target) -> # {{{ fstr(ans) # }}} -set_current_time = (target, val) -> # {{{ - if target.currentTime == undefined - return - if target.readyState == 4 or target.readyState == "complete" - target.currentTime = val - else - fn = -> target.currentTime = val - target.addEventListener("canplay", fn, false) - -#}}} class CanonicalFragmentIdentifier @@ -76,13 +66,23 @@ class CanonicalFragmentIdentifier constructor: () -> + set_current_time: (target, val) -> # {{{ + if target.currentTime == undefined + return + if target.readyState == 4 or target.readyState == "complete" + target.currentTime = val + else + fn = -> target.currentTime = val + target.addEventListener("canplay", fn, false) + #}}} + encode: (doc, node, offset, tail) -> # {{{ cfi = tail or "" # Handle the offset, if any switch node.nodeType when 1 # Element node - if typeoff(offset) == 'number' + if typeof(offset) == 'number' node = node.childNodes.item(offset) when 3, 4, 5, 6 # Text/entity/CDATA node offset or= 0 @@ -136,7 +136,7 @@ class CanonicalFragmentIdentifier node = doc until cfi.length < 1 or error - if ( (r = cfi.match(simple_node_regex)) is not null ) # Path step + if (r = cfi.match(simple_node_regex)) # Path step target = parseInt(r[1]) assertion = r[2] if assertion @@ -318,22 +318,31 @@ class CanonicalFragmentIdentifier try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}] else try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}] - k = 0 a = null rects = null node_len = node.nodeValue.length - until rects or rects.length or k >= try_list.length - t = try_list[k++] - start_offset = r.offset + t.start - end_offset = r.offset + t.end - a = t.a - if start_offset < 0 or end_offset >= node_len - continue - range.setStart(node, start_offset) - range.setEnd(node, end_offset) - rects = range.getClientRects() + offset = r.offset + for i in [0, 1] + # Try reducing the offset by 1 if we get no match as if it refers to the position after the + # last character we wont get a match with getClientRects + offset = r.offset - i + if offset < 0 + offset = 0 + k = 0 + until rects?.length or k >= try_list.length + t = try_list[k++] + start_offset = offset + t.start + end_offset = offset + t.end + a = t.a + if start_offset < 0 or end_offset >= node_len + continue + range.setStart(node, start_offset) + range.setEnd(node, end_offset) + rects = range.getClientRects() + if rects?.length + break - if not rects or not rects.length + if not rects?.length log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }") return null diff --git a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee index 056d24b396..e371bab4df 100644 --- a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee +++ b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee @@ -6,19 +6,59 @@ Released under the GPLv3 License ### +log = (error) -> + if error + if window?.console?.log + window.console.log(error) + else if process?.stdout?.write + process.stdout.write(error + '\n') + viewport_top = (node) -> $(node).offset().top - window.pageYOffset viewport_left = (node) -> $(node).offset().left - window.pageXOffset -window.onload = -> - h1 = document.getElementsByTagName('h1')[0] - x = h1.scrollLeft + 150 - y = viewport_top(h1) + h1.offsetHeight/2 - e = document.elementFromPoint x, y - if e.getAttribute('id') != 'first-h1' - alert 'Failed to find top h1' - return - alert window.cfi.at x, y +show_cfi = (dont_seek) -> + if window.current_cfi + pos = window.cfi.point(window.current_cfi) + ms = document.getElementById("marker").style + if pos + ms.visibility = "visible" + ms.top = (pos.y - 30) + window.scrollY + "px" + ms.left = (pos.x - 1) + window.scrollX + "px" + if not dont_seek + if typeof pos.time == "number" + window.cfi.set_current_time(pos.node, pos.time) + scrollTo(0, pos.y - 30) + null + +RELOAD = true + +mark_and_reload = (evt) -> + window.current_cfi = window.cfi.at(evt.clientX, evt.clientY) + if not RELOAD + show_cfi(true) + if window.current_cfi + fn = () -> + newloc = window.location.href.replace(/#.*$/, '') + "#epubcfi(#{ window.current_cfi })" + window.location.replace(newloc) + if RELOAD + window.location.reload() + + setTimeout(fn, 1) + null + +window.onload = -> + window.onscroll = show_cfi + window.onresize = show_cfi + document.onclick = mark_and_reload + for iframe in document.getElementsByTagName("iframe") + iframe.contentWindow.onscroll = show_cfi + r = location.hash.match(/#epubcfi\((.+)\)$/) + if r + window.current_cfi = r[1] + setTimeout(show_cfi, 1) + null + diff --git a/src/calibre/ebooks/oeb/display/test/index.html b/src/calibre/ebooks/oeb/display/test/index.html index 1b93bb9739..4ab7dca502 100644 --- a/src/calibre/ebooks/oeb/display/test/index.html +++ b/src/calibre/ebooks/oeb/display/test/index.html @@ -8,6 +8,8 @@

Testing CFI functionality

+ +

0123

diff --git a/src/calibre/ebooks/oeb/display/test/marker.png b/src/calibre/ebooks/oeb/display/test/marker.png new file mode 100644 index 0000000000000000000000000000000000000000..6dcc1fb7ba693b0af2e108a59e3e99c439bfdc55 GIT binary patch literal 751 zcmVvA>c ziyb~c5Rlt>S1qO$pR{q*&Xq}-IOlYj{(#*&8t@8Z^QX*Y45*O?UsO3>W%~=Xw^J? zB69JK_?I6^_rh!GsAUM!1-ankJ(2I%M9#lUzKLB9b|o+cuzbSe>kSMLsqdc<5dweN zHq1I(J%RV<-kyH(b5rE#p?xz+@f(+eonnB#gjRDkt*gveMj&-V)(Uw-cX9u!34Dg% zorHnNz(kNXRzd&=?cmThQqf5ml-Nt~{xTAtBX}4;6+tQ%690wBl?O>I^btoi*IzaCjXEG%C3AE{QDiXgtoC(!AMi_5P;K4-O$qU`lx~d8qZ%- zLgUR_$LW{XEvwc6v%`(h2TqATkBfkpBA_J3(0G!PppO}7Bv$mSn#IHnly=^;A)}3F z$KX0oT>8I@1zH##<4(r}YFMlg5tQJ}^)lM$5n!}8)9V$x8o=@nAJ7eO$270fmAMb3 z=Dx3~*jp5kAUf6@FC|{a{{-JPUd4K`m@44Nc55yg`HNi8%sdBbkua~nR{^)Vz#8p~ zuHsdto-u!5Ud0(?Wm2oG1v Date: Thu, 5 Jan 2012 08:55:07 +0530 Subject: [PATCH 09/19] money.pl by intromatyk --- recipes/money_pl.recipe | 76 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 recipes/money_pl.recipe diff --git a/recipes/money_pl.recipe b/recipes/money_pl.recipe new file mode 100644 index 0000000000..075264f8f7 --- /dev/null +++ b/recipes/money_pl.recipe @@ -0,0 +1,76 @@ +import re + +from calibre.web.feeds.news import BasicNewsRecipe + +class FocusRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = u'intromatyk ' + language = 'pl' + version = 1 + + title = u'Money.pl' + category = u'News' + description = u'Informacje finansowe z kraju i ze świata. Aktualne i archiwalne: notowania giełdowe, kursy walut, wskaźniki gospodarcze.' + remove_empty_feeds= True + no_stylesheets=True + oldest_article = 1 + max_articles_per_feed = 100000 + recursions = 0 + + no_stylesheets = True + remove_javascript = True + + simultaneous_downloads = 2 + + r = re.compile('.*(?Phttp:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*') + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'artykul'})) + remove_tags = [dict(name='ul', attrs={'class':'socialStuff'})] + + extra_css = ''' + body {font-family: Arial,Helvetica,sans-serif ;} + h1{text-align: left;} + h2{font-size: medium; font-weight: bold;} + p.lead {font-weight: bold; text-align: left;} + .authordate {font-size: small; color: #696969;} + .fot{font-size: x-small; color: #666666;} + ''' + + feeds = [ + ('Wiadomosci z kraju', 'http://money.pl.feedsportal.com/c/33900/f/612847/index.rss'), + ('Wiadomosci ze swiata', 'http://money.pl.feedsportal.com/c/33900/f/612848/index.rss'), + ('Gospodarka', 'http://money.pl.feedsportal.com/c/33900/f/612849/index.rss'), + ('Waluty', 'http://money.pl.feedsportal.com/c/33900/f/612850/index.rss'), + ('Gielda', 'http://money.pl.feedsportal.com/c/33900/f/612851/index.rss'), + ('Banki', 'http://money.pl.feedsportal.com/c/33900/f/612852/index.rss'), + ('Fundusze', 'http://money.pl.feedsportal.com/c/33900/f/612853/index.rss'), + ('Emerytury', 'http://money.pl.feedsportal.com/c/33900/f/612854/index.rss'), + ('Podatki', 'http://money.pl.feedsportal.com/c/33900/f/612855/index.rss'), + ('Ubezpieczenia', 'http://money.pl.feedsportal.com/c/33900/f/612856/index.rss'), + ('Poradniki', 'http://money.pl.feedsportal.com/c/33900/f/612857/index.rss'), + ('Raporty', 'http://money.pl.feedsportal.com/c/33900/f/612858/index.rss'), + ('Motoryzacja', 'http://money.pl.feedsportal.com/c/33900/f/612859/index.rss'), + ('Manager', 'http://money.pl.feedsportal.com/c/33900/f/612860/index.rss'), + ('Dla firm', 'http://money.pl.feedsportal.com/c/33900/f/612861/index.rss'), + ('Prawo', 'http://money.pl.feedsportal.com/c/33900/f/612862/index.rss'), + ('Nieruchomosci', 'http://money.pl.feedsportal.com/c/33900/f/612863/index.rss'), + ('Praca', 'http://money.pl.feedsportal.com/c/33900/f/612864/index.rss'), + + + ] + + def print_version(self, url): + if url.count ('money.pl.feedsportal.com'): + u = url.find('0Cartykul0C') + u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[u + 21:] + u = u.replace('0C', '/') + u = u.replace('A', '') + u = u.replace ('0E','-') + u = u.replace ('0P',';') + u = u.replace ('0H',',') + u = u.replace ('0B','.') + u = u.replace (',0,',',-1,') + u = u.replace('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', '') + else: + u = url.replace('/nc/1','/do-druku/1') + return u From ee8de4b180ae62bfa97060c829cbd399a90081cd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 09:26:09 +0530 Subject: [PATCH 10/19] Improve Alternet --- recipes/alternet.recipe | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/recipes/alternet.recipe b/recipes/alternet.recipe index f885225d31..e58376cc42 100644 --- a/recipes/alternet.recipe +++ b/recipes/alternet.recipe @@ -10,11 +10,11 @@ class Alternet(BasicNewsRecipe): category = 'News, Magazine' description = 'News magazine and online community' feeds = [ - (u'Front Page', u'http://feeds.feedblitz.com/alternet'), - (u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'), - (u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'), - (u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage') - ] + (u'Front Page', u'http://feeds.feedblitz.com/alternet'), + (u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'), + (u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'), + (u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage') + ] remove_attributes = ['width', 'align','cellspacing'] remove_javascript = True use_embedded_content = False @@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe): self.temp_files[-1].write(html) self.temp_files[-1].close() return self.temp_files[-1].name + + conversion_options = {'linearize_tables': True} From 50bbff2417f00d434409e697d53d1503f83c77db Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 09:34:16 +0530 Subject: [PATCH 11/19] ... --- src/calibre/gui2/add.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 2fc14c8238..7cdac3b845 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -14,7 +14,7 @@ from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata import MetaInformation from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG from calibre.utils.config import prefs -from calibre import prints +from calibre import prints, force_unicode, as_unicode single_shot = partial(QTimer.singleShot, 75) @@ -66,7 +66,8 @@ class RecursiveFind(QThread): # {{{ if self.canceled: return self.update.emit( - _('Searching in')+' '+dirpath[0]) + _('Searching in')+' '+force_unicode(dirpath[0], + filesystem_encoding)) self.books += list(self.db.find_books_in_directory(dirpath[0], self.single_book_per_directory)) @@ -82,10 +83,7 @@ class RecursiveFind(QThread): # {{{ except Exception as err: import traceback traceback.print_exc() - try: - msg = unicode(err) - except: - msg = repr(err) + msg = as_unicode(err) self.found.emit(msg) return From c1376fbb381426516c037484280b0da39c45af30 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 09:36:15 +0530 Subject: [PATCH 12/19] Use Qt's builtin swipe gesture recognizer instead of rolling our own --- src/calibre/gui2/viewer/documentview.py | 39 +++++++--------- src/calibre/gui2/viewer/gestures.py | 61 ------------------------- 2 files changed, 17 insertions(+), 83 deletions(-) delete mode 100644 src/calibre/gui2/viewer/gestures.py diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 4e8d59f61d..70da33b7f2 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -12,14 +12,13 @@ from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, QPainter, QPalette, QBrush, QFontDatabase, QDialog, QColor, QPoint, QImage, QRegion, QVariant, QIcon, QFont, pyqtSignature, QAction, QByteArray, QMenu, - pyqtSignal) + pyqtSignal, QSwipeGesture) from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings from calibre.utils.config import Config, StringConfig from calibre.utils.localization import get_language from calibre.gui2.viewer.config_ui import Ui_Dialog from calibre.gui2.viewer.flip import SlideFlip -from calibre.gui2.viewer.gestures import Gestures from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig from calibre.constants import iswindows from calibre import prints, guess_type @@ -514,7 +513,6 @@ class DocumentView(QWebView): # {{{ def __init__(self, *args): QWebView.__init__(self, *args) self.flipper = SlideFlip(self) - self.gestures = Gestures() self.is_auto_repeat_event = False self.debug_javascript = False self.shortcuts = Shortcuts(SHORTCUTS, 'shortcuts/viewer') @@ -582,6 +580,7 @@ class DocumentView(QWebView): # {{{ else: m.addAction(name, a[key], self.shortcuts.get_sequences(key)[0]) self.goto_location_action.setMenu(self.goto_location_menu) + self.grabGesture(Qt.SwipeGesture) def goto_next_section(self, *args): if self.manager is not None: @@ -1047,28 +1046,24 @@ class DocumentView(QWebView): # {{{ self.manager.viewport_resized(self.scroll_fraction) def event(self, ev): - typ = ev.type() - if typ == ev.TouchBegin: - try: - self.gestures.start_gesture('touch', ev) - except: - import traceback - traceback.print_exc() - elif typ == ev.TouchEnd: - try: - gesture = self.gestures.end_gesture('touch', ev, self.rect()) - except: - import traceback - traceback.print_exc() - if gesture is not None: - ev.accept() - if gesture == 'lineleft': - self.next_page() - elif gesture == 'lineright': - self.previous_page() + if ev.type() == ev.Gesture: + swipe = ev.gesture(Qt.SwipeGesture) + if swipe is not None: + self.handle_swipe(swipe) return True return QWebView.event(self, ev) + def handle_swipe(self, swipe): + if swipe.state() == Qt.GestureFinished: + if swipe.horizontalDirection() == QSwipeGesture.Left: + self.previous_page() + elif swipe.horizontalDirection() == QSwipeGesture.Right: + self.next_page() + elif swipe.verticalDirection() == QSwipeGesture.Up: + self.goto_previous_section() + elif swipe.horizontalDirection() == QSwipeGesture.Down: + self.goto_next_section() + def mouseReleaseEvent(self, ev): opos = self.document.ypos ret = QWebView.mouseReleaseEvent(self, ev) diff --git a/src/calibre/gui2/viewer/gestures.py b/src/calibre/gui2/viewer/gestures.py deleted file mode 100644 index 86d2f842b9..0000000000 --- a/src/calibre/gui2/viewer/gestures.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -import time - -class Gestures(object): - - def __init__(self): - self.in_progress = {} - - def get_boundary_point(self, event): - t = time.time() - id_ = None - if hasattr(event, 'touchPoints'): - tps = list(event.touchPoints()) - tp = None - for t in tps: - if t.isPrimary(): - tp = t - break - if tp is None: - tp = tps[0] - gp, p = tp.screenPos(), tp.pos() - id_ = tp.id() - else: - gp, p = event.globalPos(), event.pos() - return (t, gp, p, id_) - - def start_gesture(self, typ, event): - self.in_progress[typ] = self.get_boundary_point(event) - - def is_in_progress(self, typ): - return typ in self.in_progress - - def end_gesture(self, typ, event, widget_rect): - if not self.is_in_progress(typ): - return - start = self.in_progress[typ] - end = self.get_boundary_point(event) - if start[3] != end[3]: - return - timespan = end[0] - start[0] - start_pos, end_pos = start[1], end[1] - xspan = end_pos.x() - start_pos.x() - yspan = end_pos.y() - start_pos.y() - - width = widget_rect.width() - - if timespan < 1.1 and abs(xspan) >= width/5. and \ - abs(yspan) < abs(xspan)/5.: - # Quick horizontal gesture - return 'line'+('left' if xspan < 0 else 'right') - - return None - - - From fd8e334e1621ede1a061dc689408ebd04ada4254 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 12:43:44 +0530 Subject: [PATCH 13/19] EPUB CFI now works on firefox --- src/calibre/ebooks/oeb/display/cfi.coffee | 71 +++++++++++++++++++++-- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/oeb/display/cfi.coffee b/src/calibre/ebooks/oeb/display/cfi.coffee index e76886575d..076277eaed 100644 --- a/src/calibre/ebooks/oeb/display/cfi.coffee +++ b/src/calibre/ebooks/oeb/display/cfi.coffee @@ -5,14 +5,17 @@ Copyright 2011, Kovid Goyal Released under the GPLv3 License Based on code originally written by Peter Sorotkin (http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js) + + This script requires the createRange method on the document object that must create a W3C compliant range object ### -# -log = (error) -> + +log = (error) -> # {{{ if error if window?.console?.log window.console.log(error) else if process?.stdout?.write process.stdout.write(error + '\n') +# }}} # CFI escaping {{{ escape_for_cfi = (raw) -> @@ -58,6 +61,56 @@ get_current_time = (target) -> # {{{ fstr(ans) # }}} +# Equivalent for caretRangeFromPoint for non WebKit browsers {{{ +range_has_point = (range, x, y) -> + for rect in range.getClientRects() + if (rect.left <= x <= rect.right) and (rect.top <= y <= rect.bottom) + return true + return false + +offset_in_text_node = (node, range, x, y) -> + limits = [0, node.nodeValue.length] + while limits[0] != limits[1] + pivot = Math.floor( (limits[0] + limits[1]) / 2 ) + lr = [limits[0], pivot] + rr = [pivot+1, limits[1]] + range.setStart(node, pivot) + range.setEnd(node, pivot+1) + if range_has_point(range, x, y) + return pivot + range.setStart(node, rr[0]) + range.setEnd(node, rr[1]) + if range_has_point(range, x, y) + limits = rr + continue + range.setStart(node, lr[0]) + range.setEnd(node, lr[1]) + if range_has_point(range, x, y) + limits = lr + continue + break + return limits[0] + +find_offset_for_point = (x, y, node, cdoc) -> + range = cdoc.createRange() + child = node.firstChild + last_child = null + while child + if child.nodeType in [3, 4, 5, 6] and child.nodeValue?.length + range.setStart(child, 0) + range.setEnd(child, child.nodeValue.length) + if range_has_point(range, x, y) + return [child, offset_in_text_node(child, range, x, y)] + last_child = child + child = child.nextSibling + + if not last_child + throw "#{node} has no children" + # The point must be after the last bit of text + pos = 0 + return [last_child, last_child.nodeValue.length] + +# }}} class CanonicalFragmentIdentifier @@ -65,6 +118,11 @@ class CanonicalFragmentIdentifier # object constructor: () -> + this.COMPAT_ERR = "Your browser does not support the createRange function. Update it to a newer version." + + is_compatible: () -> + if not window.document.createRange + throw this.COMPAT_ERR set_current_time: (target, val) -> # {{{ if target.currentTime == undefined @@ -290,9 +348,12 @@ class CanonicalFragmentIdentifier if range target = range.startContainer offset = range.startOffset + else + throw "Failed to find range from point (#{ x }, #{ y })" + else if cdoc.createRange + [target, offset] = find_offset_for_point(x, y, target, cdoc) else - # TODO: implement a span bisection algorithm for UAs - # without caretRangeFromPoint (Gecko, IE) + throw this.COMPAT_ERR this.encode(doc, target, offset, tail) # }}} @@ -313,6 +374,8 @@ class CanonicalFragmentIdentifier if typeof(r.offset) == "number" # Character offset + if not ndoc.createRange + throw this.COMPAT_ERR range = ndoc.createRange() if r.forward try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}] From e66b4323fdd342c6382b99e1b81b98297dda629d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 13:17:47 +0530 Subject: [PATCH 14/19] EPUB CFI works on IE 9. That was easier than I thought :) --- src/calibre/ebooks/oeb/display/cfi.coffee | 27 ++++++++++++++----- .../ebooks/oeb/display/test/cfi-test.coffee | 10 +++---- src/calibre/utils/coffeescript.py | 2 +- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/oeb/display/cfi.coffee b/src/calibre/ebooks/oeb/display/cfi.coffee index 076277eaed..29815c8f77 100644 --- a/src/calibre/ebooks/oeb/display/cfi.coffee +++ b/src/calibre/ebooks/oeb/display/cfi.coffee @@ -4,9 +4,11 @@ ### Copyright 2011, Kovid Goyal Released under the GPLv3 License - Based on code originally written by Peter Sorotkin (http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js) + Based on code originally written by Peter Sorotkin + (http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js) - This script requires the createRange method on the document object that must create a W3C compliant range object + To check if this script is compatible with the current browser, call + window.cfi.is_compatible() it will throw an exception if not compatible. ### log = (error) -> # {{{ @@ -118,11 +120,24 @@ class CanonicalFragmentIdentifier # object constructor: () -> - this.COMPAT_ERR = "Your browser does not support the createRange function. Update it to a newer version." + this.CREATE_RANGE_ERR = "Your browser does not support the createRange function. Update it to a newer version." + this.IE_ERR = "Your browser is too old. You need Internet Explorer version 8 or newer." + this.is_compatible() is_compatible: () -> if not window.document.createRange - throw this.COMPAT_ERR + throw this.CREATE_RANGE_ERR + # Check if Internet Explorer >= 8 as getClientRects returns physical + # rather than logical pixels on older IE + div = document.createElement('div') + ver = 3 + while true + div.innerHTML = "" + if div.getElementsByTagName('i').length == 0 + break + if ver > 4 and ver < 8 + # We have IE < 8 + throw this.IE_ERR set_current_time: (target, val) -> # {{{ if target.currentTime == undefined @@ -353,7 +368,7 @@ class CanonicalFragmentIdentifier else if cdoc.createRange [target, offset] = find_offset_for_point(x, y, target, cdoc) else - throw this.COMPAT_ERR + throw this.CREATE_RANGE_ERR this.encode(doc, target, offset, tail) # }}} @@ -375,7 +390,7 @@ class CanonicalFragmentIdentifier if typeof(r.offset) == "number" # Character offset if not ndoc.createRange - throw this.COMPAT_ERR + throw this.CREATE_RANGE_ERR range = ndoc.createRange() if r.forward try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}] diff --git a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee index e371bab4df..5c3bb6ee16 100644 --- a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee +++ b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee @@ -22,18 +22,18 @@ viewport_left = (node) -> show_cfi = (dont_seek) -> if window.current_cfi pos = window.cfi.point(window.current_cfi) - ms = document.getElementById("marker").style if pos - ms.visibility = "visible" - ms.top = (pos.y - 30) + window.scrollY + "px" - ms.left = (pos.x - 1) + window.scrollX + "px" + ms = $("#marker") + ms.offset({left:pos.x-1, top:pos.y-30}) + ms.css('visibility', 'visible') if not dont_seek if typeof pos.time == "number" window.cfi.set_current_time(pos.node, pos.time) scrollTo(0, pos.y - 30) null -RELOAD = true +# Set this to true to have the browser reload the page with the current cfi +RELOAD = false mark_and_reload = (evt) -> window.current_cfi = window.cfi.at(evt.clientX, evt.clientY) diff --git a/src/calibre/utils/coffeescript.py b/src/calibre/utils/coffeescript.py index 13db6011f0..057cfeef17 100644 --- a/src/calibre/utils/coffeescript.py +++ b/src/calibre/utils/coffeescript.py @@ -70,7 +70,7 @@ class HTTPD(SocketServer.TCPServer): def serve(resources={}, port=8000): Handler.special_resources = resources - httpd = HTTPD(('localhost', port), Handler) + httpd = HTTPD(('0.0.0.0', port), Handler) print('serving at localhost:%d'%port) try: httpd.serve_forever() From 54d4991b860419d66ed0dd45685bd4c53e377dff Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 13:19:12 +0530 Subject: [PATCH 15/19] ... --- src/calibre/ebooks/oeb/display/cfi.coffee | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/oeb/display/cfi.coffee b/src/calibre/ebooks/oeb/display/cfi.coffee index 29815c8f77..3bc2828582 100644 --- a/src/calibre/ebooks/oeb/display/cfi.coffee +++ b/src/calibre/ebooks/oeb/display/cfi.coffee @@ -119,12 +119,12 @@ class CanonicalFragmentIdentifier # This class is a namespace to expose CFI functions via the window.cfi # object - constructor: () -> + constructor: () -> # {{{ this.CREATE_RANGE_ERR = "Your browser does not support the createRange function. Update it to a newer version." this.IE_ERR = "Your browser is too old. You need Internet Explorer version 8 or newer." - this.is_compatible() + # }}} - is_compatible: () -> + is_compatible: () -> # {{{ if not window.document.createRange throw this.CREATE_RANGE_ERR # Check if Internet Explorer >= 8 as getClientRects returns physical @@ -138,6 +138,7 @@ class CanonicalFragmentIdentifier if ver > 4 and ver < 8 # We have IE < 8 throw this.IE_ERR + # }}} set_current_time: (target, val) -> # {{{ if target.currentTime == undefined From 645d7e95c7f8c9822218a5994a190a1def501313 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 16:34:36 +0530 Subject: [PATCH 16/19] Update Kopalnia Wiedzy --- recipes/kopalniawiedzy.recipe | 138 +++++++++++++++++----------------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/recipes/kopalniawiedzy.recipe b/recipes/kopalniawiedzy.recipe index 628dc1b2d2..a7b932f618 100644 --- a/recipes/kopalniawiedzy.recipe +++ b/recipes/kopalniawiedzy.recipe @@ -1,79 +1,79 @@ __license__ = 'GPL v3' -__copyright__ = '2011, Attis ' +__copyright__ = '2011 Attis , 2012 Tomasz Długosz ' __version__ = 'v. 0.1' import re from calibre.web.feeds.recipes import BasicNewsRecipe class KopalniaWiedzy(BasicNewsRecipe): - title = u'Kopalnia Wiedzy' - publisher = u'Kopalnia Wiedzy' - description = u'Ciekawostki ze świata nauki i techniki' - encoding = 'utf-8' - __author__ = 'Attis' - language = 'pl' - oldest_article = 7 - max_articles_per_feed = 100 - INDEX = u'http://kopalniawiedzy.pl/' - remove_javascript = True - no_stylesheets = True - - remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}] - remove_tags_after = dict(attrs={'class':'ad-square'}) - keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})] - extra_css = '.topimage {margin-top: 30px}' - - preprocess_regexps = [ - (re.compile(u''), - lambda match: '' ), - (re.compile(u'

'), - lambda match: '') - ] - - feeds = [ - (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'), - (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'), - (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'), - (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'), - (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'), - (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss') - ] - - def is_link_wanted(self, url, tag): - return tag['class'] == 'next' - - def remove_beyond(self, tag, next): - while tag is not None and getattr(tag, 'name', None) != 'body': - after = getattr(tag, next) - while after is not None: - ns = getattr(tag, next) - after.extract() - after = ns - tag = tag.parent - - def append_page(self, soup, appendtag, position): - pager = soup.find('a',attrs={'class':'next'}) - if pager: - nexturl = self.INDEX + pager['href'] - soup2 = self.index_to_soup(nexturl) - texttag = soup2.find('div', attrs={'id':'articleContent'}) - - tag = texttag.find(attrs={'class':'pages'}) - self.remove_beyond(tag, 'nextSibling') - - newpos = len(texttag.contents) - self.append_page(soup2,texttag,newpos) + title = u'Kopalnia Wiedzy' + publisher = u'Kopalnia Wiedzy' + description = u'Ciekawostki ze świata nauki i techniki' + encoding = 'utf-8' + __author__ = 'Attis & Tomasz Długosz' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + INDEX = u'http://kopalniawiedzy.pl/' + remove_javascript = True + no_stylesheets = True - appendtag.insert(position,texttag) + remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}] + remove_tags_after = dict(attrs={'class':'ad-square'}) + keep_only_tags = [dict(name="div", attrs={'class':'article-text text-small'})] + extra_css = '.topimage {margin-top: 30px}' + + preprocess_regexps = [ + (re.compile(u''), + lambda match: '' ), + (re.compile(u'

'), + lambda match: '') + ] + + feeds = [ + (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'), + (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'), + (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'), + (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'), + (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'), + (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss') + ] + + def is_link_wanted(self, url, tag): + return tag['class'] == 'next' + + def remove_beyond(self, tag, next): + while tag is not None and getattr(tag, 'name', None) != 'body': + after = getattr(tag, next) + while after is not None: + ns = getattr(tag, next) + after.extract() + after = ns + tag = tag.parent + + def append_page(self, soup, appendtag, position): + pager = soup.find('a',attrs={'class':'next'}) + if pager: + nexturl = self.INDEX + pager['href'] + soup2 = self.index_to_soup(nexturl) + texttag = soup2.find('div', attrs={'id':'articleContent'}) + + tag = texttag.find(attrs={'class':'pages'}) + self.remove_beyond(tag, 'nextSibling') + + newpos = len(texttag.contents) + self.append_page(soup2,texttag,newpos) + + appendtag.insert(position,texttag) - def preprocess_html(self, soup): - self.append_page(soup, soup.body, 3) - - for item in soup.findAll('div',attrs={'class':'pages'}): - item.extract() - - for item in soup.findAll('p', attrs={'class':'wykop'}): - item.extract() - - return soup + def preprocess_html(self, soup): + self.append_page(soup, soup.body, 3) + + for item in soup.findAll('div',attrs={'class':'pages'}): + item.extract() + + for item in soup.findAll('p', attrs={'class':'wykop'}): + item.extract() + + return soup From 383fe346dd9102020b3b44a4954672af3207e589 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 17:34:13 +0530 Subject: [PATCH 17/19] EPUB CFI: Handle identifiers pointing to content inside a scrollable element --- src/calibre/ebooks/oeb/display/cfi.coffee | 76 +++++++++++++++---- .../ebooks/oeb/display/test/cfi-test.coffee | 31 ++++---- .../ebooks/oeb/display/test/index.html | 56 +++++++++++++- 3 files changed, 130 insertions(+), 33 deletions(-) diff --git a/src/calibre/ebooks/oeb/display/cfi.coffee b/src/calibre/ebooks/oeb/display/cfi.coffee index 3bc2828582..2a15f03b0d 100644 --- a/src/calibre/ebooks/oeb/display/cfi.coffee +++ b/src/calibre/ebooks/oeb/display/cfi.coffee @@ -63,6 +63,18 @@ get_current_time = (target) -> # {{{ fstr(ans) # }}} +viewport_to_document = (x, y, doc) -> # {{{ + win = doc.defaultView + x += win.scrollX + y += win.scrollY + if doc != window.document + # We are in a frame + node = win.frameElement + rect = node.getBoundingClientRect() + return viewport_to_document(rect.left, rect.top, node.ownerDocument) + return [x + win.scrollX, y + win.scrollY] +# }}} + # Equivalent for caretRangeFromPoint for non WebKit browsers {{{ range_has_point = (range, x, y) -> for rect in range.getClientRects() @@ -297,7 +309,9 @@ class CanonicalFragmentIdentifier next = false while true nn = node.nextSibling - if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata + if not nn + break + if nn.nodeType in [3, 4, 5, 6] and nn.nodeValue?.length # Text node, entity, cdata next = nn break if not next @@ -387,6 +401,7 @@ class CanonicalFragmentIdentifier nwin = ndoc.defaultView x = null y = null + range = null if typeof(r.offset) == "number" # Character offset @@ -421,29 +436,62 @@ class CanonicalFragmentIdentifier if rects?.length break + if not rects?.length log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }") return null - rect = rects[0] - x = (a*rect.left + (1-a)*rect.right) - y = (rect.top + rect.bottom)/2 else - x = node.offsetLeft - nwin.scrollX - y = node.offsetTop - nwin.scrollY - if typeof(r.x) == "number" and node.offsetWidth - x += (r.x*node.offsetWidth)/100 - y += (r.y*node.offsetHeight)/100 + [x, y] = [r.x, r.y] - until ndoc == doc - node = nwin.frameElement + {x:x, y:y, node:r.node, time:r.time, range:range, a:a} + + # }}} + + scroll_to: (cfi, callback=false, doc=window?.document) -> # {{{ + point = this.point(cfi, doc) + if not point + log("No point found for cfi: #{ cfi }") + return + if typeof point.time == 'number' + this.set_current_time(point.node, point.time) + + if point.range != null + r = point.range + node = r.startContainer ndoc = node.ownerDocument nwin = ndoc.defaultView - x += node.offsetLeft - nwin.scrollX - y += node.offsetTop - nwin.scrollY + span = ndoc.createElement('span') + span.setAttribute('style', 'border-width: 0; padding: 0; margin: 0') + r.surroundContents(span) + span.scrollIntoView() + fn = -> + rect = span.getBoundingClientRect() + x = (point.a*rect.left + (1-point.a)*rect.right) + y = (rect.top + rect.bottom)/2 + [x, y] = viewport_to_document(x, y, ndoc) + span.outerHTML = span.innerHTML + if callback + callback(x, y) + else + node = point.node + nwin = node.ownerDocument.defaultView + node.scrollIntoView() - {x:x, y:y, node:r.node, time:r.time} + fn = -> + rect = node.getBoundingClientRect() + [x, y] = viewport_to_document(rect.left, rect.top, node.ownerDocument) + if typeof(point.x) == 'number' and node.offsetWidth + x += (r.x*node.offsetWidth)/100 + if typeof(point.y) == 'number' and node.offsetHeight + y += (r.y*node.offsetHeight)/100 + scrollTo(x, y) + if callback + callback(x, y) + setTimeout(fn, 10) + + null # }}} if window? diff --git a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee index 5c3bb6ee16..427a74c6e9 100644 --- a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee +++ b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee @@ -21,30 +21,29 @@ viewport_left = (node) -> show_cfi = (dont_seek) -> if window.current_cfi - pos = window.cfi.point(window.current_cfi) - if pos + fn = (x, y) -> ms = $("#marker") - ms.offset({left:pos.x-1, top:pos.y-30}) ms.css('visibility', 'visible') - if not dont_seek - if typeof pos.time == "number" - window.cfi.set_current_time(pos.node, pos.time) - scrollTo(0, pos.y - 30) + # This strange sequence is needed to get it to work in Chrome + # when called from the onload handler + ms.offset({left:x-1, top:y-30}) + ms.offset() + ms.offset({left:x-1, top:y-30}) + + + window.cfi.scroll_to(window.current_cfi, fn) null -# Set this to true to have the browser reload the page with the current cfi -RELOAD = false mark_and_reload = (evt) -> window.current_cfi = window.cfi.at(evt.clientX, evt.clientY) - if not RELOAD - show_cfi(true) if window.current_cfi fn = () -> - newloc = window.location.href.replace(/#.*$/, '') + "#epubcfi(#{ window.current_cfi })" + epubcfi = "#epubcfi(#{ window.current_cfi })" + newloc = window.location.href.replace(/#.*$/, '') + epubcfi window.location.replace(newloc) - if RELOAD - window.location.reload() + document.getElementById('current-cfi').innerHTML = window.current_cfi + window.location.reload() setTimeout(fn, 1) null @@ -58,7 +57,7 @@ window.onload = -> r = location.hash.match(/#epubcfi\((.+)\)$/) if r window.current_cfi = r[1] - setTimeout(show_cfi, 1) + document.getElementById('current-cfi').innerHTML = window.current_cfi + setTimeout(show_cfi, 100) null - diff --git a/src/calibre/ebooks/oeb/display/test/index.html b/src/calibre/ebooks/oeb/display/test/index.html index 4ab7dca502..f43848c5e7 100644 --- a/src/calibre/ebooks/oeb/display/test/index.html +++ b/src/calibre/ebooks/oeb/display/test/index.html @@ -5,11 +5,61 @@ + -

Testing CFI functionality

- -

0123

+
+

Testing EPUB CFI

+
Current CFI: None
+

A div with scrollbars

+
But I must explain to you how all this mistaken + idea of denouncing pleasure and praising pain was born and I + will give you a complete account of the system, and expound the + actual teachings of the great explorer of the truth, the + master-builder of human happiness. No one rejects, dislikes, or + avoids pleasure itself, because it is pleasure, but because + those who do not know how to pursue pleasure rationally + encounter consequences that are extremely painful. Nor again is + there anyone who loves or pursues or desires to obtain pain of + itself, because it is pain, but because occasionally + circumstances occur in which toil and pain can procure him some + great pleasure. To take a trivial example, which of us ever + undertakes laborious physical exercise, except to obtain some + advantage from it? But who has any right to find fault with a + man who chooses to enjoy a pleasure that has no annoying + consequences, or one who avoids a pain that produces no + resultant pleasure? On the other hand, we denounce with + righteous indignation and dislike men who are so beguiled and + demoralized by the charms of pleasure of the moment, so blinded + by desire, that they cannot foresee +
+
+ From 436d604f1ce46b94a193303add564ca3d5c8738e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 18:28:15 +0530 Subject: [PATCH 18/19] Various Italian news sources by faber1971 --- recipes/macity.recipe | 23 +++++++++++++++++++++++ recipes/wired_it.recipe | 12 ++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 recipes/macity.recipe create mode 100644 recipes/wired_it.recipe diff --git a/recipes/macity.recipe b/recipes/macity.recipe new file mode 100644 index 0000000000..b79fa972cf --- /dev/null +++ b/recipes/macity.recipe @@ -0,0 +1,23 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1325766771(BasicNewsRecipe): + title = u'Macity' + language = 'it' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + + def get_article_url(self, article): + link = BasicNewsRecipe.get_article_url(self, article) + if link.split('/')[-1]=="story01.htm": + link=link.split('/')[-2] + a=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L' , 'N' , 'S' ] + b=['0', '.', '/', '?', '-', '=', '&', '_', 'http://', '.com', 'www.'] + for i in range(0,len(a)): + link=link.replace('0'+a[-i],b[-i]) + return link + + feeds = [(u'Macity', u'http://www.macitynet.it.feedsportal.com/c/33714/f/599513/index.rss')] + __author__ = 'faber1971' + description = 'Apple and hi-tech news' + diff --git a/recipes/wired_it.recipe b/recipes/wired_it.recipe new file mode 100644 index 0000000000..2c1f8a172d --- /dev/null +++ b/recipes/wired_it.recipe @@ -0,0 +1,12 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1325758162(BasicNewsRecipe): + title = u'Wired' + language = 'it' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + remove_tags_after = [dict(name='div', attrs={'class':'article_content'})] + feeds = [(u'Wired', u'http://www.wired.it/rss.xml')] + __author__ = 'faber1971' + description = 'An American magazine that reports on how new technology affects culture, the economy, and politics' From 5b147cd745b4f26d2c57dda79daf3ee0692a8649 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 Jan 2012 19:01:01 +0530 Subject: [PATCH 19/19] ... --- src/calibre/ebooks/oeb/display/cfi.coffee | 11 ++++- .../ebooks/oeb/display/test/cfi-test.coffee | 43 ++++++++----------- .../ebooks/oeb/display/test/index.html | 5 +-- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/calibre/ebooks/oeb/display/cfi.coffee b/src/calibre/ebooks/oeb/display/cfi.coffee index 2a15f03b0d..5020174664 100644 --- a/src/calibre/ebooks/oeb/display/cfi.coffee +++ b/src/calibre/ebooks/oeb/display/cfi.coffee @@ -6,6 +6,10 @@ Released under the GPLv3 License Based on code originally written by Peter Sorotkin (http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js) + Improvements with respect to that code: + 1. Works on all browsers (WebKit, Firefox and IE >= 8) + 2. Works if the point is after the last text character in an element + 3. Works for elements that are scrollable (i.e. have their own scrollbars) To check if this script is compatible with the current browser, call window.cfi.is_compatible() it will throw an exception if not compatible. @@ -470,7 +474,12 @@ class CanonicalFragmentIdentifier x = (point.a*rect.left + (1-point.a)*rect.right) y = (rect.top + rect.bottom)/2 [x, y] = viewport_to_document(x, y, ndoc) - span.outerHTML = span.innerHTML + tn = if span.firstChild then span.firstChild.nodeValue else '' + tn = ndoc.createTextNode(tn) + p = span.parentNode + p.insertBefore(tn, span) + p.removeChild(span) + p.normalize() if callback callback(x, y) else diff --git a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee index 427a74c6e9..ab82c32df4 100644 --- a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee +++ b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee @@ -13,47 +13,42 @@ log = (error) -> else if process?.stdout?.write process.stdout.write(error + '\n') -viewport_top = (node) -> - $(node).offset().top - window.pageYOffset - -viewport_left = (node) -> - $(node).offset().left - window.pageXOffset - -show_cfi = (dont_seek) -> +show_cfi = () -> if window.current_cfi fn = (x, y) -> - ms = $("#marker") - ms.css('visibility', 'visible') - # This strange sequence is needed to get it to work in Chrome - # when called from the onload handler - ms.offset({left:x-1, top:y-30}) - ms.offset() - ms.offset({left:x-1, top:y-30}) - + ms = document.getElementById("marker").style + ms.display = 'block' + ms.top = y - 30 + 'px' + ms.left = x - 1 + 'px' window.cfi.scroll_to(window.current_cfi, fn) null - mark_and_reload = (evt) -> - window.current_cfi = window.cfi.at(evt.clientX, evt.clientY) - if window.current_cfi - fn = () -> + # Remove image in case the click was on the image itself, we want the cfi to + # be on the underlying element + ms = document.getElementById("marker") + ms.parentNode.removeChild(ms) + + fn = () -> + window.current_cfi = window.cfi.at(evt.clientX, evt.clientY) + if window.current_cfi epubcfi = "#epubcfi(#{ window.current_cfi })" newloc = window.location.href.replace(/#.*$/, '') + epubcfi window.location.replace(newloc) document.getElementById('current-cfi').innerHTML = window.current_cfi window.location.reload() - setTimeout(fn, 1) + setTimeout(fn, 1) null window.onload = -> - window.onscroll = show_cfi - window.onresize = show_cfi + try + window.cfi.is_compatible() + catch error + alert(error) + return document.onclick = mark_and_reload - for iframe in document.getElementsByTagName("iframe") - iframe.contentWindow.onscroll = show_cfi r = location.hash.match(/#epubcfi\((.+)\)$/) if r window.current_cfi = r[1] diff --git a/src/calibre/ebooks/oeb/display/test/index.html b/src/calibre/ebooks/oeb/display/test/index.html index f43848c5e7..afeae055a4 100644 --- a/src/calibre/ebooks/oeb/display/test/index.html +++ b/src/calibre/ebooks/oeb/display/test/index.html @@ -3,7 +3,6 @@ Testing CFI functionality -