0.8.33+

2025-11-09 16:23:22 -05:00 · 2012-01-05 06:35:23 -07:00 · 2012-01-05 06:35:23 -07:00 · b6c715e1e9
commit b6c715e1e9
parent 709947cf93 5b147cd745
20 changed files with 606 additions and 273 deletions
--- a/recipes/alternet.recipe
+++ b/recipes/alternet.recipe
@ -10,11 +10,11 @@ class Alternet(BasicNewsRecipe):
    category = 'News, Magazine'
    description = 'News magazine and online community'
    feeds          = [
-	(u'Front Page', u'http://feeds.feedblitz.com/alternet'),
-	(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
-	(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
-	(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
-	]
+        (u'Front Page', u'http://feeds.feedblitz.com/alternet'),
+        (u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
+        (u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
+        (u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
+        ]
    remove_attributes = ['width', 'align','cellspacing']
    remove_javascript = True
    use_embedded_content   = False
@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe):
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
        return self.temp_files[-1].name
+
+    conversion_options = {'linearize_tables': True}
--- a/recipes/goal.recipe
+++ b/recipes/goal.recipe
@ -0,0 +1,13 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1325677767(BasicNewsRecipe):
+    title          = u'Goal'
+    oldest_article = 1
+    language = 'it'
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    remove_tags_after = [dict(id='article_content')]
+    feeds          = [(u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss')]
+    __author__      = 'faber1971'
+    description    = 'Sports news from Italy'
+
--- a/recipes/kopalniawiedzy.recipe
+++ b/recipes/kopalniawiedzy.recipe
@ -1,79 +1,79 @@
 __license__   = 'GPL v3'
-__copyright__ = '2011, Attis <attis@attis.one.pl>'
+__copyright__ = '2011 Attis <attis@attis.one.pl>, 2012 Tomasz Długosz <tomek3d@gmail.com>'
 __version__ = 'v. 0.1'

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe

 class KopalniaWiedzy(BasicNewsRecipe):
-		title          = u'Kopalnia Wiedzy'
-		publisher      = u'Kopalnia Wiedzy'
-		description    = u'Ciekawostki ze świata nauki i techniki'
-		encoding       = 'utf-8'
-		__author__     = 'Attis'
-		language       = 'pl'
-		oldest_article = 7
-		max_articles_per_feed = 100
-		INDEX          = u'http://kopalniawiedzy.pl/'
-		remove_javascript     = True    
-		no_stylesheets        = True
+        title          = u'Kopalnia Wiedzy'
+        publisher      = u'Kopalnia Wiedzy'
+        description    = u'Ciekawostki ze świata nauki i techniki'
+        encoding       = 'utf-8'
+        __author__     = 'Attis & Tomasz Długosz'
+        language       = 'pl'
+        oldest_article = 7
+        max_articles_per_feed = 100
+        INDEX          = u'http://kopalniawiedzy.pl/'
+        remove_javascript     = True
+        no_stylesheets        = True

-		remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}]
-		remove_tags_after = dict(attrs={'class':'ad-square'})
-		keep_only_tags    = [dict(name="div", attrs={'id':'articleContent'})]
-		extra_css      = '.topimage {margin-top: 30px}'
+        remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
+        remove_tags_after = dict(attrs={'class':'ad-square'})
+        keep_only_tags    = [dict(name="div", attrs={'class':'article-text text-small'})]
+        extra_css      = '.topimage {margin-top: 30px}'

-		preprocess_regexps = [
-				(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
-				lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
-				(re.compile(u'<br  /><br  />'),
-				lambda match: '<br\/>')
-			]
+        preprocess_regexps = [
+                (re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
+                lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
+                (re.compile(u'<br  /><br  />'),
+                lambda match: '<br\/>')
+            ]

-		feeds = [
-			(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
-			(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
-			(u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
-			(u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
-			(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
-			(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
-		]
+        feeds = [
+            (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
+            (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
+            (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
+            (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
+            (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
+            (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
+        ]

-		def is_link_wanted(self, url, tag):
-			return tag['class'] == 'next'
+        def is_link_wanted(self, url, tag):
+            return tag['class'] == 'next'

-		def remove_beyond(self, tag, next):
-				while tag is not None and getattr(tag, 'name', None) != 'body':
-						after = getattr(tag, next)
-						while after is not None:
-								ns = getattr(tag, next)
-								after.extract()
-								after = ns
-						tag = tag.parent
+        def remove_beyond(self, tag, next):
+                # Strip everything reachable through the attribute named by
+                # `next` (append_page passes 'nextSibling'), first from `tag`
+                # and then from each of its ancestors, stopping when the walk
+                # reaches None or a node whose name is 'body'.
+                while tag is not None and getattr(tag, 'name', None) != 'body':
+                        after = getattr(tag, next)
+                        while after is not None:
+                                # The follower is re-read from `tag` (not from
+                                # `after`) before `after` is extracted.
+                                ns = getattr(tag, next)
+                                after.extract()
+                                after = ns
+                        # Repeat one level up the tree
+                        tag = tag.parent

-		def append_page(self, soup, appendtag, position):
-				pager = soup.find('a',attrs={'class':'next'})
-				if pager:
-					nexturl = self.INDEX + pager['href']
-					soup2 = self.index_to_soup(nexturl)
-					texttag = soup2.find('div', attrs={'id':'articleContent'})
+        def append_page(self, soup, appendtag, position):
+                pager = soup.find('a',attrs={'class':'next'})
+                if pager:
+                    nexturl = self.INDEX + pager['href']
+                    soup2 = self.index_to_soup(nexturl)
+                    texttag = soup2.find('div', attrs={'id':'articleContent'})

-					tag = texttag.find(attrs={'class':'pages'})
-					self.remove_beyond(tag, 'nextSibling')
+                    tag = texttag.find(attrs={'class':'pages'})
+                    self.remove_beyond(tag, 'nextSibling')

-					newpos = len(texttag.contents)
-					self.append_page(soup2,texttag,newpos)
+                    newpos = len(texttag.contents)
+                    self.append_page(soup2,texttag,newpos)

-					appendtag.insert(position,texttag)
+                    appendtag.insert(position,texttag)


-		def preprocess_html(self, soup): 
-				self.append_page(soup, soup.body, 3)
+        def preprocess_html(self, soup):
+                self.append_page(soup, soup.body, 3)

-				for item in soup.findAll('div',attrs={'class':'pages'}):
-					item.extract()
+                for item in soup.findAll('div',attrs={'class':'pages'}):
+                    item.extract()

-				for item in soup.findAll('p', attrs={'class':'wykop'}):
-					item.extract()
+                for item in soup.findAll('p', attrs={'class':'wykop'}):
+                    item.extract()

-				return soup
+                return soup
--- a/recipes/macity.recipe
+++ b/recipes/macity.recipe
@ -0,0 +1,23 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1325766771(BasicNewsRecipe):
+    title          = u'Macity'
+    language = 'it'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    def get_article_url(self, article):
+        link = BasicNewsRecipe.get_article_url(self, article)
+        if link.split('/')[-1]=="story01.htm":
+            link=link.split('/')[-2]
+            a=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L'      , 'N'   , 'S'   ]
+            b=['0', '.', '/', '?', '-', '=', '&', '_', 'http://', '.com', 'www.']
+            for i in range(0,len(a)):
+                link=link.replace('0'+a[-i],b[-i])
+        return link
+
+    feeds          = [(u'Macity', u'http://www.macitynet.it.feedsportal.com/c/33714/f/599513/index.rss')]
+    __author__      = 'faber1971'
+    description = 'Apple and hi-tech news'
+
--- a/recipes/money_pl.recipe
+++ b/recipes/money_pl.recipe
@ -0,0 +1,76 @@
+import re
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class FocusRecipe(BasicNewsRecipe):
+    __license__ = 'GPL v3'
+    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    language = 'pl'
+    version = 1
+
+    title = u'Money.pl'
+    category = u'News'
+    description = u'Informacje finansowe z kraju i ze świata. Aktualne i archiwalne: notowania giełdowe, kursy walut, wskaźniki gospodarcze.'
+    remove_empty_feeds= True
+    no_stylesheets=True
+    oldest_article = 1
+    max_articles_per_feed = 100000
+    recursions = 0
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    simultaneous_downloads = 2
+
+    r = re.compile('.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
+    keep_only_tags =[]
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'artykul'}))
+    remove_tags    = [dict(name='ul', attrs={'class':'socialStuff'})]
+
+    extra_css = '''
+                    body {font-family: Arial,Helvetica,sans-serif ;}
+                    h1{text-align: left;}
+                    h2{font-size: medium; font-weight: bold;}
+                    p.lead {font-weight: bold; text-align: left;}
+                    .authordate {font-size: small; color: #696969;}
+                    .fot{font-size: x-small; color: #666666;}
+                    '''
+
+    feeds          = [
+                            ('Wiadomosci z kraju', 'http://money.pl.feedsportal.com/c/33900/f/612847/index.rss'),
+                            ('Wiadomosci ze swiata', 'http://money.pl.feedsportal.com/c/33900/f/612848/index.rss'),
+                            ('Gospodarka', 'http://money.pl.feedsportal.com/c/33900/f/612849/index.rss'),
+                            ('Waluty', 'http://money.pl.feedsportal.com/c/33900/f/612850/index.rss'),
+                            ('Gielda', 'http://money.pl.feedsportal.com/c/33900/f/612851/index.rss'),
+                            ('Banki', 'http://money.pl.feedsportal.com/c/33900/f/612852/index.rss'),
+                            ('Fundusze', 'http://money.pl.feedsportal.com/c/33900/f/612853/index.rss'),
+                            ('Emerytury', 'http://money.pl.feedsportal.com/c/33900/f/612854/index.rss'),
+                            ('Podatki', 'http://money.pl.feedsportal.com/c/33900/f/612855/index.rss'),
+                            ('Ubezpieczenia', 'http://money.pl.feedsportal.com/c/33900/f/612856/index.rss'),
+                            ('Poradniki', 'http://money.pl.feedsportal.com/c/33900/f/612857/index.rss'),
+                            ('Raporty', 'http://money.pl.feedsportal.com/c/33900/f/612858/index.rss'),
+                            ('Motoryzacja', 'http://money.pl.feedsportal.com/c/33900/f/612859/index.rss'),
+                            ('Manager', 'http://money.pl.feedsportal.com/c/33900/f/612860/index.rss'),
+                            ('Dla firm', 'http://money.pl.feedsportal.com/c/33900/f/612861/index.rss'),
+                            ('Prawo', 'http://money.pl.feedsportal.com/c/33900/f/612862/index.rss'),
+                            ('Nieruchomosci', 'http://money.pl.feedsportal.com/c/33900/f/612863/index.rss'),
+                            ('Praca', 'http://money.pl.feedsportal.com/c/33900/f/612864/index.rss'),
+
+
+                           ]
+
+    def print_version(self, url):
+     if url.count ('money.pl.feedsportal.com'):
+            u = url.find('0Cartykul0C')
+            u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[u + 21:]
+            u = u.replace('0C', '/')
+            u = u.replace('A', '')
+            u = u.replace ('0E','-')
+            u = u.replace ('0P',';')
+            u = u.replace ('0H',',')
+            u = u.replace ('0B','.')
+            u = u.replace (',0,',',-1,')
+            u = u.replace('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', '')
+     else:
+            u = url.replace('/nc/1','/do-druku/1')
+     return u
--- a/recipes/wired_it.recipe
+++ b/recipes/wired_it.recipe
@ -0,0 +1,12 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1325758162(BasicNewsRecipe):
+    title          = u'Wired'
+    language = 'it'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    remove_tags_after = [dict(name='div', attrs={'class':'article_content'})]
+    feeds          = [(u'Wired', u'http://www.wired.it/rss.xml')]
+    __author__      = 'faber1971'
+    description = 'An American magazine that reports on how new technology affects culture, the economy, and politics'
--- a/src/calibre/ebooks/mobi/debug.py
+++ b/src/calibre/ebooks/mobi/debug.py
@ -1410,19 +1410,22 @@ class MOBIFile(object): # {{{
            self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
            min(len(self.records), ntr+1))]
        self.image_records, self.binary_records = [], []
+        image_index = 0
        for i in xrange(fntbr, len(self.records)):
            if i in self.indexing_record_nums or i in self.huffman_record_nums:
                continue
+            image_index += 1
            r = self.records[i]
            fmt = None
-            if i >= fii and r.raw[:4] not in (b'FLIS', b'FCIS', b'SRCS',
-                    b'\xe9\x8e\r\n'):
+            if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
+                    b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
+                    b'AUDI', b'VIDE'}:
                try:
                    width, height, fmt = identify_data(r.raw)
                except:
                    pass
            if fmt is not None:
-                self.image_records.append(ImageRecord(len(self.image_records)+1, r, fmt))
+                self.image_records.append(ImageRecord(image_index, r, fmt))
            else:
                self.binary_records.append(BinaryRecord(i, r))

--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -974,12 +974,13 @@ class MobiReader(object):
                continue
            processed_records.append(i)
            data  = self.sections[i][0]
+            image_index += 1
            if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
                    b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
-                # A FLIS, FCIS, SRCS or EOF record, ignore
+                # This record is a known non-image type, so there is no need
+                # to try to load it as an image
                continue
            buf = cStringIO.StringIO(data)
-            image_index += 1
            try:
                im = PILImage.open(buf)
                im = im.convert('RGB')
--- a/src/calibre/ebooks/oeb/display/cfi.coffee
+++ b/src/calibre/ebooks/oeb/display/cfi.coffee
@ -4,15 +4,24 @@
 ###
 Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
 Released under the GPLv3 License
- Based on code originally written by Peter Sorotkin (epubcfi.js)
+ Based on code originally written by Peter Sorotkin
+ (http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js)
+ Improvements with respect to that code:
+ 1. Works on all browsers (WebKit, Firefox and IE >= 8)
+ 2. Works if the point is after the last text character in an element
+ 3. Works for elements that are scrollable (i.e. have their own scrollbars)
+
+ To check if this script is compatible with the current browser, call
+ window.cfi.is_compatible(); it throws an exception if the browser is not
+ compatible.
 ###
-#
-log = (error) ->
+
+log = (error) -> # {{{
    if error
        if window?.console?.log
            window.console.log(error)
        else if process?.stdout?.write
            process.stdout.write(error + '\n')
+# }}}

 # CFI escaping {{{
 escape_for_cfi = (raw) ->
@ -51,12 +60,111 @@ fstr = (d) -> # {{{
    ans
 # }}}

+get_current_time = (target) -> # {{{
+    # Format target.currentTime with fstr, using 0 when the property is
+    # undefined on the target
+    seconds = if target.currentTime isnt undefined then target.currentTime else 0
+    fstr(seconds)
+# }}}
+
+viewport_to_document = (x, y, doc) -> # {{{
+    # Convert the point (x, y), given in viewport co-ordinates of the window
+    # displaying doc, into co-ordinates of the top level document
+    # (window.document). Returns [x, y].
+    until doc == window.document
+        # doc lives in a frame: shift the point by the position of the frame
+        # element in its parent's viewport, then continue in the parent
+        frame = doc.defaultView.frameElement
+        rect = frame.getBoundingClientRect()
+        x += rect.left
+        y += rect.top
+        doc = frame.ownerDocument
+    win = doc.defaultView
+    # The scroll offset must be added exactly once
+    return [x + win.scrollX, y + win.scrollY]
+# }}}
+
+# Equivalent for caretRangeFromPoint for non WebKit browsers {{{
+range_has_point = (range, x, y) ->
+    # true if (x, y) falls inside (edges included) any of the rectangles
+    # reported by range.getClientRects(), false otherwise
+    boxes = range.getClientRects()
+    i = 0
+    while i < boxes.length
+        box = boxes[i++]
+        inside_x = box.left <= x and x <= box.right
+        inside_y = box.top <= y and y <= box.bottom
+        return true if inside_x and inside_y
+    false
+
+offset_in_text_node = (node, range, x, y) ->
+    # Bisect the text of node to find the character offset whose client
+    # rects contain the point (x, y). range is re-used as scratch space: its
+    # start and end are overwritten on every probe.
+    limits = [0, node.nodeValue.length]
+    while limits[0] != limits[1]
+        pivot = Math.floor( (limits[0] + limits[1]) / 2 )
+        # Left and right halves on either side of the pivot character
+        lr = [limits[0], pivot]
+        rr = [pivot+1, limits[1]]
+        # Probe the single character at the pivot first
+        range.setStart(node, pivot)
+        range.setEnd(node, pivot+1)
+        if range_has_point(range, x, y)
+            return pivot
+        # Then the right half ...
+        range.setStart(node, rr[0])
+        range.setEnd(node, rr[1])
+        if range_has_point(range, x, y)
+            limits = rr
+            continue
+        # ... and finally the left half
+        range.setStart(node, lr[0])
+        range.setEnd(node, lr[1])
+        if range_has_point(range, x, y)
+            limits = lr
+            continue
+        # The point is in neither half: give up and use the current lower bound
+        break
+    return limits[0]
+
+find_offset_for_point = (x, y, node, cdoc) ->
+    # Locate the text position under the point (x, y) among the direct
+    # children of node. Returns [text_node, offset]. Throws if node has no
+    # non-empty text-like child (nodeType 3, 4, 5 or 6).
+    range = cdoc.createRange()
+    child = node.firstChild
+    last_child = null
+    while child
+        if child.nodeType in [3, 4, 5, 6] and child.nodeValue?.length
+            range.setStart(child, 0)
+            range.setEnd(child, child.nodeValue.length)
+            if range_has_point(range, x, y)
+                return [child, offset_in_text_node(child, range, x, y)]
+            # Remember the last candidate in case the point matches none
+            last_child = child
+        child = child.nextSibling
+
+    if not last_child
+        throw "#{node} has no children"
+    # The point must be after the last bit of text
+    return [last_child, last_child.nodeValue.length]
+
+# }}}
+
 class CanonicalFragmentIdentifier

    # This class is a namespace to expose CFI functions via the window.cfi
    # object

-    constructor: () ->
+    constructor: () -> # {{{
+        # Messages thrown when the browser lacks the features this class needs
+        @CREATE_RANGE_ERR = "Your browser does not support the createRange function. Update it to a newer version."
+        @IE_ERR = "Your browser is too old. You need Internet Explorer version 8 or newer."
+    # }}}
+
+    is_compatible: () -> # {{{
+        # Throws CREATE_RANGE_ERR or IE_ERR when the current browser cannot
+        # be supported; otherwise does nothing.
+        unless window.document.createRange
+            throw @CREATE_RANGE_ERR
+        # Check if Internet Explorer >= 8 as getClientRects returns physical
+        # rather than logical pixels on older IE. Conditional comments are
+        # probed with increasing version numbers until one is not honoured.
+        probe = document.createElement('div')
+        ver = 3
+        loop
+            ver += 1
+            probe.innerHTML = "<!--[if gt IE #{ ver }]><i></i><![endif]-->"
+            break if probe.getElementsByTagName('i').length == 0
+        if 4 < ver < 8
+            # We have IE < 8
+            throw @IE_ERR
+    # }}}
+
+    set_current_time: (target, val) -> # {{{
+        # Set target.currentTime to val, deferring to the "canplay" event
+        # when the target does not report itself as ready. Targets without a
+        # currentTime property are ignored.
+        return if target.currentTime is undefined
+        if target.readyState in [4, "complete"]
+            target.currentTime = val
+        else
+            target.addEventListener("canplay", (-> target.currentTime = val), false)
+    #}}}

    encode: (doc, node, offset, tail) -> # {{{
        cfi = tail or ""
@ -64,7 +172,7 @@ class CanonicalFragmentIdentifier
        # Handle the offset, if any
        switch node.nodeType
            when 1 # Element node
-                if typeoff(offset) == 'number'
+                if typeof(offset) == 'number'
                    node = node.childNodes.item(offset)
            when 3, 4, 5, 6 # Text/entity/CDATA node
                offset or= 0
@ -89,12 +197,12 @@ class CanonicalFragmentIdentifier
                        cfi = "!" + cfi
                        continue
                break
-            # Increase index by the length of all previous sibling text nodes
+            # Find position of node in parent
            index = 0
            child = p.firstChild
            while true
-                index |= 1
-                if child.nodeType in [1, 7]
+                index |= 1 # Increment index by 1 if it is even
+                if child.nodeType == 1
                    index++
                if child == node
                    break
@ -117,8 +225,8 @@ class CanonicalFragmentIdentifier
        error = null
        node = doc

-        until cfi.length <= 0 or error
-            if ( (r = cfi.match(simple_node_regex)) is not null ) # Path step
+        until cfi.length < 1 or error
+            if (r = cfi.match(simple_node_regex)) # Path step
                target = parseInt(r[1])
                assertion = r[2]
                if assertion
@ -136,11 +244,18 @@ class CanonicalFragmentIdentifier
                            error = "No matching child found for CFI: " + cfi
                        break
                    index |= 1 # Increment index by 1 if it is even
-                    if child.nodeType in [1, 7] # We have an element or a PI
+                    if child.nodeType == 1
                        index++
                    if ( index == target )
                        cfi = cfi.substr(r[0].length)
                        node = child
+                        if assertion and node.id != assertion
+                            # The found child does not match the id assertion,
+                            # trust the id assertion if an element with that id
+                            # exists
+                            child = doc.getElementById(assertion)
+                            if child
+                                node = child
                        break
                    child = child.nextSibling

@ -198,7 +313,9 @@ class CanonicalFragmentIdentifier
                next = false
                while true
                    nn = node.nextSibling
-                    if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata
+                    if not nn
+                        break
+                    if nn.nodeType in [3, 4, 5, 6] and nn.nodeValue?.length # Text node, entity, cdata
                        next = nn
                        break
                if not next
@ -253,7 +370,7 @@ class CanonicalFragmentIdentifier
        (if target.parentNode then target.parentNode else target).normalize()

        if name in ['audio', 'video']
-            tail = "~" + fstr target.currentTime
+            tail = "~" + get_current_time(target)

        if name in ['img', 'video']
            px = ((x + cwin.scrollX - target.offsetLeft)*100)/target.offsetWidth
@ -265,9 +382,12 @@ class CanonicalFragmentIdentifier
                if range
                    target = range.startContainer
                    offset = range.startOffset
+                else
+                    throw "Failed to find range from point (#{ x }, #{ y })"
+            else if cdoc.createRange
+                [target, offset] = find_offset_for_point(x, y, target, cdoc)
            else
-                # TODO: implement a span bisection algorithm for UAs
-                # without caretRangeFromPoint (Gecko, IE)
+                throw this.CREATE_RANGE_ERR

        this.encode(doc, target, offset, tail)
    # }}}
@ -285,52 +405,102 @@ class CanonicalFragmentIdentifier
        nwin = ndoc.defaultView
        x = null
        y = null
+        range = null

        if typeof(r.offset) == "number"
            # Character offset
+            if not ndoc.createRange
+                throw this.CREATE_RANGE_ERR
            range = ndoc.createRange()
            if r.forward
                try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}]
            else
                try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}]
-            k = 0
            a = null
            rects = null
            node_len = node.nodeValue.length
-            until rects or rects.length or k >= try_list.length
-                t = try_list[k++]
-                start_offset = r.offset + t.start
-                end_offset = r.offset + t.end
-                a = t.a
-                if start_offset < 0 or end_offset >= node_len
-                    continue
-                range.setStart(node, start_offset)
-                range.setEnd(node, end_offset)
-                rects = range.getClientRects()
+            offset = r.offset
+            for i in [0, 1]
+                # Try reducing the offset by 1 if we get no match as if it refers to the position after the
+                # last character we wont get a match with getClientRects
+                offset = r.offset - i
+                if offset < 0
+                    offset = 0
+                k = 0
+                until rects?.length or k >= try_list.length
+                    t = try_list[k++]
+                    start_offset = offset + t.start
+                    end_offset = offset + t.end
+                    a = t.a
+                    if start_offset < 0 or end_offset >= node_len
+                        continue
+                    range.setStart(node, start_offset)
+                    range.setEnd(node, end_offset)
+                    rects = range.getClientRects()
+                if rects?.length
+                    break

-            if not rects or not rects.length
+
+            if not rects?.length
                log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }")
                return null

-            rect = rects[0]
-            x = (a*rect.left + (1-a)*rect.right)
-            y = (rect.top + rect.bottom)/2
        else
-            x = node.offsetLeft - nwin.scrollX
-            y = node.offsetTop - nwin.scrollY
-            if typeof(r.x) == "number" and node.offsetWidth
-                x += (r.x*node.offsetWidth)/100
-                y += (r.y*node.offsetHeight)/100
+            [x, y] = [r.x, r.y]

-        until ndoc == doc
-            node = nwin.frameElement
+        {x:x, y:y, node:r.node, time:r.time, range:range, a:a}
+
+    # }}}
+
+    scroll_to: (cfi, callback=false, doc=window?.document) -> # {{{
+        point = this.point(cfi, doc)
+        if not point
+            log("No point found for cfi: #{ cfi }")
+            return
+        if typeof point.time == 'number'
+            this.set_current_time(point.node, point.time)
+
+        if point.range != null
+            r = point.range
+            node = r.startContainer
            ndoc = node.ownerDocument
            nwin = ndoc.defaultView
-            x += node.offsetLeft - nwin.scrollX
-            y += node.offsetTop - nwin.scrollY
+            span = ndoc.createElement('span')
+            span.setAttribute('style', 'border-width: 0; padding: 0; margin: 0')
+            r.surroundContents(span)
+            span.scrollIntoView()
+            fn = ->
+                rect = span.getBoundingClientRect()
+                x = (point.a*rect.left + (1-point.a)*rect.right)
+                y = (rect.top + rect.bottom)/2
+                [x, y] = viewport_to_document(x, y, ndoc)
+                tn = if span.firstChild then span.firstChild.nodeValue else ''
+                tn = ndoc.createTextNode(tn)
+                p = span.parentNode
+                p.insertBefore(tn, span)
+                p.removeChild(span)
+                p.normalize()
+                if callback
+                    callback(x, y)
+        else
+            node = point.node
+            nwin = node.ownerDocument.defaultView
+            node.scrollIntoView()

-        {x:x, y:y, node:r.node, time:r.time}
+            fn = ->
+                # Runs after node.scrollIntoView(): work out where the target
+                # point ended up in document co-ordinates, scroll there and
+                # report the position to the callback (if any).
+                rect = node.getBoundingClientRect()
+                [x, y] = viewport_to_document(rect.left, rect.top, node.ownerDocument)
+                # point.x/point.y are percentages of the node size. They must
+                # be read from point: r is never assigned on this branch.
+                if typeof(point.x) == 'number' and node.offsetWidth
+                    x += (point.x*node.offsetWidth)/100
+                if typeof(point.y) == 'number' and node.offsetHeight
+                    y += (point.y*node.offsetHeight)/100
+                scrollTo(x, y)
+                if callback
+                    callback(x, y)

+        setTimeout(fn, 10)
+
+        null
    # }}}

 if window?
--- a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee
+++ b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee
@ -6,19 +6,53 @@
 Released under the GPLv3 License
 ###

-viewport_top = (node) ->
-    $(node).offset().top - window.pageYOffset
+log = (error) ->
+    # Write error to the browser console when there is one, otherwise to
+    # process.stdout; falsy values are ignored
+    return unless error
+    if window?.console?.log
+        window.console.log(error)
+    else if process?.stdout?.write
+        process.stdout.write(error + '\n')

-viewport_left = (node) ->
-    $(node).offset().left - window.pageXOffset
+show_cfi = () ->
+    # Scroll to window.current_cfi (if set) and move the #marker element to
+    # the position reported by scroll_to
+    return null unless window.current_cfi
+    place_marker = (x, y) ->
+        marker_style = document.getElementById("marker").style
+        marker_style.display = 'block'
+        marker_style.top = "#{ y - 30 }px"
+        marker_style.left = "#{ x - 1 }px"
+
+    window.cfi.scroll_to(window.current_cfi, place_marker)
+    null
+
+mark_and_reload = (evt) ->
+    # Remove image in case the click was on the image itself, we want the cfi to
+    # be on the underlying element
+    marker = document.getElementById("marker")
+    marker.parentNode.removeChild(marker)
+
+    # Compute the CFI for the click position, store it in the URL fragment
+    # and reload the page so that it gets displayed
+    record_and_reload = () ->
+        cfi = window.cfi.at(evt.clientX, evt.clientY)
+        window.current_cfi = cfi
+        return unless cfi
+        bare_url = window.location.href.replace(/#.*$/, '')
+        window.location.replace(bare_url + "#epubcfi(#{ cfi })")
+        document.getElementById('current-cfi').innerHTML = cfi
+        window.location.reload()
+
+    setTimeout(record_and_reload, 1)
+    null

 window.onload = ->
-    h1 = document.getElementsByTagName('h1')[0]
-    x = h1.scrollLeft + 150
-    y = viewport_top(h1) + h1.offsetHeight/2
-    e = document.elementFromPoint x, y
-    if e.getAttribute('id') != 'first-h1'
-        alert 'Failed to find top h1'
+    try
+        window.cfi.is_compatible()
+    catch error
+        alert(error)
        return
-    alert window.cfi.at x, y
+    document.onclick = mark_and_reload
+    r = location.hash.match(/#epubcfi\((.+)\)$/)
+    if r
+        window.current_cfi = r[1]
+        document.getElementById('current-cfi').innerHTML = window.current_cfi
+        setTimeout(show_cfi, 100)
+    null

--- a/src/calibre/ebooks/oeb/display/test/index.html
+++ b/src/calibre/ebooks/oeb/display/test/index.html
@ -0,0 +1,65 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <title>Testing CFI functionality</title>
+        <script type="text/javascript" src="cfi.coffee"></script>
+        <script type="text/javascript" src="cfi-test.coffee"></script>
+        <style type="text/css">
+            body { font-family: sans-serif }
+
+            h2 {
+                border-top: solid 2px black;
+                margin-top: 4ex;
+            }
+
+            #container {
+                max-width: 30em;
+                margin-right: auto;
+                margin-left: 2em;
+                position:relative;
+            }
+
+            #current-cfi { 
+                font-family: monospace;
+                border: solid 1px blue;
+                padding: 1em;
+            }
+            #overflow {
+                max-height: 100px;
+                overflow: scroll;
+                border: solid 1px black;
+            }
+        </style>
+    </head>
+    <body>
+        <div id="container">
+            <h1 id="first-h1">Testing EPUB CFI</h1>
+            <div id="current-cfi">Current CFI:&nbsp;None</div>
+            <h2>A div with scrollbars</h2>
+            <div id="overflow"> But I must explain to you how all this mistaken
+                idea of denouncing pleasure and praising pain was born and I
+                will give you a complete account of the system, and expound the
+                actual teachings of the great explorer of the truth, the
+                master-builder of human happiness. No one rejects, dislikes, or
+                avoids pleasure itself, because it is pleasure, but because
+                those who do not know how to pursue pleasure rationally
+                encounter consequences that are extremely painful. Nor again is
+                there anyone who <b>loves</b> or pursues or desires to obtain pain of
+                itself, because it is pain, but because occasionally
+                circumstances occur in which toil and pain can procure him some
+                great pleasure. To take a trivial example, which of us ever
+                undertakes laborious physical exercise, except to obtain some
+                advantage from it? But who has any right to find fault with a
+                man who chooses to enjoy a pleasure that has no annoying
+                consequences, or one who avoids a pain that produces no
+                resultant pleasure? On the other hand, we denounce with
+                righteous indignation and dislike men who are so beguiled and
+                demoralized by the charms of pleasure of the moment, so blinded
+                by desire, that they cannot foresee
+            </div>
+        </div>
+        <img id="marker" style="position: absolute; display:none; z-index:10" src="marker.png" alt="Marker" />
+    </body>
+</html>
+
+
--- a/src/calibre/ebooks/oeb/display/test/marker.png
+++ b/src/calibre/ebooks/oeb/display/test/marker.png
--- a/src/calibre/ebooks/oeb/display/test/test.html
+++ b/src/calibre/ebooks/oeb/display/test/test.html
@ -1,14 +0,0 @@
-<!DOCTYPE html>
-<html>
-    <head>
-        <title>Testing CFI functionality</title>
-        <script type="text/javascript" src="../cfi.coffee"></script>
-        <script type="text/javascript" src="jquery.js"></script>
-        <script type="text/javascript" src="cfi-test.coffee"></script>
-    </head>
-    <body>
-        <h1 id="first-h1" style="border: solid 1px red">Testing CFI functionality</h1>
-    </body>
-</html>
-
-
--- a/src/calibre/ebooks/oeb/display/test/test.py
+++ b/src/calibre/ebooks/oeb/display/test/test.py
@ -16,10 +16,9 @@ except ImportError:
    if False: init_calibre, serve
    from calibre.utils.coffeescript import serve

-
 def run_devel_server():
+    # Serve the directory containing this file, with cfi.coffee registered as
+    # a special resource that lives one directory up (../cfi.coffee)
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
-    serve()
+    serve(resources={'cfi.coffee':'../cfi.coffee'})

 if __name__ == '__main__':
    run_devel_server()
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -80,7 +80,7 @@ class PML_HTMLizer(object):
        'b': ('<span style="font-weight: bold;">', '</span>'),
        'l': ('<span style="font-size: 150%;">', '</span>'),
        'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
-        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'),
+        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><small><a href="#rfn-%s">return</a></small></div>'),
        'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
    }

--- a/src/calibre/gui2/add.py
+++ b/src/calibre/gui2/add.py
@ -14,7 +14,7 @@ from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.metadata import MetaInformation
 from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG
 from calibre.utils.config import prefs
-from calibre import prints
+from calibre import prints, force_unicode, as_unicode

 single_shot = partial(QTimer.singleShot, 75)

@ -66,7 +66,8 @@ class RecursiveFind(QThread): # {{{
            if self.canceled:
                return
            self.update.emit(
-                    _('Searching in')+' '+dirpath[0])
+                    _('Searching in')+' '+force_unicode(dirpath[0],
+                        filesystem_encoding))
            self.books += list(self.db.find_books_in_directory(dirpath[0],
                                            self.single_book_per_directory))

@ -82,10 +83,7 @@ class RecursiveFind(QThread): # {{{
            except Exception as err:
                import traceback
                traceback.print_exc()
-                try:
-                    msg = unicode(err)
-                except:
-                    msg = repr(err)
+                msg = as_unicode(err)
                self.found.emit(msg)
                return

--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -12,14 +12,13 @@ from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer,
                     QPainter, QPalette, QBrush, QFontDatabase, QDialog,
                     QColor, QPoint, QImage, QRegion, QVariant, QIcon,
                     QFont, pyqtSignature, QAction, QByteArray, QMenu,
-                     pyqtSignal)
+                     pyqtSignal, QSwipeGesture)
 from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings

 from calibre.utils.config import Config, StringConfig
 from calibre.utils.localization import get_language
 from calibre.gui2.viewer.config_ui import Ui_Dialog
 from calibre.gui2.viewer.flip import SlideFlip
-from calibre.gui2.viewer.gestures import Gestures
 from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig
 from calibre.constants import iswindows
 from calibre import prints, guess_type
@ -514,7 +513,6 @@ class DocumentView(QWebView): # {{{
    def __init__(self, *args):
        QWebView.__init__(self, *args)
        self.flipper = SlideFlip(self)
-        self.gestures = Gestures()
        self.is_auto_repeat_event = False
        self.debug_javascript = False
        self.shortcuts =  Shortcuts(SHORTCUTS, 'shortcuts/viewer')
@ -582,6 +580,7 @@ class DocumentView(QWebView): # {{{
            else:
                m.addAction(name, a[key], self.shortcuts.get_sequences(key)[0])
        self.goto_location_action.setMenu(self.goto_location_menu)
+        self.grabGesture(Qt.SwipeGesture)

    def goto_next_section(self, *args):
        if self.manager is not None:
@ -1047,28 +1046,24 @@ class DocumentView(QWebView): # {{{
            self.manager.viewport_resized(self.scroll_fraction)

    def event(self, ev):
-        typ = ev.type()
-        if typ == ev.TouchBegin:
-            try:
-                self.gestures.start_gesture('touch', ev)
-            except:
-                import traceback
-                traceback.print_exc()
-        elif typ == ev.TouchEnd:
-            try:
-                gesture = self.gestures.end_gesture('touch', ev, self.rect())
-            except:
-                import traceback
-                traceback.print_exc()
-            if gesture is not None:
-                ev.accept()
-                if gesture == 'lineleft':
-                    self.next_page()
-                elif gesture == 'lineright':
-                    self.previous_page()
+        # Gesture events that carry a swipe gesture are passed to
+        # handle_swipe() and reported as handled; every other event gets the
+        # default QWebView processing.
+        if ev.type() == ev.Gesture:
+            swipe = ev.gesture(Qt.SwipeGesture)
+            if swipe is not None:
+                self.handle_swipe(swipe)
                return True
        return QWebView.event(self, ev)

+    def handle_swipe(self, swipe):
+        '''
+        React to a swipe gesture once it has finished.
+
+        A horizontal swipe turns the page (Left: previous page, Right: next
+        page); otherwise a vertical swipe changes section (Up: previous
+        section, Down: next section). Gestures that have not yet reached the
+        finished state are ignored.
+
+        :param swipe: The swipe gesture object taken from the gesture event
+        '''
+        if swipe.state() != Qt.GestureFinished:
+            return
+        if swipe.horizontalDirection() == QSwipeGesture.Left:
+            self.previous_page()
+        elif swipe.horizontalDirection() == QSwipeGesture.Right:
+            self.next_page()
+        elif swipe.verticalDirection() == QSwipeGesture.Up:
+            self.goto_previous_section()
+        # Down is a vertical direction, it has to be compared against
+        # verticalDirection(); horizontalDirection() can never return it.
+        elif swipe.verticalDirection() == QSwipeGesture.Down:
+            self.goto_next_section()
+
    def mouseReleaseEvent(self, ev):
        opos = self.document.ypos
        ret = QWebView.mouseReleaseEvent(self, ev)
--- a/src/calibre/gui2/viewer/gestures.py
+++ b/src/calibre/gui2/viewer/gestures.py
@ -1,61 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-import time
-
-class Gestures(object):
-
-    def __init__(self):
-        self.in_progress = {}
-
-    def get_boundary_point(self, event):
-        t = time.time()
-        id_ = None
-        if hasattr(event, 'touchPoints'):
-            tps = list(event.touchPoints())
-            tp = None
-            for t in tps:
-                if t.isPrimary():
-                    tp = t
-                    break
-            if tp is None:
-                tp = tps[0]
-            gp, p = tp.screenPos(), tp.pos()
-            id_ = tp.id()
-        else:
-            gp, p = event.globalPos(), event.pos()
-        return (t, gp, p, id_)
-
-    def start_gesture(self, typ, event):
-        self.in_progress[typ] = self.get_boundary_point(event)
-
-    def is_in_progress(self, typ):
-        return typ in self.in_progress
-
-    def end_gesture(self, typ, event, widget_rect):
-        if not self.is_in_progress(typ):
-            return
-        start = self.in_progress[typ]
-        end = self.get_boundary_point(event)
-        if start[3] != end[3]:
-            return
-        timespan = end[0] - start[0]
-        start_pos, end_pos = start[1], end[1]
-        xspan = end_pos.x() - start_pos.x()
-        yspan = end_pos.y() - start_pos.y()
-
-        width = widget_rect.width()
-
-        if timespan < 1.1 and abs(xspan) >= width/5. and \
-                abs(yspan) < abs(xspan)/5.:
-            # Quick horizontal gesture
-            return 'line'+('left' if xspan < 0 else 'right')
-
-        return None
-
-
-
--- a/src/calibre/utils/browser.py
+++ b/src/calibre/utils/browser.py
@ -11,7 +11,11 @@ from cookielib import CookieJar
 from mechanize import Browser as B

 class Browser(B):
-    'A cloneable mechanize browser'
+    '''
+    A cloneable mechanize browser. Useful for multithreading. The idea is that
+    each thread has a browser clone. Every clone uses the same thread safe
+    cookie jar. All clones share the same browser configuration.
+    '''

    def __init__(self):
        self._clone_actions = {}
--- a/src/calibre/utils/coffeescript.py
+++ b/src/calibre/utils/coffeescript.py
@ -11,16 +11,32 @@ __docformat__ = 'restructuredtext en'
 Utilities to help with developing coffeescript based apps
 '''
 import time, SimpleHTTPServer, SocketServer, os, subprocess
+from io import BytesIO

 class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler):

-    generated_files = set()
+    special_resources = {}
+    compiled_cs = {}
+
+    def send_head(self):
+        '''
+        Answer requests for ``.coffee`` files with the javascript produced by
+        :meth:`compile_coffeescript`, returned as an in-memory file object
+        after the response headers have been sent. All other requests are
+        handled by the SimpleHTTPRequestHandler implementation.
+        '''
+        path = self.path
+        if path.endswith('.coffee'):
+            # Drop the leading slash, then substitute the real source file if
+            # the request is registered in special_resources
+            path = path[1:] if path.startswith('/') else path
+            path = self.special_resources.get(path, path)
+            raw, mtime = self.compile_coffeescript(path)
+            self.send_response(200)
+            self.send_header("Content-type", b'text/javascript')
+            self.send_header("Content-Length", bytes(len(raw)))
+            self.send_header("Last-Modified", self.date_time_string(int(mtime)))
+            self.end_headers()
+            return BytesIO(raw)
+
+        return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def translate_path(self, path):
+        # Map a request path to a file on disk: apply any special_resources
+        # override, answer requests for /jquery.js with
+        # P('content_server/jquery.js') and leave everything else to
+        # SimpleHTTPRequestHandler.
+        # NOTE(review): send_head strips the leading '/' before looking a path
+        # up in special_resources; the path here presumably still has it, so
+        # keys without a leading '/' would not match -- confirm.
-        if path.endswith('jquery.js'):
+        path = self.special_resources.get(path, path)
+        if path.endswith('/jquery.js'):
            return P('content_server/jquery.js')
-        if path.endswith('.coffee'):
-            return self.compile_coffeescript(path[1:])

        return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(self,
                path)
@ -31,36 +47,33 @@ class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler):
        except:
            time.sleep(0.01)
            sstat = os.stat(src)
-        return (not os.access(dest, os.R_OK) or sstat.st_mtime >
-                os.stat(dest).st_mtime)
+        return sstat.st_mtime > dest

    def compile_coffeescript(self, src):
+        '''
+        Compile the coffeescript file ``src`` to javascript by running the
+        ``coffee`` program, caching the result in ``compiled_cs``.
+
+        The file is only (re)compiled when :meth:`newer` reports ``src`` as
+        newer than the time of the cached compilation. If running ``coffee``
+        fails, the result is a small script that alerts about the failure.
+
+        :return: ``(raw, mtime)``, the compiled javascript as a bytestring and
+                 the time at which it was compiled
+        '''
-        dest = os.path.splitext(src)[0] + '.js'
-        self.generated_files.add(dest)
-        if self.newer(src, dest):
-            with open(dest, 'wb') as f:
-                try:
-                    subprocess.check_call(['coffee', '-c', '-p', src], stdout=f)
-                except:
-                    print('Compilation of %s failed'%src)
-                    f.seek(0)
-                    f.truncate()
-                    f.write('// Compilation of coffeescript failed')
-                    f.write('alert("Compilation of %s failed");'%src)
-        return dest
+        raw, mtime = self.compiled_cs.get(src, (None, 0))
+        if self.newer(src, mtime):
+            mtime = time.time()
+            try:
+                raw = subprocess.check_output(['coffee', '-c', '-p', src])
+            except:
+                print('Compilation of %s failed'%src)
+                cs = '''
+                // Compilation of coffeescript failed
+                alert("Compilation of %s failed");
+                '''%src
+                raw = cs.encode('utf-8')
+            self.compiled_cs[src] = (raw, mtime)
+        return raw, mtime

-def serve(port=8000):
-    httpd = SocketServer.TCPServer(('localhost', port), Handler)
+class HTTPD(SocketServer.TCPServer):
+    # Allow the listening address to be re-used, so that the server can be
+    # restarted without waiting for the previous socket to time out
+    allow_reuse_address = True
+
+def serve(resources=None, port=8000):
+    '''
+    Run the development server until it is interrupted with Ctrl-C, at which
+    point SystemExit(0) is raised.
+
+    :param resources: Optional mapping of requested resource names to the
+                      paths of the files that should be used for them
+    :param port: The TCP port to listen on (on all interfaces)
+    '''
+    # Use a fresh dict per call rather than a shared mutable default argument
+    Handler.special_resources = {} if resources is None else resources
+    httpd = HTTPD(('0.0.0.0', port), Handler)
    print('serving at localhost:%d'%port)
    try:
-        try:
-            httpd.serve_forever()
-        except KeyboardInterrupt:
-            raise SystemExit(0)
-    finally:
-        for x in Handler.generated_files:
-            try:
-                os.remove(x)
-            except:
-                pass
+        httpd.serve_forever()
+    except KeyboardInterrupt:
+        raise SystemExit(0)