0.8.33+

2025-06-23 15:30:45 -04:00 · 2012-01-05 06:35:23 -07:00 · 2012-01-05 06:35:23 -07:00 · b6c715e1e9
commit b6c715e1e9
parent 709947cf93 5b147cd745
20 changed files with 606 additions and 273 deletions
--- a/recipes/alternet.recipe
+++ b/recipes/alternet.recipe
@ -10,11 +10,11 @@ class Alternet(BasicNewsRecipe):
    category = 'News, Magazine'
    description = 'News magazine and online community'
    feeds          = [
-	(u'Front Page', u'http://feeds.feedblitz.com/alternet'),
+        (u'Front Page', u'http://feeds.feedblitz.com/alternet'),
-	(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
+        (u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
-	(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
+        (u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
-	(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
+        (u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
-	]
+        ]
    remove_attributes = ['width', 'align','cellspacing']
    remove_javascript = True
    use_embedded_content   = False
@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe):
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
        return self.temp_files[-1].name
    conversion_options = {'linearize_tables': True}
--- a/recipes/goal.recipe
+++ b/recipes/goal.recipe
@ -0,0 +1,13 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1325677767(BasicNewsRecipe):
    # Recipe for Goal (language 'it'), built from a single goal.com RSS feed.
    # -- identification --
    __author__ = 'faber1971'
    title = u'Goal'
    description = 'Sports news from Italy'
    language = 'it'
    # -- article age / count settings --
    oldest_article = 1
    max_articles_per_feed = 100
    # -- content clean-up --
    auto_cleanup = True
    remove_tags_after = [{'id': 'article_content'}]
    # -- (feed title, feed URL) pairs --
    feeds = [
        (u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss'),
    ]
--- a/recipes/kopalniawiedzy.recipe
+++ b/recipes/kopalniawiedzy.recipe
@ -1,79 +1,79 @@
 __license__   = 'GPL v3'
-__copyright__ = '2011, Attis <attis@attis.one.pl>'
+__copyright__ = '2011 Attis <attis@attis.one.pl>, 2012 Tomasz Długosz <tomek3d@gmail.com>'
 __version__ = 'v. 0.1'
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class KopalniaWiedzy(BasicNewsRecipe):
-		title          = u'Kopalnia Wiedzy'
+        title          = u'Kopalnia Wiedzy'
-		publisher      = u'Kopalnia Wiedzy'
+        publisher      = u'Kopalnia Wiedzy'
-		description    = u'Ciekawostki ze świata nauki i techniki'
+        description    = u'Ciekawostki ze świata nauki i techniki'
-		encoding       = 'utf-8'
+        encoding       = 'utf-8'
-		__author__     = 'Attis'
+        __author__     = 'Attis & Tomasz Długosz'
-		language       = 'pl'
+        language       = 'pl'
-		oldest_article = 7
+        oldest_article = 7
-		max_articles_per_feed = 100
+        max_articles_per_feed = 100
-		INDEX          = u'http://kopalniawiedzy.pl/'
+        INDEX          = u'http://kopalniawiedzy.pl/'
-		remove_javascript     = True    
+        remove_javascript     = True
-		no_stylesheets        = True
+        no_stylesheets        = True
-		remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}]
+        remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
-		remove_tags_after = dict(attrs={'class':'ad-square'})
+        remove_tags_after = dict(attrs={'class':'ad-square'})
-		keep_only_tags    = [dict(name="div", attrs={'id':'articleContent'})]
+        keep_only_tags    = [dict(name="div", attrs={'class':'article-text text-small'})]
-		extra_css      = '.topimage {margin-top: 30px}'
+        extra_css      = '.topimage {margin-top: 30px}'
-		preprocess_regexps = [
+        preprocess_regexps = [
-				(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
+                (re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
-				lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
+                lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
-				(re.compile(u'<br  /><br  />'),
+                (re.compile(u'<br  /><br  />'),
-				lambda match: '<br\/>')
+                lambda match: '<br\/>')
-			]
+            ]
-		feeds = [
+        feeds = [
-			(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
+            (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
-			(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
+            (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
-			(u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
+            (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
-			(u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
+            (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
-			(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
+            (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
-			(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
+            (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
-		]
+        ]
-		def is_link_wanted(self, url, tag):
+        def is_link_wanted(self, url, tag):
-			return tag['class'] == 'next'
+            return tag['class'] == 'next'
-		def remove_beyond(self, tag, next):
+        def remove_beyond(self, tag, next):
-				while tag is not None and getattr(tag, 'name', None) != 'body':
+                while tag is not None and getattr(tag, 'name', None) != 'body':
-						after = getattr(tag, next)
+                        after = getattr(tag, next)
-						while after is not None:
+                        while after is not None:
-								ns = getattr(tag, next)
+                                ns = getattr(tag, next)
-								after.extract()
+                                after.extract()
-								after = ns
+                                after = ns
-						tag = tag.parent
+                        tag = tag.parent
-		def append_page(self, soup, appendtag, position):
+        def append_page(self, soup, appendtag, position):
-				pager = soup.find('a',attrs={'class':'next'})
+                pager = soup.find('a',attrs={'class':'next'})
-				if pager:
+                if pager:
-					nexturl = self.INDEX + pager['href']
+                    nexturl = self.INDEX + pager['href']
-					soup2 = self.index_to_soup(nexturl)
+                    soup2 = self.index_to_soup(nexturl)
-					texttag = soup2.find('div', attrs={'id':'articleContent'})
+                    texttag = soup2.find('div', attrs={'id':'articleContent'})
-					tag = texttag.find(attrs={'class':'pages'})
+                    tag = texttag.find(attrs={'class':'pages'})
-					self.remove_beyond(tag, 'nextSibling')
+                    self.remove_beyond(tag, 'nextSibling')
-					newpos = len(texttag.contents)
+                    newpos = len(texttag.contents)
-					self.append_page(soup2,texttag,newpos)
+                    self.append_page(soup2,texttag,newpos)
-					appendtag.insert(position,texttag)
+                    appendtag.insert(position,texttag)
-		def preprocess_html(self, soup): 
+        def preprocess_html(self, soup):
-				self.append_page(soup, soup.body, 3)
+                self.append_page(soup, soup.body, 3)
-				for item in soup.findAll('div',attrs={'class':'pages'}):
+                for item in soup.findAll('div',attrs={'class':'pages'}):
-					item.extract()
+                    item.extract()
-				for item in soup.findAll('p', attrs={'class':'wykop'}):
+                for item in soup.findAll('p', attrs={'class':'wykop'}):
-					item.extract()
+                    item.extract()
-				return soup
+                return soup
--- a/recipes/macity.recipe
+++ b/recipes/macity.recipe
@ -0,0 +1,23 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1325766771(BasicNewsRecipe):
    # Recipe for Macity (language 'it'), read from an RSS feed hosted on
    # feedsportal.com.
    title          = u'Macity'
    language = 'it'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    def get_article_url(self, article):
        # Start from the URL the base class extracts. When its last path
        # segment is "story01.htm", the second-to-last segment is taken
        # instead and its "0<letter>" codes are expanded into URL text.
        link = BasicNewsRecipe.get_article_url(self, article)
        if link.split('/')[-1]=="story01.htm":
            link=link.split('/')[-2]
            # a[k] is the code letter that follows '0'; b[k] is its replacement
            a=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L'      , 'N'   , 'S'   ]
            b=['0', '.', '/', '?', '-', '=', '&', '_', 'http://', '.com', 'www.']
            # Indexing with -i visits index 0 first and then walks both lists
            # from the end backwards, so the replacements are applied in the
            # order A, S, N, L, I, G, F, E, D, C, B. Each replace() works on
            # the output of the previous one, so this order affects the result.
            for i in range(0,len(a)):
                link=link.replace('0'+a[-i],b[-i])
        return link
    feeds          = [(u'Macity', u'http://www.macitynet.it.feedsportal.com/c/33714/f/599513/index.rss')]
    __author__      = 'faber1971'
    description = 'Apple and hi-tech news'
--- a/recipes/money_pl.recipe
+++ b/recipes/money_pl.recipe
@ -0,0 +1,76 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class FocusRecipe(BasicNewsRecipe):
    # Recipe for Money.pl (language 'pl'), built from its feedsportal.com RSS
    # feeds. print_version() rewrites each feed link before download.
    __license__ = 'GPL v3'
    __author__ = u'intromatyk <intromatyk@gmail.com>'
    language = 'pl'
    version = 1
    title = u'Money.pl'
    category = u'News'
    description = u'Informacje finansowe z kraju i ze świata. Aktualne i archiwalne: notowania giełdowe, kursy walut, wskaźniki gospodarcze.'
    remove_empty_feeds= True
    no_stylesheets=True
    oldest_article = 1
    max_articles_per_feed = 100000
    recursions = 0
    remove_javascript = True
    simultaneous_downloads = 2
    # Raw literal: the pattern text is unchanged, but it no longer relies on
    # Python leaving the unrecognised escapes "\/" and "\." untouched.
    # NOTE(review): nothing in this class reads `r`; kept in case other code does.
    r = re.compile(r'.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
    keep_only_tags = [dict(name = 'div', attrs = {'class' : 'artykul'})]
    remove_tags    = [dict(name='ul', attrs={'class':'socialStuff'})]
    extra_css = '''
                    body {font-family: Arial,Helvetica,sans-serif ;}
                    h1{text-align: left;}
                    h2{font-size: medium; font-weight: bold;}
                    p.lead {font-weight: bold; text-align: left;}
                    .authordate {font-size: small; color: #696969;}
                    .fot{font-size: x-small; color: #666666;}
                    '''
    # (feed title, feed URL) pairs
    feeds          = [
        ('Wiadomosci z kraju', 'http://money.pl.feedsportal.com/c/33900/f/612847/index.rss'),
        ('Wiadomosci ze swiata', 'http://money.pl.feedsportal.com/c/33900/f/612848/index.rss'),
        ('Gospodarka', 'http://money.pl.feedsportal.com/c/33900/f/612849/index.rss'),
        ('Waluty', 'http://money.pl.feedsportal.com/c/33900/f/612850/index.rss'),
        ('Gielda', 'http://money.pl.feedsportal.com/c/33900/f/612851/index.rss'),
        ('Banki', 'http://money.pl.feedsportal.com/c/33900/f/612852/index.rss'),
        ('Fundusze', 'http://money.pl.feedsportal.com/c/33900/f/612853/index.rss'),
        ('Emerytury', 'http://money.pl.feedsportal.com/c/33900/f/612854/index.rss'),
        ('Podatki', 'http://money.pl.feedsportal.com/c/33900/f/612855/index.rss'),
        ('Ubezpieczenia', 'http://money.pl.feedsportal.com/c/33900/f/612856/index.rss'),
        ('Poradniki', 'http://money.pl.feedsportal.com/c/33900/f/612857/index.rss'),
        ('Raporty', 'http://money.pl.feedsportal.com/c/33900/f/612858/index.rss'),
        ('Motoryzacja', 'http://money.pl.feedsportal.com/c/33900/f/612859/index.rss'),
        ('Manager', 'http://money.pl.feedsportal.com/c/33900/f/612860/index.rss'),
        ('Dla firm', 'http://money.pl.feedsportal.com/c/33900/f/612861/index.rss'),
        ('Prawo', 'http://money.pl.feedsportal.com/c/33900/f/612862/index.rss'),
        ('Nieruchomosci', 'http://money.pl.feedsportal.com/c/33900/f/612863/index.rss'),
        ('Praca', 'http://money.pl.feedsportal.com/c/33900/f/612864/index.rss'),
    ]
    def print_version(self, url):
        """Return the URL to download for the article link `url`.

        Links that do not mention money.pl.feedsportal.com only get
        '/nc/1' swapped for '/do-druku/1'. feedsportal links are rebuilt
        as a www.m.money.pl article URL from the encoded path that follows
        the '0Cartykul0C' marker. If the marker is missing, `url` is
        returned unchanged.
        """
        if 'money.pl.feedsportal.com' not in url:
            return url.replace('/nc/1','/do-druku/1')
        start = url.find('0Cartykul0C')
        if start == -1:
            # find() signals "not found" with -1; slicing from -1 + 21 would
            # glue an arbitrary tail of the link onto the article prefix.
            return url
        u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[start + 21:]
        # Applied strictly in this order; each step sees the previous output.
        substitutions = (
            ('0C', '/'),
            ('A', ''),
            ('0E', '-'),
            ('0P', ';'),
            ('0H', ','),
            ('0B', '.'),
            (',0,', ',-1,'),
            ('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', ''),
        )
        for old, new in substitutions:
            u = u.replace(old, new)
        return u
--- a/recipes/wired_it.recipe
+++ b/recipes/wired_it.recipe
@ -0,0 +1,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1325758162(BasicNewsRecipe):
    # Recipe for Wired (language 'it'), built from the wired.it RSS feed.
    # -- identification --
    __author__ = 'faber1971'
    title = u'Wired'
    description = 'An American magazine that reports on how new technology affects culture, the economy, and politics'
    language = 'it'
    # -- article age / count settings --
    oldest_article = 7
    max_articles_per_feed = 100
    # -- content clean-up --
    auto_cleanup = True
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'article_content'}}]
    # -- (feed title, feed URL) pairs --
    feeds = [
        (u'Wired', u'http://www.wired.it/rss.xml'),
    ]
--- a/src/calibre/ebooks/mobi/debug.py
+++ b/src/calibre/ebooks/mobi/debug.py
@ -1410,19 +1410,22 @@ class MOBIFile(object): # {{{
            self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
            min(len(self.records), ntr+1))]
        self.image_records, self.binary_records = [], []
        image_index = 0
        for i in xrange(fntbr, len(self.records)):
            if i in self.indexing_record_nums or i in self.huffman_record_nums:
                continue
            image_index += 1
            r = self.records[i]
            fmt = None
-            if i >= fii and r.raw[:4] not in (b'FLIS', b'FCIS', b'SRCS',
+            if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
-                    b'\xe9\x8e\r\n'):
+                    b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
                    b'AUDI', b'VIDE'}:
                try:
                    width, height, fmt = identify_data(r.raw)
                except:
                    pass
            if fmt is not None:
-                self.image_records.append(ImageRecord(len(self.image_records)+1, r, fmt))
+                self.image_records.append(ImageRecord(image_index, r, fmt))
            else:
                self.binary_records.append(BinaryRecord(i, r))
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -974,12 +974,13 @@ class MobiReader(object):
                continue
            processed_records.append(i)
            data  = self.sections[i][0]
            image_index += 1
            if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
                    b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
-                # A FLIS, FCIS, SRCS or EOF record, ignore
+                # This record is a known non image type, no need to try to
                # load the image
                continue
            buf = cStringIO.StringIO(data)
            image_index += 1
            try:
                im = PILImage.open(buf)
                im = im.convert('RGB')
--- a/src/calibre/ebooks/oeb/display/cfi.coffee
+++ b/src/calibre/ebooks/oeb/display/cfi.coffee
@ -4,15 +4,24 @@
 ###
 Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
 Released under the GPLv3 License
- Based on code originally written by Peter Sorotkin (epubcfi.js)
+ Based on code originally written by Peter Sorotkin
 (http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js)
 Improvements with respect to that code:
 1. Works on all browsers (WebKit, Firefox and IE >= 8)
 2. Works if the point is after the last text character in an element
 3. Works for elements that are scrollable (i.e. have their own scrollbars)
 To check if this script is compatible with the current browser, call
 window.cfi.is_compatible() it will throw an exception if not compatible.
 ###
-#
+
-log = (error) ->
+log = (error) -> # {{{
    if error
        if window?.console?.log
            window.console.log(error)
        else if process?.stdout?.write
            process.stdout.write(error + '\n')
 # }}}
 # CFI escaping {{{
 escape_for_cfi = (raw) ->
@ -51,12 +60,111 @@ fstr = (d) -> # {{{
    ans
 # }}}
 get_current_time = (target) -> # {{{
    # Format target's playback position with fstr(); a target that does
    # not define currentTime is reported as position 0
    position = if target.currentTime != undefined then target.currentTime else 0
    fstr(position)
 # }}}
 viewport_to_document = (x, y, doc) -> # {{{
    # Convert the point (x, y), given in viewport coordinates of doc, into
    # coordinates of the top-level document (window.document).
    # Returns [x, y].
    win = doc.defaultView
    if doc != window.document
        # We are in a frame: shift the point by the frame element's position
        # in its parent viewport, then continue from the parent document.
        # The point itself must be carried along, not replaced by the
        # frame's origin.
        frame = win.frameElement
        rect = frame.getBoundingClientRect()
        return viewport_to_document(x + rect.left, y + rect.top, frame.ownerDocument)
    # Top-level document: add the scroll offset exactly once
    return [x + win.scrollX, y + win.scrollY]
 # }}}
 # Equivalent for caretRangeFromPoint for non WebKit browsers {{{
 range_has_point = (range, x, y) ->
    # Report whether (x, y) lies inside any of the range's client
    # rectangles, edges included
    for box in range.getClientRects()
        within_x = box.left <= x and x <= box.right
        within_y = box.top <= y and y <= box.bottom
        return true if within_x and within_y
    false
 offset_in_text_node = (node, range, x, y) ->
    # Bisect the text of node to find the character offset whose rendered
    # box contains the point (x, y). The passed-in range is re-pointed at
    # sub-spans of node on every step, so its start/end are modified.
    # Returns the matching offset, or the lower bound of the last interval
    # examined when no sub-span contains the point.
    limits = [0, node.nodeValue.length]
    while limits[0] != limits[1]
        pivot = Math.floor( (limits[0] + limits[1]) / 2 )
        # Candidate halves on either side of the pivot character
        lr = [limits[0], pivot]
        rr = [pivot+1, limits[1]]
        # First test the single character at the pivot
        range.setStart(node, pivot)
        range.setEnd(node, pivot+1)
        if range_has_point(range, x, y)
            return pivot
        # Then the right half ...
        range.setStart(node, rr[0])
        range.setEnd(node, rr[1])
        if range_has_point(range, x, y)
            limits = rr
            continue
        # ... and finally the left half
        range.setStart(node, lr[0])
        range.setEnd(node, lr[1])
        if range_has_point(range, x, y)
            limits = lr
            continue
        # Neither the pivot nor either half contains the point: give up
        break
    return limits[0]
 find_offset_for_point = (x, y, node, cdoc) ->
    # Find the text position under the point (x, y) among the direct
    # children of node, using a range created from cdoc.
    # Returns [child, offset]. When no child's text contains the point,
    # the result is the last non-empty text-like child together with its
    # full length, i.e. the position after its final character.
    # Throws a string if node has no non-empty child of type 3, 4, 5 or 6.
    range = cdoc.createRange()
    child = node.firstChild
    last_child = null
    while child
        # Only children with nodeType 3, 4, 5 or 6 and a non-empty
        # nodeValue are considered
        if child.nodeType in [3, 4, 5, 6] and child.nodeValue?.length
            range.setStart(child, 0)
            range.setEnd(child, child.nodeValue.length)
            if range_has_point(range, x, y)
                return [child, offset_in_text_node(child, range, x, y)]
            last_child = child
        child = child.nextSibling
    if not last_child
        throw "#{node} has no children"
    # The point must be after the last bit of text
    return [last_child, last_child.nodeValue.length]
 # }}}
 class CanonicalFragmentIdentifier
    # This class is a namespace to expose CFI functions via the window.cfi
    # object
-    constructor: () ->
+    constructor: () -> # {{{
        this.CREATE_RANGE_ERR = "Your browser does not support the createRange function. Update it to a newer version."
        this.IE_ERR = "Your browser is too old. You need Internet Explorer version 8 or newer."
    # }}}
    is_compatible: () -> # {{{
        if not window.document.createRange
            throw this.CREATE_RANGE_ERR
        # Check if Internet Explorer >= 8 as getClientRects returns physical
        # rather than logical pixels on older IE
        div = document.createElement('div')
        ver = 3
        while true
            div.innerHTML = "<!--[if gt IE #{ ++ver }]><i></i><![endif]-->"
            if div.getElementsByTagName('i').length == 0
                break
        if ver > 4 and ver < 8
            # We have IE < 8
            throw this.IE_ERR
    # }}}
    set_current_time: (target, val) -> # {{{
        if target.currentTime == undefined
            return
        if target.readyState == 4 or target.readyState == "complete"
            target.currentTime = val
        else
            fn = -> target.currentTime = val
            target.addEventListener("canplay", fn, false)
    #}}}
    encode: (doc, node, offset, tail) -> # {{{
        cfi = tail or ""
@ -64,7 +172,7 @@ class CanonicalFragmentIdentifier
        # Handle the offset, if any
        switch node.nodeType
            when 1 # Element node
-                if typeoff(offset) == 'number'
+                if typeof(offset) == 'number'
                    node = node.childNodes.item(offset)
            when 3, 4, 5, 6 # Text/entity/CDATA node
                offset or= 0
@ -89,12 +197,12 @@ class CanonicalFragmentIdentifier
                        cfi = "!" + cfi
                        continue
                break
-            # Increase index by the length of all previous sibling text nodes
+            # Find position of node in parent
            index = 0
            child = p.firstChild
            while true
-                index |= 1
+                index |= 1 # Increment index by 1 if it is even
-                if child.nodeType in [1, 7]
+                if child.nodeType == 1
                    index++
                if child == node
                    break
@ -117,8 +225,8 @@ class CanonicalFragmentIdentifier
        error = null
        node = doc
-        until cfi.length <= 0 or error
+        until cfi.length < 1 or error
-            if ( (r = cfi.match(simple_node_regex)) is not null ) # Path step
+            if (r = cfi.match(simple_node_regex)) # Path step
                target = parseInt(r[1])
                assertion = r[2]
                if assertion
@ -136,11 +244,18 @@ class CanonicalFragmentIdentifier
                            error = "No matching child found for CFI: " + cfi
                        break
                    index |= 1 # Increment index by 1 if it is even
-                    if child.nodeType in [1, 7] # We have an element or a PI
+                    if child.nodeType == 1
                        index++
                    if ( index == target )
                        cfi = cfi.substr(r[0].length)
                        node = child
                        if assertion and node.id != assertion
                            # The found child does not match the id assertion,
                            # trust the id assertion if an element with that id
                            # exists
                            child = doc.getElementById(assertion)
                            if child
                                node = child
                        break
                    child = child.nextSibling
@ -198,7 +313,9 @@ class CanonicalFragmentIdentifier
                next = false
                while true
                    nn = node.nextSibling
-                    if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata
+                    if not nn
                        break
                    if nn.nodeType in [3, 4, 5, 6] and nn.nodeValue?.length # Text node, entity, cdata
                        next = nn
                        break
                if not next
@ -253,7 +370,7 @@ class CanonicalFragmentIdentifier
        (if target.parentNode then target.parentNode else target).normalize()
        if name in ['audio', 'video']
-            tail = "~" + fstr target.currentTime
+            tail = "~" + get_current_time(target)
        if name in ['img', 'video']
            px = ((x + cwin.scrollX - target.offsetLeft)*100)/target.offsetWidth
@ -265,9 +382,12 @@ class CanonicalFragmentIdentifier
                if range
                    target = range.startContainer
                    offset = range.startOffset
                else
                    throw "Failed to find range from point (#{ x }, #{ y })"
            else if cdoc.createRange
                [target, offset] = find_offset_for_point(x, y, target, cdoc)
            else
-                # TODO: implement a span bisection algorithm for UAs
+                throw this.CREATE_RANGE_ERR
                # without caretRangeFromPoint (Gecko, IE)
        this.encode(doc, target, offset, tail)
    # }}}
@ -285,52 +405,102 @@ class CanonicalFragmentIdentifier
        nwin = ndoc.defaultView
        x = null
        y = null
        range = null
        if typeof(r.offset) == "number"
            # Character offset
            if not ndoc.createRange
                throw this.CREATE_RANGE_ERR
            range = ndoc.createRange()
            if r.forward
                try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}]
            else
                try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}]
            k = 0
            a = null
            rects = null
            node_len = node.nodeValue.length
-            until rects or rects.length or k >= try_list.length
+            offset = r.offset
-                t = try_list[k++]
+            for i in [0, 1]
-                start_offset = r.offset + t.start
+                # Try reducing the offset by 1 if we get no match as if it refers to the position after the
-                end_offset = r.offset + t.end
+                # last character we won't get a match with getClientRects
-                a = t.a
+                offset = r.offset - i
-                if start_offset < 0 or end_offset >= node_len
+                if offset < 0
-                    continue
+                    offset = 0
-                range.setStart(node, start_offset)
+                k = 0
-                range.setEnd(node, end_offset)
+                until rects?.length or k >= try_list.length
-                rects = range.getClientRects()
+                    t = try_list[k++]
                    start_offset = offset + t.start
                    end_offset = offset + t.end
                    a = t.a
                    if start_offset < 0 or end_offset >= node_len
                        continue
                    range.setStart(node, start_offset)
                    range.setEnd(node, end_offset)
                    rects = range.getClientRects()
                if rects?.length
                    break
-            if not rects or not rects.length
+
            if not rects?.length
                log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }")
                return null
            rect = rects[0]
            x = (a*rect.left + (1-a)*rect.right)
            y = (rect.top + rect.bottom)/2
        else
-            x = node.offsetLeft - nwin.scrollX
+            [x, y] = [r.x, r.y]
            y = node.offsetTop - nwin.scrollY
            if typeof(r.x) == "number" and node.offsetWidth
                x += (r.x*node.offsetWidth)/100
                y += (r.y*node.offsetHeight)/100
-        until ndoc == doc
+        {x:x, y:y, node:r.node, time:r.time, range:range, a:a}
-            node = nwin.frameElement
+
    # }}}
    scroll_to: (cfi, callback=false, doc=window?.document) -> # {{{
        point = this.point(cfi, doc)
        if not point
            log("No point found for cfi: #{ cfi }")
            return
        if typeof point.time == 'number'
            this.set_current_time(point.node, point.time)
        if point.range != null
            r = point.range
            node = r.startContainer
            ndoc = node.ownerDocument
            nwin = ndoc.defaultView
-            x += node.offsetLeft - nwin.scrollX
+            span = ndoc.createElement('span')
-            y += node.offsetTop - nwin.scrollY
+            span.setAttribute('style', 'border-width: 0; padding: 0; margin: 0')
            r.surroundContents(span)
            span.scrollIntoView()
            fn = ->
                rect = span.getBoundingClientRect()
                x = (point.a*rect.left + (1-point.a)*rect.right)
                y = (rect.top + rect.bottom)/2
                [x, y] = viewport_to_document(x, y, ndoc)
                tn = if span.firstChild then span.firstChild.nodeValue else ''
                tn = ndoc.createTextNode(tn)
                p = span.parentNode
                p.insertBefore(tn, span)
                p.removeChild(span)
                p.normalize()
                if callback
                    callback(x, y)
        else
            node = point.node
            nwin = node.ownerDocument.defaultView
            node.scrollIntoView()
-        {x:x, y:y, node:r.node, time:r.time}
+            fn = ->
                rect = node.getBoundingClientRect()
                [x, y] = viewport_to_document(rect.left, rect.top, node.ownerDocument)
                if typeof(point.x) == 'number' and node.offsetWidth
                    x += (r.x*node.offsetWidth)/100
                if typeof(point.y) == 'number' and node.offsetHeight
                    y += (r.y*node.offsetHeight)/100
                scrollTo(x, y)
                if callback
                    callback(x, y)
        setTimeout(fn, 10)
        null
    # }}}
 if window?
--- a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee
+++ b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee
@ -6,19 +6,53 @@
 Released under the GPLv3 License
 ###
-viewport_top = (node) ->
+log = (error) ->
-    $(node).offset().top - window.pageYOffset
+    if error
        if window?.console?.log
            window.console.log(error)
        else if process?.stdout?.write
            process.stdout.write(error + '\n')
-viewport_left = (node) ->
+show_cfi = () ->
-    $(node).offset().left - window.pageXOffset
+    if window.current_cfi
        fn = (x, y) ->
            ms = document.getElementById("marker").style
            ms.display = 'block'
            ms.top = y - 30 + 'px'
            ms.left = x - 1 + 'px'
        window.cfi.scroll_to(window.current_cfi, fn)
    null
 mark_and_reload = (evt) ->
    # Click handler: compute the CFI for the clicked point, put it into the
    # URL fragment as #epubcfi(...), and reload the page. Always returns null.
    # Remove image in case the click was on the image itself, we want the cfi to
    # be on the underlying element
    ms = document.getElementById("marker")
    ms.parentNode.removeChild(ms)
    fn = () ->
        # Runs from the timer below, after the marker has been removed
        window.current_cfi = window.cfi.at(evt.clientX, evt.clientY)
        if window.current_cfi
            epubcfi = "#epubcfi(#{ window.current_cfi })"
            # Drop any existing fragment before appending the new one
            newloc = window.location.href.replace(/#.*$/, '') + epubcfi
            window.location.replace(newloc)
            document.getElementById('current-cfi').innerHTML = window.current_cfi
            window.location.reload()
    setTimeout(fn, 1)
    null
 window.onload = ->
-    h1 = document.getElementsByTagName('h1')[0]
+    try
-    x = h1.scrollLeft + 150
+        window.cfi.is_compatible()
-    y = viewport_top(h1) + h1.offsetHeight/2
+    catch error
-    e = document.elementFromPoint x, y
+        alert(error)
    if e.getAttribute('id') != 'first-h1'
        alert 'Failed to find top h1'
        return
-    alert window.cfi.at x, y
+    document.onclick = mark_and_reload
    r = location.hash.match(/#epubcfi\((.+)\)$/)
    if r
        window.current_cfi = r[1]
        document.getElementById('current-cfi').innerHTML = window.current_cfi
        setTimeout(show_cfi, 100)
    null
--- a/src/calibre/ebooks/oeb/display/test/index.html
+++ b/src/calibre/ebooks/oeb/display/test/index.html
@ -0,0 +1,65 @@
 <!DOCTYPE html>
 <html>
    <head>
        <title>Testing CFI functionality</title>
        <script type="text/javascript" src="cfi.coffee"></script>
        <script type="text/javascript" src="cfi-test.coffee"></script>
        <style type="text/css">
            body { font-family: sans-serif }
            h2 {
                border-top: solid 2px black;
                margin-top: 4ex;
            }
            #container {
                max-width: 30em;
                margin-right: auto;
                margin-left: 2em;
                position:relative;
            }
            #current-cfi { 
                font-family: monospace;
                border: solid 1px blue;
                padding: 1em;
            }
            #overflow {
                max-height: 100px;
                overflow: scroll;
                border: solid 1px black;
            }
        </style>
    </head>
    <body>
        <div id="container">
            <h1 id="first-h1">Testing EPUB CFI</h1>
            <div id="current-cfi">Current CFI:&nbsp;None</div>
            <h2>A div with scrollbars</h2>
            <div id="overflow"> But I must explain to you how all this mistaken
                idea of denouncing pleasure and praising pain was born and I
                will give you a complete account of the system, and expound the
                actual teachings of the great explorer of the truth, the
                master-builder of human happiness. No one rejects, dislikes, or
                avoids pleasure itself, because it is pleasure, but because
                those who do not know how to pursue pleasure rationally
                encounter consequences that are extremely painful. Nor again is
                there anyone who <b>loves</b> or pursues or desires to obtain pain of
                itself, because it is pain, but because occasionally
                circumstances occur in which toil and pain can procure him some
                great pleasure. To take a trivial example, which of us ever
                undertakes laborious physical exercise, except to obtain some
                advantage from it? But who has any right to find fault with a
                man who chooses to enjoy a pleasure that has no annoying
                consequences, or one who avoids a pain that produces no
                resultant pleasure? On the other hand, we denounce with
                righteous indignation and dislike men who are so beguiled and
                demoralized by the charms of pleasure of the moment, so blinded
                by desire, that they cannot foresee
            </div>
        </div>
        <img id="marker" style="position: absolute; display:none; z-index:10" src="marker.png" alt="Marker" />
    </body>
 </html>
--- a/src/calibre/ebooks/oeb/display/test/marker.png
+++ b/src/calibre/ebooks/oeb/display/test/marker.png
--- a/src/calibre/ebooks/oeb/display/test/test.html
+++ b/src/calibre/ebooks/oeb/display/test/test.html
@ -1,14 +0,0 @@
 <!DOCTYPE html>
 <html>
    <head>
        <title>Testing CFI functionality</title>
        <script type="text/javascript" src="../cfi.coffee"></script>
        <script type="text/javascript" src="jquery.js"></script>
        <script type="text/javascript" src="cfi-test.coffee"></script>
    </head>
    <body>
        <h1 id="first-h1" style="border: solid 1px red">Testing CFI functionality</h1>
    </body>
 </html>
--- a/src/calibre/ebooks/oeb/display/test/test.py
+++ b/src/calibre/ebooks/oeb/display/test/test.py
@ -16,10 +16,9 @@ except ImportError:
    if False: init_calibre, serve
    from calibre.utils.coffeescript import serve
 def run_devel_server():
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
-    serve()
+    serve(resources={'cfi.coffee':'../cfi.coffee'})
 if __name__ == '__main__':
    run_devel_server()
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -80,7 +80,7 @@ class PML_HTMLizer(object):
        'b': ('<span style="font-weight: bold;">', '</span>'),
        'l': ('<span style="font-size: 150%;">', '</span>'),
        'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
-        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'),
+        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><small><a href="#rfn-%s">return</a></small></div>'),
        'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
    }
--- a/src/calibre/gui2/add.py
+++ b/src/calibre/gui2/add.py
@ -14,7 +14,7 @@ from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.metadata import MetaInformation
 from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG
 from calibre.utils.config import prefs
-from calibre import prints
+from calibre import prints, force_unicode, as_unicode
 single_shot = partial(QTimer.singleShot, 75)
@ -66,7 +66,8 @@ class RecursiveFind(QThread): # {{{
            if self.canceled:
                return
            self.update.emit(
-                    _('Searching in')+' '+dirpath[0])
+                    _('Searching in')+' '+force_unicode(dirpath[0],
                        filesystem_encoding))
            self.books += list(self.db.find_books_in_directory(dirpath[0],
                                            self.single_book_per_directory))
@ -82,10 +83,7 @@ class RecursiveFind(QThread): # {{{
            except Exception as err:
                import traceback
                traceback.print_exc()
-                try:
+                msg = as_unicode(err)
                    msg = unicode(err)
                except:
                    msg = repr(err)
                self.found.emit(msg)
                return
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -12,14 +12,13 @@ from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer,
                     QPainter, QPalette, QBrush, QFontDatabase, QDialog,
                     QColor, QPoint, QImage, QRegion, QVariant, QIcon,
                     QFont, pyqtSignature, QAction, QByteArray, QMenu,
-                     pyqtSignal)
+                     pyqtSignal, QSwipeGesture)
 from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
 from calibre.utils.config import Config, StringConfig
 from calibre.utils.localization import get_language
 from calibre.gui2.viewer.config_ui import Ui_Dialog
 from calibre.gui2.viewer.flip import SlideFlip
 from calibre.gui2.viewer.gestures import Gestures
 from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig
 from calibre.constants import iswindows
 from calibre import prints, guess_type
@ -514,7 +513,6 @@ class DocumentView(QWebView): # {{{
    def __init__(self, *args):
        QWebView.__init__(self, *args)
        self.flipper = SlideFlip(self)
        self.gestures = Gestures()
        self.is_auto_repeat_event = False
        self.debug_javascript = False
        self.shortcuts =  Shortcuts(SHORTCUTS, 'shortcuts/viewer')
@ -582,6 +580,7 @@ class DocumentView(QWebView): # {{{
            else:
                m.addAction(name, a[key], self.shortcuts.get_sequences(key)[0])
        self.goto_location_action.setMenu(self.goto_location_menu)
        self.grabGesture(Qt.SwipeGesture)
    def goto_next_section(self, *args):
        if self.manager is not None:
@ -1047,28 +1046,24 @@ class DocumentView(QWebView): # {{{
            self.manager.viewport_resized(self.scroll_fraction)
    def event(self, ev):
-        typ = ev.type()
+        if ev.type() == ev.Gesture:
-        if typ == ev.TouchBegin:
+            swipe = ev.gesture(Qt.SwipeGesture)
-            try:
+            if swipe is not None:
-                self.gestures.start_gesture('touch', ev)
+                self.handle_swipe(swipe)
            except:
                import traceback
                traceback.print_exc()
        elif typ == ev.TouchEnd:
            try:
                gesture = self.gestures.end_gesture('touch', ev, self.rect())
            except:
                import traceback
                traceback.print_exc()
            if gesture is not None:
                ev.accept()
                if gesture == 'lineleft':
                    self.next_page()
                elif gesture == 'lineright':
                    self.previous_page()
                return True
        return QWebView.event(self, ev)
    def handle_swipe(self, swipe):
        '''
        React to a swipe gesture once it has finished.

        Horizontal swipes turn pages and vertical swipes jump between
        sections. A horizontal direction takes precedence over a vertical
        one when the gesture reports both.

        :param swipe: The swipe gesture object taken from the gesture event.
        '''
        if swipe.state() != Qt.GestureFinished:
            # Ignore gestures that are still in progress or were cancelled
            return
        if swipe.horizontalDirection() == QSwipeGesture.Left:
            self.previous_page()
        elif swipe.horizontalDirection() == QSwipeGesture.Right:
            self.next_page()
        elif swipe.verticalDirection() == QSwipeGesture.Up:
            self.goto_previous_section()
        # Down is a vertical direction: it must be read from
        # verticalDirection(), horizontalDirection() can never report it
        elif swipe.verticalDirection() == QSwipeGesture.Down:
            self.goto_next_section()
    def mouseReleaseEvent(self, ev):
        opos = self.document.ypos
        ret = QWebView.mouseReleaseEvent(self, ev)
--- a/src/calibre/gui2/viewer/gestures.py
+++ b/src/calibre/gui2/viewer/gestures.py
@ -1,61 +0,0 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import time
 class Gestures(object):
    '''
    Recognize simple gestures from a pair of start/end input events.

    The start of a gesture is recorded with :meth:`start_gesture` and the
    gesture is classified when :meth:`end_gesture` is called with the
    matching end event.
    '''
    def __init__(self):
        # Maps a gesture type to the boundary point recorded at its start
        self.in_progress = {}
    def get_boundary_point(self, event):
        '''
        Return ``(timestamp, global_pos, pos, id)`` for ``event``.

        For events that have ``touchPoints()`` the primary touch point is
        used, falling back to the first touch point if none is primary, and
        ``id`` is that touch point's id. For all other events the event's
        own global and local positions are used and ``id`` is None.
        '''
        timestamp = time.time()
        id_ = None
        if hasattr(event, 'touchPoints'):
            tps = list(event.touchPoints())
            tp = None
            # Use a dedicated loop variable here: reusing the timestamp's
            # name would replace the timestamp with a touch point object
            for candidate in tps:
                if candidate.isPrimary():
                    tp = candidate
                    break
            if tp is None:
                tp = tps[0]
            gp, p = tp.screenPos(), tp.pos()
            id_ = tp.id()
        else:
            gp, p = event.globalPos(), event.pos()
        return (timestamp, gp, p, id_)
    def start_gesture(self, typ, event):
        'Record ``event`` as the starting point of a gesture of type ``typ``.'
        self.in_progress[typ] = self.get_boundary_point(event)
    def is_in_progress(self, typ):
        'Return True if a start point has been recorded for ``typ``.'
        return typ in self.in_progress
    def end_gesture(self, typ, event, widget_rect):
        '''
        Classify the gesture of type ``typ`` that ends with ``event``.

        Returns ``'lineleft'`` or ``'lineright'`` for a quick, mostly
        horizontal movement spanning at least a fifth of the width of
        ``widget_rect``. Returns None if no gesture of this type was started,
        if the start and end events come from different touch points, or if
        the movement does not qualify.
        '''
        if not self.is_in_progress(typ):
            return
        start = self.in_progress[typ]
        end = self.get_boundary_point(event)
        if start[3] != end[3]:
            # Start and end belong to different touch points
            return
        timespan = end[0] - start[0]
        start_pos, end_pos = start[1], end[1]
        xspan = end_pos.x() - start_pos.x()
        yspan = end_pos.y() - start_pos.y()
        width = widget_rect.width()
        if timespan < 1.1 and abs(xspan) >= width/5. and \
                abs(yspan) < abs(xspan)/5.:
            # Quick horizontal gesture
            return 'line'+('left' if xspan < 0 else 'right')
        return None
--- a/src/calibre/utils/browser.py
+++ b/src/calibre/utils/browser.py
@ -11,7 +11,11 @@ from cookielib import CookieJar
 from mechanize import Browser as B
 class Browser(B):
-    'A cloneable mechanize browser'
+    '''
    A cloneable mechanize browser. Useful for multithreading. The idea is that
    each thread has a browser clone. Every clone uses the same thread safe
    cookie jar. All clones share the same browser configuration.
    '''
    def __init__(self):
        self._clone_actions = {}
--- a/src/calibre/utils/coffeescript.py
+++ b/src/calibre/utils/coffeescript.py
@ -11,16 +11,32 @@ __docformat__ = 'restructuredtext en'
 Utilities to help with developing coffeescript based apps
 '''
 import time, SimpleHTTPServer, SocketServer, os, subprocess
 from io import BytesIO
 class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler):
-    generated_files = set()
+    special_resources = {}
    compiled_cs = {}
    def send_head(self):
        path = self.path
        if path.endswith('.coffee'):
            path = path[1:] if path.startswith('/') else path
            path = self.special_resources.get(path, path)
            raw, mtime = self.compile_coffeescript(path)
            self.send_response(200)
            self.send_header("Content-type", b'text/javascript')
            self.send_header("Content-Length", bytes(len(raw)))
            self.send_header("Last-Modified", self.date_time_string(int(mtime)))
            self.end_headers()
            return BytesIO(raw)
        return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
    def translate_path(self, path):
-        if path.endswith('jquery.js'):
+        path = self.special_resources.get(path, path)
        if path.endswith('/jquery.js'):
            return P('content_server/jquery.js')
        if path.endswith('.coffee'):
            return self.compile_coffeescript(path[1:])
        return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(self,
                path)
@ -31,36 +47,33 @@ class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler):
        except:
            time.sleep(0.01)
            sstat = os.stat(src)
-        return (not os.access(dest, os.R_OK) or sstat.st_mtime >
+        return sstat.st_mtime > dest
                os.stat(dest).st_mtime)
    def compile_coffeescript(self, src):
-        dest = os.path.splitext(src)[0] + '.js'
+        raw, mtime = self.compiled_cs.get(src, (None, 0))
-        self.generated_files.add(dest)
+        if self.newer(src, mtime):
-        if self.newer(src, dest):
+            mtime = time.time()
-            with open(dest, 'wb') as f:
+            try:
-                try:
+                raw = subprocess.check_output(['coffee', '-c', '-p', src])
-                    subprocess.check_call(['coffee', '-c', '-p', src], stdout=f)
+            except:
-                except:
+                print('Compilation of %s failed'%src)
-                    print('Compilation of %s failed'%src)
+                cs = '''
-                    f.seek(0)
+                // Compilation of coffeescript failed
-                    f.truncate()
+                alert("Compilation of %s failed");
-                    f.write('// Compilation of coffeescript failed')
+                '''%src
-                    f.write('alert("Compilation of %s failed");'%src)
+                raw = cs.encode('utf-8')
-        return dest
+            self.compiled_cs[src] = (raw, mtime)
        return raw, mtime
-def serve(port=8000):
+class HTTPD(SocketServer.TCPServer):
-    httpd = SocketServer.TCPServer(('localhost', port), Handler)
+    allow_reuse_address = True
 def serve(resources={}, port=8000):
    Handler.special_resources = resources
    httpd = HTTPD(('0.0.0.0', port), Handler)
    print('serving at localhost:%d'%port)
    try:
-        try:
+        httpd.serve_forever()
-            httpd.serve_forever()
+    except KeyboardInterrupt:
-        except KeyboardInterrupt:
+        raise SystemExit(0)
            raise SystemExit(0)
    finally:
        for x in Handler.generated_files:
            try:
                os.remove(x)
            except:
                pass