diff --git a/recipes/blic.recipe b/recipes/blic.recipe index 384518ec13..e184c1a9b0 100644 --- a/recipes/blic.recipe +++ b/recipes/blic.recipe @@ -1,6 +1,6 @@ __license__ = 'GPL v3' -__copyright__ = '2008-2011, Darko Miletic ' +__copyright__ = '2008-2012, Darko Miletic ' ''' blic.rs ''' @@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe): def print_version(self, url): return url + '/print' - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup + def get_cover_url(self): + soup = self.index_to_soup('http://www.blic.rs/') + alink = soup.find('a', attrs={'id':'blic_naslovna_print'}) + if alink: + return 'http://www.blic.rs' + alink['href'] + return None + \ No newline at end of file diff --git a/recipes/oreilly_premium.recipe b/recipes/oreilly_premium.recipe index c5a615900c..94d24c1e8e 100644 --- a/recipes/oreilly_premium.recipe +++ b/recipes/oreilly_premium.recipe @@ -1,8 +1,15 @@ +# Talking Points is not grabbing everything. +# The look is right, but only the last one added? import re import time from calibre.web.feeds.recipes import BasicNewsRecipe # Allows the Python soup converter, which makes parsing easier. from calibre.ebooks.BeautifulSoup import BeautifulSoup +# strip ads and graphics +# Current Column lacks a title. +# Talking Points Memo - shorten title - Remove year and Bill's name +# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries. 
+# Newsletters: Talking Points Memos covered by cat12 class OReillyPremium(BasicNewsRecipe): title = u'OReilly Premium' @@ -19,7 +26,17 @@ class OReillyPremium(BasicNewsRecipe): # Don't go down recursions = 0 max_articles_per_feed = 2000 - language = 'en' + + debugMessages = True + + # Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList + catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []], + ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []], + ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []], + ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []], + ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []], + ["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []] + ] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -31,6 +48,8 @@ class OReillyPremium(BasicNewsRecipe): br.submit() return br + # Returns the best-guess print url. + # The second parameter (pageURL) is returned if nothing is found. 
def extractPrintURL(self, baseURL, pageURL, printString): tagURL = pageURL soup = self.index_to_soup(pageURL) @@ -38,7 +57,6 @@ class OReillyPremium(BasicNewsRecipe): printText = soup.find('a', text=printString) else : print("Failed to find Print string "+printString+ " in "+pageURL) - if printText: tag = printText.parent tagURL = baseURL+tag['href'] @@ -47,177 +65,111 @@ class OReillyPremium(BasicNewsRecipe): def stripBadChars(self, inString) : return inString.replace("\'", "") - - # returns a qualifying article list - def parseNoSpinArchives(self, baseURL, soupURL, debugMessages): - articleList = [] - soup = self.index_to_soup(soupURL) - for div in soup.findAll(True, attrs={'class':['blogBody'], 'style':['padding-top:10px;']}): - a = div.find('a', href=True) - if not a: - continue - # re == regex. [href] is the link - url = baseURL - url +=re.sub(r'\?.*', '', a['href']) - # Get print version - printURL = self.extractPrintURL(baseURL, url, "Print this entry") - if printURL: - url = printURL - title = self.tag_to_string(a, use_alt=True).strip() - if debugMessages : - print("No Spin Archive Title:"+title+" at url: "+url) - description = 'None' - pubdate = time.strftime('%a, %d %b') - summary = div.find(True, attrs={'class':'summary'}) - if summary: - description = self.tag_to_string(summary, use_alt=False) - articleList.append(dict(title=title, url=url, date=pubdate, description=description, content='')) - return articleList - - - def parseTVArchives(self, baseURL, soupURL, debugMessages): - # TV Archives page has some Ajax, so look for the static only. 
- articleList = [] - soup = self.index_to_soup(soupURL) - if debugMessages : - print("In parseTVArchives") - for div in soup.findAll('a', {'class':['showLinks','homeLinks']}): - a = div - url = baseURL - url +=a['href'] - printURL = self.extractPrintURL(baseURL, url, "Print this entry") - if printURL: - url = printURL - title = self.tag_to_string(a, use_alt=True).strip() - title = self.stripBadChars(title) - if debugMessages : - print("TV Archive "+title+" at url: "+url) - description = 'None' - pubdate = time.strftime('%a, %d %b') - summary = div.find(True, attrs={'class':'summary'}) - if summary: - description = self.tag_to_string(summary, use_alt=False) - articleList.append(dict(title=title, url=url, date=pubdate, description=description, content='')) - if debugMessages : - print("Leaving TV Parse ") - return articleList - - # Get Daily Briefing Archives - def parseDailyBriefs(self, baseURL, soupURL, debugMessages) : - print("Starting daily briefs") - articleList = [] - soup = self.index_to_soup(soupURL) - for div in soup.findAll(True, attrs={'class':['defaultHeaderSmallLinks']}): - # re == regex. 
[href] is the link - url = baseURL - url +=re.sub(r'\?.*', '', div['href']) - printURL = self.extractPrintURL(baseURL, url, "Print this entry") - if printURL: - url = printURL - title = div.contents[0] - if debugMessages : - print("Daily Brief - title:"+title+" at url: "+url) - description = 'None' - pubdate = time.strftime('%a, %d %b') - summary = div.find(True, attrs={'class':'summary'}) - if summary: - description = self.tag_to_string(summary, use_alt=False) - articleList.append(dict(title=title, url=url, date=pubdate, description=description, content='')) - print("Leaving daily briefs") - return articleList - - # Get the weekly Stratfor intelligence report - def parseStratfor(self, baseURL, soupURL, debugMessages): - # http://www.billoreilly.com/blog?categoryID=5 - articleList = [] - soup = self.index_to_soup(soupURL) - if debugMessages : - print("In parseStratfor") - a = soup.find('a', {'class':['blogLinks']}) - url = baseURL - url +=a['href'] - title = self.tag_to_string(a, use_alt=True).strip() - if debugMessages : - print("url: "+url) - print("title:"+title) - # Get Stratfor contents so we can get the real title. 
- stratSoup = self.index_to_soup(url) - title = stratSoup.html.head.title.string - stratIndex = title.find('Stratfor.com:', 0) - if (stratIndex > -1) : - title = title[stratIndex+14:-1] - # Look for first blogBody -1) : + title = title[stratIndex+14:-1] + # Look for first blogBody # {{{ fstr(ans) # }}} -window_scroll_pos = (win) -> # {{{ +window_scroll_pos = (win=window) -> # {{{ if typeof(win.pageXOffset) == 'number' x = win.pageXOffset y = win.pageYOffset @@ -86,18 +87,18 @@ window_scroll_pos = (win) -> # {{{ return [x, y] # }}} -viewport_to_document = (x, y, doc) -> # {{{ +viewport_to_document = (x, y, doc=window?.document) -> # {{{ + until doc == window.document + # We are in a frame + frame = doc.defaultView.frameElement + rect = frame.getBoundingClientRect() + x += rect.left + y += rect.top + doc = frame.ownerDocument win = doc.defaultView [wx, wy] = window_scroll_pos(win) x += wx y += wy - if doc != window.document - # We are in a frame - node = win.frameElement - rect = node.getBoundingClientRect() - [vx, vy] = viewport_to_document(rect.left, rect.top, node.ownerDocument) - x += vx - y += vy return [x, y] # }}} @@ -157,7 +158,8 @@ class CanonicalFragmentIdentifier is_compatible(): Throws an error if the browser is not compatible with this script - at(x, y): which maps a point to a CFI, if possible + at(x, y): Maps a point to a CFI, if possible + at_current(): Returns the CFI corresponding to the current viewport scroll location scroll_to(cfi): which scrolls the browser to a point corresponding to the given cfi, and returns the x and y co-ordinates of the point. 
@@ -397,6 +399,8 @@ class CanonicalFragmentIdentifier if not cd break + # We have an embedded document, transform x, y into the co-ord + # system of the embedded document's viewport rect = target.getBoundingClientRect() x -= rect.left y -= rect.top @@ -557,11 +561,73 @@ class CanonicalFragmentIdentifier null # }}} - current_cfi: () -> # {{{ + at_current: () -> # {{{ [winx, winy] = window_scroll_pos() [winw, winh] = [window.innerWidth, window.innerHeight] + max = Math.max winw = max(winw, 400) winh = max(winh, 600) + deltay = Math.floor(winh/50) + deltax = Math.floor(winw/25) + miny = max(-winy, -winh) + maxy = winh + minx = max(-winx, -winw) + maxx = winw + + dist = (p1, p2) -> + Math.sqrt(Math.pow(p1[0]-p2[0], 2), Math.pow(p1[1]-p2[1], 2)) + + get_cfi = (ox, oy) -> + try + cfi = this.at(ox, oy) + point = this.point(cfi) + catch err + cfi = null + + if point.range != null + r = point.range + rect = r.getClientRects()[0] + + x = (point.a*rect.left + (1-point.a)*rect.right) + y = (rect.top + rect.bottom)/2 + [x, y] = viewport_to_document(x, y, r.startContainer.ownerDocument) + else + node = point.node + r = node.getBoundingClientRect() + [x, y] = viewport_to_document(r.left, r.top, node.ownerDocument) + if typeof(point.x) == 'number' and node.offsetWidth + x += (point.x*node.offsetWidth)/100 + if typeof(point.y) == 'number' and node.offsetHeight + y += (point.y*node.offsetHeight)/100 + + if dist(viewport_to_document(ox, oy), [x, y]) > 50 + cfi = null + + return cfi + + x_loop = (cury) -> + for direction in [-1, 1] + delta = deltax * direction + curx = 0 + until (direction < 0 and curx < minx) or (direction > 0 and curx > maxx) + cfi = get_cfi(curx, cury) + if cfi + return cfi + curx += delta + null + + for direction in [-1, 1] + delta = deltay * direction + cury = 0 + until (direction < 0 and cury < miny) or (direction > 0 and cury > maxy) + cfi = x_loop(cury, -1) + if cfi + return cfi + cury += delta + + # TODO: Return the CFI corresponding to the tag + null + # 
}}} if window? diff --git a/src/calibre/ebooks/oeb/display/test-cfi/cfi-test.coffee b/src/calibre/ebooks/oeb/display/test-cfi/cfi-test.coffee index 3479c95511..663e830441 100644 --- a/src/calibre/ebooks/oeb/display/test-cfi/cfi-test.coffee +++ b/src/calibre/ebooks/oeb/display/test-cfi/cfi-test.coffee @@ -59,26 +59,13 @@ mark_and_reload = (evt) -> setTimeout(fn, 1) null -window_scroll_pos = (win) -> - if typeof(win.pageXOffset) == 'number' - x = win.pageXOffset - y = win.pageYOffset - else # IE < 9 - if document.body and ( document.body.scrollLeft or document.body.scrollTop ) - x = document.body.scrollLeft - y = document.body.scrollTop - else if document.documentElement and ( document.documentElement.scrollLeft or document.documentElement.scrollTop) - y = document.documentElement.scrollTop - x = document.documentElement.scrollLeft - return [x, y] - frame_clicked = (evt) -> iframe = evt.target.ownerDocument.defaultView.frameElement # We know that the offset parent of the iframe is body # So we can easily calculate the event co-ords w.r.t. the browser window - [winx, winy] = window_scroll_pos(window) - x = evt.clientX + iframe.offsetLeft - winx - y = evt.clientY + iframe.offsetTop - winy + rect = iframe.getBoundingClientRect() + x = evt.clientX + rect.left + y = evt.clientY + rect.top mark_and_reload({'clientX':x, 'clientY':y, 'button':evt.button}) window.onload = -> diff --git a/src/calibre/ebooks/oeb/display/test-cfi/iframe.html b/src/calibre/ebooks/oeb/display/test-cfi/iframe.html index fb2f0f969c..94761560fb 100644 --- a/src/calibre/ebooks/oeb/display/test-cfi/iframe.html +++ b/src/calibre/ebooks/oeb/display/test-cfi/iframe.html @@ -23,6 +23,7 @@ indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee

+

Test image

diff --git a/src/calibre/ebooks/oeb/display/test-cfi/index.html b/src/calibre/ebooks/oeb/display/test-cfi/index.html index 9ece7940a7..8398d27791 100644 --- a/src/calibre/ebooks/oeb/display/test-cfi/index.html +++ b/src/calibre/ebooks/oeb/display/test-cfi/index.html @@ -1,7 +1,7 @@ - Testing EPUB CFI + Testing cfi.coffee