Merge from trunk
commit c68075bc08
@@ -1,6 +1,6 @@
 
 __license__   = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 blic.rs
 '''
@@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe):
     def print_version(self, url):
         return url + '/print'
 
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
+    def get_cover_url(self):
+        soup = self.index_to_soup('http://www.blic.rs/')
+        alink = soup.find('a', attrs={'id':'blic_naslovna_print'})
+        if alink:
+            return 'http://www.blic.rs' + alink['href']
+        return None
@@ -1,8 +1,15 @@
+# Talking Points is not grabbing everything.
+# The look is right, but only the last one added?
 import re
 import time
 from calibre.web.feeds.recipes import BasicNewsRecipe
 # Allows the Python soup converter, which makes parsing easier.
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
+# strip ads and graphics
+# Current Column lacks a title.
+# Talking Points Memo - shorten title - Remove year and Bill's name
+# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
+# Newsletters: Talking Points Memos covered by cat12
 
 class OReillyPremium(BasicNewsRecipe):
     title = u'OReilly Premium'
@@ -19,7 +26,17 @@ class OReillyPremium(BasicNewsRecipe):
     # Don't go down
     recursions = 0
     max_articles_per_feed = 2000
-    language = 'en'
+
+    debugMessages = True
+
+    # Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
+    catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
+                ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
+                ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
+                ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
+                ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
+                ["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
+              ]
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
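Each catList entry above bundles everything the generic parser added later in this commit needs for one category: name, index page URL, the findAll tag (or True), the findAll attrs, and a slot for the parsed articles. A minimal sketch of how one entry is laid out (illustrative unpacking only, not part of the recipe):

    # Illustration: the shape of one catList entry, indexed as catList[i][0]..[4].
    cat_entry = ["TV Archives",
                 'https://www.billoreilly.com/show?action=tvShowArchive',
                 'a', {'class': ['showLinks', 'homeLinks']}, []]
    name, page_url, tag_or_flag, find_attrs, article_list = cat_entry
    # The parser fetches page_url and calls soup.findAll(tag_or_flag, find_attrs),
    # then appends the resulting article dicts to article_list.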
@@ -31,6 +48,8 @@ class OReillyPremium(BasicNewsRecipe):
         br.submit()
         return br
 
+    # Returns the best-guess print url.
+    # The second parameter (pageURL) is returned if nothing is found.
     def extractPrintURL(self, baseURL, pageURL, printString):
         tagURL = pageURL
         soup = self.index_to_soup(pageURL)
@@ -38,7 +57,6 @@ class OReillyPremium(BasicNewsRecipe):
             printText = soup.find('a', text=printString)
         else :
             print("Failed to find Print string "+printString+ " in "+pageURL)
-
         if printText:
             tag = printText.parent
             tagURL = baseURL+tag['href']
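extractPrintURL() is the helper the parsers lean on: it loads pageURL, looks for a link whose text matches printString, and falls back to pageURL when no such link exists. A minimal call sketch, assuming `recipe` stands for an instance of the OReillyPremium class above (the URL and link text are the ones the recipe itself uses):

    base_url = "https://www.billoreilly.com"
    article_url = base_url + "/currentcolumn"
    # Falls back to article_url if no "Print This Article" link is on the page.
    print_url = recipe.extractPrintURL(base_url, article_url, "Print This Article")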
@@ -47,177 +65,111 @@ class OReillyPremium(BasicNewsRecipe):
     def stripBadChars(self, inString) :
         return inString.replace("\'", "")
 
-    # returns a qualifying article list
-    def parseNoSpinArchives(self, baseURL, soupURL, debugMessages):
-        articleList = []
-        soup = self.index_to_soup(soupURL)
-        for div in soup.findAll(True, attrs={'class':['blogBody'], 'style':['padding-top:10px;']}):
-            a = div.find('a', href=True)
-            if not a:
-                continue
-            # re == regex. [href] is the link
-            url = baseURL
-            url +=re.sub(r'\?.*', '', a['href'])
-            # Get print version
-            printURL = self.extractPrintURL(baseURL, url, "Print this entry")
-            if printURL:
-                url = printURL
-            title = self.tag_to_string(a, use_alt=True).strip()
-            if debugMessages :
-                print("No Spin Archive Title:"+title+" at url: "+url)
-            description = 'None'
-            pubdate = time.strftime('%a, %d %b')
-            summary = div.find(True, attrs={'class':'summary'})
-            if summary:
-                description = self.tag_to_string(summary, use_alt=False)
-            articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
-        return articleList
-
-    def parseTVArchives(self, baseURL, soupURL, debugMessages):
-        # TV Archives page has some Ajax, so look for the static only.
-        articleList = []
-        soup = self.index_to_soup(soupURL)
-        if debugMessages :
-            print("In parseTVArchives")
-        for div in soup.findAll('a', {'class':['showLinks','homeLinks']}):
-            a = div
-            url = baseURL
-            url +=a['href']
-            printURL = self.extractPrintURL(baseURL, url, "Print this entry")
-            if printURL:
-                url = printURL
-            title = self.tag_to_string(a, use_alt=True).strip()
-            title = self.stripBadChars(title)
-            if debugMessages :
-                print("TV Archive "+title+" at url: "+url)
-            description = 'None'
-            pubdate = time.strftime('%a, %d %b')
-            summary = div.find(True, attrs={'class':'summary'})
-            if summary:
-                description = self.tag_to_string(summary, use_alt=False)
-            articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
-        if debugMessages :
-            print("Leaving TV Parse ")
-        return articleList
-
-    # Get Daily Briefing Archives
-    def parseDailyBriefs(self, baseURL, soupURL, debugMessages) :
-        print("Starting daily briefs")
-        articleList = []
-        soup = self.index_to_soup(soupURL)
-        for div in soup.findAll(True, attrs={'class':['defaultHeaderSmallLinks']}):
-            # re == regex. [href] is the link
-            url = baseURL
-            url +=re.sub(r'\?.*', '', div['href'])
-            printURL = self.extractPrintURL(baseURL, url, "Print this entry")
-            if printURL:
-                url = printURL
-            title = div.contents[0]
-            if debugMessages :
-                print("Daily Brief - title:"+title+" at url: "+url)
-            description = 'None'
-            pubdate = time.strftime('%a, %d %b')
-            summary = div.find(True, attrs={'class':'summary'})
-            if summary:
-                description = self.tag_to_string(summary, use_alt=False)
-            articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
-        print("Leaving daily briefs")
-        return articleList
-
-    # Get the weekly Stratfor intelligence report
-    def parseStratfor(self, baseURL, soupURL, debugMessages):
-        # http://www.billoreilly.com/blog?categoryID=5
-        articleList = []
-        soup = self.index_to_soup(soupURL)
-        if debugMessages :
-            print("In parseStratfor")
-        a = soup.find('a', {'class':['blogLinks']})
-        url = baseURL
-        url +=a['href']
-        title = self.tag_to_string(a, use_alt=True).strip()
-        if debugMessages :
-            print("url: "+url)
-            print("title:"+title)
-        # Get Stratfor contents so we can get the real title.
-        stratSoup = self.index_to_soup(url)
-        title = stratSoup.html.head.title.string
-        stratIndex = title.find('Stratfor.com:', 0)
-        if (stratIndex > -1) :
-            title = title[stratIndex+14:-1]
-        # Look for first blogBody <td class="blogBody"
-        stratBody = stratSoup.find('td', {'class':['blogBody']})
-        if debugMessages :
-            print("Strat content title:"+title)
-            print("Strat body: "+ stratBody.contents[0])
-        description = 'None'
-        pubdate = time.strftime('%a, %d %b')
-        articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
-        if debugMessages :
-            print("Leaving Stratfor Parse ")
-        return articleList
-
-    def parseTalkingPoints(self, baseURL, soupURL, debugMessages) :
-        # Look for blogDate. That's got the date... Then the next blogBody has the title. and then an anchor with class "homeBlogReadMore bold" has the URL.
-        articleList = []
-        soup = self.index_to_soup(soupURL)
-        if debugMessages :
-            print("Starting Talking Points")
-        topDate = soup.find("td", "blogBody")
-        if not topDate :
-            print("Failed to find date in Talking Points")
-        # This page has the contents in double-wrapped tables!
-        # tableParent = topDate.parent.parent
-        myTable = topDate.findParents('table')[0]
-        upOneTable = myTable.findParents('table')[0]
-        upTwo = upOneTable.findParents('table')[0]
-        # Now navigate rows of upTwo
-        if debugMessages :
-            print("Entering rows")
-        for rows in upTwo.findChildren("tr", recursive=False):
-            # Inside top level table, each row is an article
-            rowTable = rows.find("table")
-            articleTable = rowTable.find("table")
-            articleTable = rows.find("tr")
-            # The middle table is just for formatting the article buffer... but this means we can skip the inner table.
-            blogDate = articleTable.find("a","blogDate").contents[0]
-            # Skip to second blogBody for this.
-            blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
-            blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
-            # re == regex. [href] is the link
-            url = baseURL
-            url +=re.sub(r'\?.*', '', blogURL)
-            title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
-            if debugMessages :
-                print("Talking Points Memo title "+title+" at url: "+url)
+    def parseGeneric(self, baseURL):
+        # Does a generic parsing of the articles. There are six categories (0-5)
+        # Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
+        # NoSpin and TV are generic
+        fullReturn = []
+        for i in range(len(self.catList)) :
+            articleList = []
+            soup = self.index_to_soup(self.catList[i][1])
+            # Set defaults
             description = 'None'
             pubdate = time.strftime('%a, %d %b')
-            articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
-        print("Exiting parseTalkingPoints\n")
-        return articleList
-
-    def parseCurrentColumn(self, baseURL, soupURL, debugMessages) :
-        # Only needed to get the column title. Otherwise it's all good already; there's only one column
-        articleList = []
-        soup = self.index_to_soup(soupURL)
-        titleSpan = soup.find('span', {'class':['defaultHeader']})
-        title = titleSpan.contents[0]
-        # Get Print URL since it's available
-        printURL = self.extractPrintURL(baseURL, soupURL, "Print This Article")
-        if printURL:
-            print("Found print URL")
-            url = printURL
-        if debugMessages :
-            print("url: "+url)
-            print("title:"+title)
-        description = 'None'
-        pubdate = time.strftime('%a, %d %b')
-        articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
-        if debugMessages :
-            print("Leaving Stratfor Parse ")
-        return articleList
+            # Problem: 0-2 create many in an array
+            # 3-5 create one.
+            # So no for-div for 3-5
+            if i < 3 :
+                for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
+                    print(div)
+                    if i == 1:
+                        a = div.find('a', href=True)
+                    else :
+                        a = div
+                    print(a)
+                    summary = div.find(True, attrs={'class':'summary'})
+                    if summary:
+                        description = self.tag_to_string(summary, use_alt=False)
+                    if not a:
+                        continue
+                    # url = baseURL+re.sub(r'\?.*', '', a['href'])
+                    url = baseURL+a['href']
+                    if i < 2 :
+                        url = self.extractPrintURL(baseURL, url, "Print this entry")
+                        title = self.tag_to_string(a, use_alt=True).strip()
+                    elif i == 2 :
+                        # Daily Briefs
+                        url = self.extractPrintURL(baseURL, url, "Print this entry")
+                        title = div.contents[0]
+                    if self.debugMessages :
+                        print(title+" @ "+url)
+                    articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
+
+            elif i == 3 : # Stratfor
+                a = soup.find('a', self.catList[i][3])
+                if a is None :
+                    continue
+                url = baseURL+a['href']
+                title = self.tag_to_string(a, use_alt=True).strip()
+                # Get Stratfor contents so we can get the real title.
+                stratSoup = self.index_to_soup(url)
+                title = stratSoup.html.head.title.string
+                stratIndex = title.find('Stratfor.com:', 0)
+                if (stratIndex > -1) :
+                    title = title[stratIndex+14:-1]
+                # Look for first blogBody <td class="blogBody"
+                # Changed 12 Jan 2012 - new page format
+                #stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
+                #stratBody = stratSoup.find('td', {'class':['blogBody']})
+            elif i == 4 : # Talking Points
+                topDate = soup.find("td", "blogBody")
+                if not topDate :
+                    print("Failed to find date in Talking Points")
+                # This page has the contents in double-wrapped tables!
+                myTable = topDate.findParents('table')[0]
+                if myTable is not None:
+                    upOneTable = myTable.findParents('table')[0]
+                    if upOneTable is not None:
+                        upTwo = upOneTable.findParents('table')[0]
+                        if upTwo is None:
+                            continue
+                        # Now navigate rows of upTwo
+                        if self.debugMessages :
+                            print("Entering rows")
+                        for rows in upTwo.findChildren("tr", recursive=False):
+                            # Inside top level table, each row is an article
+                            rowTable = rows.find("table")
+                            articleTable = rowTable.find("table")
+                            # This looks wrong.
+                            articleTable = rows.find("tr")
+                            # The middle table is just for formatting the article buffer... but this means we can skip the inner table.
+                            blogDate = articleTable.find("a","blogDate").contents[0]
+                            # Skip to second blogBody for this.
+                            blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
+                            blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
+                            url = baseURL+re.sub(r'\?.*', '', blogURL)
+                            title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
+                            if self.debugMessages :
+                                print("Talking Points Memo title "+title+" at url: "+url)
+                            pubdate = time.strftime('%a, %d %b')
+                            articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
+            else : # Current Column
+                titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
+                if titleSpan is None :
+                    continue
+                title = titleSpan.contents[0]
+                url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
+            if i == 3 or i == 5 :
+                if self.debugMessages :
+                    print(self.catList[i][0]+" Title:"+title+" at url: "+url)
+                summary = div.find(True, attrs={'class':'summary'})
+                if summary:
+                    description = self.tag_to_string(summary, use_alt=False)
+                articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
+            self.catList[i][3] = articleList
+            fullReturn.append((self.catList[i][0], articleList))
+        return fullReturn
 
     # calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
     # returns a list of tuple ('feed title', list of articles)
@@ -231,27 +183,8 @@ class OReillyPremium(BasicNewsRecipe):
     # this is used instead of BasicNewsRecipe.parse_feeds().
     def parse_index(self):
         # Parse the page into Python Soup
-        debugMessages = True
         baseURL = "https://www.billoreilly.com"
-        def feed_title(div):
-            return ''.join(div.findAll(text=True, recursive=False)).strip()
-
-        # [] is list, {} is empty mapping.
-        articleList = []
-        ans = []
-        showList = self.parseTVArchives(baseURL, 'https://www.billoreilly.com/show?action=tvShowArchive', debugMessages)
-        articleList = self.parseNoSpinArchives(baseURL, 'https://www.billoreilly.com/blog?categoryID=7', debugMessages)
-        stratList = self.parseStratfor(baseURL, 'http://www.billoreilly.com/blog?categoryID=5', debugMessages)
-        dailyBriefs = self.parseDailyBriefs(baseURL, 'http://www.billoreilly.com/blog?categoryID=11', debugMessages)
-        talkingPoints = self.parseTalkingPoints(baseURL, 'https://www.billoreilly.com/blog?categoryID=12', debugMessages)
-        currentColumn = self.parseCurrentColumn(baseURL, 'https://www.billoreilly.com/currentcolumn', debugMessages)
-        # Below, { x:y, a:b } creates a dictionary. We return a tuple of a title and list of dict...
-        # Lists are constructed with square brackets, separating items with commas: [a, b, c]. Tuples are constructed by the comma operator (not within square brackets), with or without enclosing parentheses, but an empty tuple must have the enclosing parentheses, such as a, b, c or (). A single item tuple must have a trailing comma, such as (d,).
-        # Shows first two if talking points and no spin news. Also if they are TV Shows ande Stratfor Weekly, also if Daily Briefing and Curren Column
-        # So all work individually. No idea why only getting first two in TOC now.
-        ans = [("Talking Points Memos", talkingPoints),("No Spin News", articleList),("TV Shows", showList),("Stratfor Weekly",stratList), ("Daily Briefing", dailyBriefs),("Current Column", currentColumn)]
-        if debugMessages :
-            print ans
-        return ans
+        return self.parseGeneric(baseURL)
 
     def preprocess_html(self, soup):
         refresh = soup.find('meta', {'http-equiv':'refresh'})
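parse_index() now simply delegates to parseGeneric(), which hands calibre the ('feed title', article list) tuples it expects. A minimal sketch of that return shape (the title and URL shown are illustrative values, not taken from a real fetch):

    # Illustrative shape of what parseGeneric()/parse_index() return to calibre.
    import time

    pubdate = time.strftime('%a, %d %b')
    article = dict(title="TV Archives: example entry",
                   url="https://www.billoreilly.com/show?action=tvShowArchive",
                   date=pubdate, description='None', content='')
    feeds = [("TV Archives", [article]), ("No Spin Archives", [])]  # list of (feed title, article dicts)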
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.variety.com
 '''
@@ -14,11 +14,11 @@ class Variety(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    encoding = 'cp1252'
+    encoding = 'utf8'
     publisher = 'Red Business Information'
     category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
     language = 'en'
-    masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
+    masthead_url = 'http://images1.variety.com/graphics/variety/Variety_logo_green_tm.gif'
     extra_css = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
 
     conversion_options = {
@@ -30,17 +30,10 @@ class Variety(BasicNewsRecipe):
 
     remove_tags = [dict(name=['object','link','map'])]
 
-    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+    keep_only_tags = [dict(name='div', attrs={'class':'art control'})]
 
     feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
 
     def print_version(self, url):
-        rpt = url.rpartition('?')[0]
-        artid = rpt.rpartition('/')[2]
-        catidr = url.rpartition('categoryid=')[2]
-        catid = catidr.partition('&')[0]
-        return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
-
-
-    def preprocess_html(self, soup):
-        return self.adeify_images(soup)
+        rpt = url.rpartition('.html')[0]
+        return rpt + '?printerfriendly=true'
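The new print_version() simply swaps the trailing '.html' for a printer-friendly query string. A worked example with a made-up article URL (the URL is hypothetical, shown only to illustrate the transformation):

    # Hypothetical input URL for illustration.
    url = 'http://www.variety.com/article/VR1118012345.html'
    rpt = url.rpartition('.html')[0]
    print(rpt + '?printerfriendly=true')
    # -> http://www.variety.com/article/VR1118012345?printerfriendly=true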
recipes/villagevoice.recipe (new file, 46 lines)
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class VillageVoice(BasicNewsRecipe):
+
+    title = 'Village Voice'
+    feeds = [
+        ("Complete Issue", "http://villagevoice.com/syndication/issue"),
+        ("News", "http://villagevoice.com/syndication/section/news"),
+        ("Music", "http://villagevoice.com/syndication/section/music"),
+        ("Movies", "http://villagevoice.com/syndication/section/film"),
+        #("Restaurants", "http://villagevoice.com/syndication/section/dining"),
+        #("Music Events", "http://villagevoice.com/syndication/events?type=music"),
+        #("Calendar Events", "http://villagevoice.com/syndication/events"),
+        #("Promotional Events", "http://villagevoice.com/syndication/promoEvents"),
+        #("Restaurant Guide", "http://villagevoice.com/syndication/restaurants/search")
+        ]
+
+    auto_cleanup = True
+    max_articles_per_feed = 50
+    masthead_url = "http://assets.villagevoice.com/img/citylogo.png"
+    language = 'en'
+    __author__ = 'Barty'
+
+    seen_urls = []
+
+    # village voice breaks the article up into multiple pages, so
+    # parse page and grab the print url
+
+    url_regex = re.compile(r'\/content\/printVersion\/\d+',re.I)
+
+    def print_version(self, url):
+        if url in self.seen_urls:
+            return None
+        self.seen_urls.append( url)
+        soup = self.index_to_soup(url)
+        atag = soup.find('a',attrs={'href':self.url_regex})
+        if atag is None:
+            self.log('Warning: no print url found for '+url)
+        else:
+            m = self.url_regex.search(atag['href'])
+            if m:
+                url = 'http://www.villagevoice.com'+m.group(0)
+        return url
@@ -197,7 +197,7 @@ title_series_sorting = 'library_order'
 # For example, if the tweak is set to library_order, "The Lord of the Rings"
 # will become "Lord of the Rings, The". If the tweak is set to
 # strictly_alphabetic, it would remain "The Lord of the Rings". Note that the
 # formatter function raw_field will return the base value for title and
 # series regardless of the setting of this tweak.
 save_template_title_series_sorting = 'library_order'
 
@@ -13,6 +13,7 @@
 3. Much more comprehensive testing/error handling
 4. Properly encodes/decodes assertions
 5. Handles points in the padding of elements consistently
+6. Has a utility method to calculate the CFI for the current viewport position robustly
 
 To check if this script is compatible with the current browser, call
 window.cfi.is_compatible() it will throw an exception if not compatible.
@@ -72,7 +73,7 @@ get_current_time = (target) -> # {{{
     fstr(ans)
 # }}}
 
-window_scroll_pos = (win) -> # {{{
+window_scroll_pos = (win=window) -> # {{{
     if typeof(win.pageXOffset) == 'number'
         x = win.pageXOffset
         y = win.pageYOffset
@@ -86,18 +87,18 @@ window_scroll_pos = (win) -> # {{{
     return [x, y]
 # }}}
 
-viewport_to_document = (x, y, doc) -> # {{{
+viewport_to_document = (x, y, doc=window?.document) -> # {{{
+    until doc == window.document
+        # We are in a frame
+        frame = doc.defaultView.frameElement
+        rect = frame.getBoundingClientRect()
+        x += rect.left
+        y += rect.top
+        doc = frame.ownerDocument
     win = doc.defaultView
     [wx, wy] = window_scroll_pos(win)
     x += wx
     y += wy
-    if doc != window.document
-        # We are in a frame
-        node = win.frameElement
-        rect = node.getBoundingClientRect()
-        [vx, vy] = viewport_to_document(rect.left, rect.top, node.ownerDocument)
-        x += vx
-        y += vy
     return [x, y]
 # }}}
@@ -157,7 +158,8 @@ class CanonicalFragmentIdentifier
    is_compatible(): Throws an error if the browser is not compatible with
                     this script
 
-    at(x, y): which maps a point to a CFI, if possible
+    at(x, y): Maps a point to a CFI, if possible
+    at_current(): Returns the CFI corresponding to the current viewport scroll location
 
    scroll_to(cfi): which scrolls the browser to a point corresponding to the
                    given cfi, and returns the x and y co-ordinates of the point.
@@ -397,6 +399,8 @@ class CanonicalFragmentIdentifier
             if not cd
                 break
 
+            # We have an embedded document, transforms x, y into the co-prd
+            # system of the embedded document's viewport
             rect = target.getBoundingClientRect()
             x -= rect.left
             y -= rect.top
@@ -557,11 +561,73 @@ class CanonicalFragmentIdentifier
         null
     # }}}
 
-    current_cfi: () -> # {{{
+    at_current: () -> # {{{
         [winx, winy] = window_scroll_pos()
         [winw, winh] = [window.innerWidth, window.innerHeight]
+        max = Math.max
         winw = max(winw, 400)
         winh = max(winh, 600)
+        deltay = Math.floor(winh/50)
+        deltax = Math.floor(winw/25)
+        miny = max(-winy, -winh)
+        maxy = winh
+        minx = max(-winx, -winw)
+        maxx = winw
+
+        dist = (p1, p2) ->
+            Math.sqrt(Math.pow(p1[0]-p2[0], 2), Math.pow(p1[1]-p2[1], 2))
+
+        get_cfi = (ox, oy) ->
+            try
+                cfi = this.at(ox, oy)
+                point = this.point(cfi)
+            catch err
+                cfi = null
+
+            if point.range != null
+                r = point.range
+                rect = r.getClientRects()[0]
+
+                x = (point.a*rect.left + (1-point.a)*rect.right)
+                y = (rect.top + rect.bottom)/2
+                [x, y] = viewport_to_document(x, y, r.startContainer.ownerDocument)
+            else
+                node = point.node
+                r = node.getBoundingClientRect()
+                [x, y] = viewport_to_document(r.left, r.top, node.ownerDocument)
+                if typeof(point.x) == 'number' and node.offsetWidth
+                    x += (point.x*node.offsetWidth)/100
+                if typeof(point.y) == 'number' and node.offsetHeight
+                    y += (point.y*node.offsetHeight)/100
+
+            if dist(viewport_to_document(ox, oy), [x, y]) > 50
+                cfi = null
+
+            return cfi
+
+        x_loop = (cury) ->
+            for direction in [-1, 1]
+                delta = deltax * direction
+                curx = 0
+                until (direction < 0 and curx < minx) or (direction > 0 and curx > maxx)
+                    cfi = get_cfi(curx, cury)
+                    if cfi
+                        return cfi
+                    curx += delta
+            null
+
+        for direction in [-1, 1]
+            delta = deltay * direction
+            cury = 0
+            until (direction < 0 and cury < miny) or (direction > 0 and cury > maxy)
+                cfi = x_loop(cury, -1)
+                if cfi
+                    return cfi
+                cury += delta
+
+        # TODO: Return the CFI corresponding to the <body> tag
+        null
+
     # }}}
 
 if window?
@@ -59,26 +59,13 @@ mark_and_reload = (evt) ->
         setTimeout(fn, 1)
     null
 
-window_scroll_pos = (win) ->
-    if typeof(win.pageXOffset) == 'number'
-        x = win.pageXOffset
-        y = win.pageYOffset
-    else # IE < 9
-        if document.body and ( document.body.scrollLeft or document.body.scrollTop )
-            x = document.body.scrollLeft
-            y = document.body.scrollTop
-        else if document.documentElement and ( document.documentElement.scrollLeft or document.documentElement.scrollTop)
-            y = document.documentElement.scrollTop
-            x = document.documentElement.scrollLeft
-    return [x, y]
-
 frame_clicked = (evt) ->
     iframe = evt.target.ownerDocument.defaultView.frameElement
     # We know that the offset parent of the iframe is body
     # So we can easily calculate the event co-ords w.r.t. the browser window
-    [winx, winy] = window_scroll_pos(window)
-    x = evt.clientX + iframe.offsetLeft - winx
-    y = evt.clientY + iframe.offsetTop - winy
+    rect = iframe.getBoundingClientRect()
+    x = evt.clientX + rect.left
+    y = evt.clientY + rect.top
     mark_and_reload({'clientX':x, 'clientY':y, 'button':evt.button})
 
 window.onload = ->
@@ -23,6 +23,7 @@
             indignation and dislike men who are so beguiled and demoralized by
             the charms of pleasure of the moment, so blinded by desire, that
             they cannot foresee</p>
+            <p><img src="marker.png" width="300" height="300" alt="Test image"/></p>
 
     </body>
 </html>
@@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <html>
     <head>
-        <title>Testing EPUB CFI</title>
+        <title>Testing cfi.coffee</title>
         <script type="text/javascript" src="cfi.coffee"></script>
         <script type="text/javascript" src="cfi-test.coffee"></script>
         <style type="text/css">
@@ -46,7 +46,8 @@
     </head>
     <body>
         <div id="container">
-            <h1 id="first-h1">Testing EPUB CFI</h1>
+            <h1 id="first-h1">Testing cfi.coffee</h1>
+            <p>Click anywhere and the location will be marked with a marker, whose position is set via a CFI.</p>
             <p><a id="reset" href="/">Reset CFI to None</a></p>
             <h2>A div with scrollbars</h2>
             <p>Scroll down and click on some elements. Make sure to hit both
@@ -462,7 +462,7 @@ class Scheduler(QObject):
             delta = timedelta(days=self.oldest)
             try:
                 ids = list(self.db.tags_older_than(_('News'),
-                    delta))
+                    delta, must_have_authors=['calibre']))
             except:
                 # Happens if library is being switched
                 ids = []
@@ -362,7 +362,7 @@
        <item>
         <widget class="QLabel" name="label_7">
          <property name="text">
-          <string>&Delete downloaded news older than:</string>
+          <string>Delete downloaded news &older than:</string>
          </property>
          <property name="buddy">
           <cstring>old_news</cstring>
@@ -73,6 +73,9 @@ class JavaScriptLoader(object):
             src = self.get(x)
             evaljs(src)
 
+        if not lang:
+            lang = 'en'
+
         def lang_name(l):
             l = l.lower()
             l = lang_as_iso639_1(l)
@@ -2002,7 +2002,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
 
     ############# End get_categories
 
-    def tags_older_than(self, tag, delta, must_have_tag=None):
+    def tags_older_than(self, tag, delta, must_have_tag=None,
+            must_have_authors=None):
         '''
         Return the ids of all books having the tag ``tag`` that are older than
         than the specified time. tag comparison is case insensitive.
@@ -2011,6 +2012,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         the tag are returned.
         :param must_have_tag: If not None the list of matches will be
                               restricted to books that have this tag
+        :param must_have_authors: A list of authors. If not None the list of
+                                  matches will be restricted to books that have these authors (case
+                                  insensitive).
         '''
         tag = tag.lower().strip()
         mht = must_have_tag.lower().strip() if must_have_tag else None
@@ -2018,9 +2022,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         tindex = self.FIELD_MAP['timestamp']
         gindex = self.FIELD_MAP['tags']
         iindex = self.FIELD_MAP['id']
+        aindex = self.FIELD_MAP['authors']
+        mah = must_have_authors
+        if mah is not None:
+            mah = [x.replace(',', '|').lower() for x in mah]
+            mah = ','.join(mah)
         for r in self.data._data:
             if r is not None:
                 if delta is None or (now - r[tindex]) > delta:
+                    if mah:
+                        authors = r[aindex] or ''
+                        if authors.lower() != mah:
+                            continue
                     tags = r[gindex]
                     if tags:
                         tags = [x.strip() for x in tags.lower().split(',')]
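The author filter compares against the cached authors field, which stores the author list as a comma-separated string with any comma inside a name replaced by '|'. A small standalone illustration of the normalisation performed above before that comparison:

    # Standalone sketch of the must_have_authors normalisation.
    must_have_authors = ['calibre']
    mah = [x.replace(',', '|').lower() for x in must_have_authors]
    mah = ','.join(mah)
    print(mah)                       # -> 'calibre'
    print('calibre'.lower() == mah)  # a book authored only by calibre matches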
@@ -3205,6 +3218,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         stream.seek(0)
         mi = get_metadata(stream, format, use_libprs_metadata=False,
                 force_read_metadata=True)
+        # Force the author to calibre as the auto delete of old news checks for
+        # both the author==calibre and the tag News
+        mi.authors = ['calibre']
         stream.seek(0)
         if mi.series_index is None:
             mi.series_index = self.get_next_series_num_for(mi.series)
@@ -12,7 +12,7 @@ Utilities to help with developing coffeescript based apps.
 A coffeescript compiler and a simple web server that automatically serves
 coffeescript files as javascript.
 '''
-import sys, traceback, importlib, io
+import sys, traceback, io
 if sys.version_info.major > 2:
     print('This script is not Python 3 compatible. Run it with Python 2',
             file=sys.stderr)
@@ -22,125 +22,48 @@ import time, BaseHTTPServer, os, sys, re, SocketServer
 from threading import Lock
 from SimpleHTTPServer import SimpleHTTPRequestHandler
 
-from PyQt4.QtWebKit import QWebPage
-from PyQt4.Qt import QThread, QApplication
+from PyQt4.Qt import QCoreApplication, QScriptEngine, QScriptValue
 
-# Infrastructure {{{
-def import_from_calibre(mod):
-    try:
-        return importlib.import_module(mod)
-    except ImportError:
-        import init_calibre
-        init_calibre
-        return importlib.import_module(mod)
-
-_store_app = gui_thread = None
-def check_qt():
-    global gui_thread, _store_app
-    _plat = sys.platform.lower()
-    iswindows = 'win32' in _plat or 'win64' in _plat
-    isosx = 'darwin' in _plat
-    islinux = not (iswindows or isosx)
-
-    if islinux and ':' not in os.environ.get('DISPLAY', ''):
-        raise RuntimeError('X server required. If you are running on a'
-                ' headless machine, use xvfb')
-    if _store_app is None and QApplication.instance() is None:
-        _store_app = QApplication([])
-    if gui_thread is None:
-        gui_thread = QThread.currentThread()
-    if gui_thread is not QThread.currentThread():
-        raise RuntimeError('Cannot use Qt in non GUI thread')
-
-def fork_job(*args, **kwargs):
-    try:
-        return import_from_calibre('calibre.utils.ipc.simple_worker').fork_job(*args,
-                **kwargs)
-    except ImportError:
-        # We aren't running in calibre
-        import subprocess
-        raw, filename = kwargs['args']
-        cs = ''
-        try:
-            p = subprocess.Popen([sys.executable, __file__, 'compile', '-'],
-                    stdin=subprocess.PIPE, stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE)
-            if isinstance(raw, unicode):
-                raw = raw.encode('utf-8')
-            stdout, stderr = p.communicate(raw)
-            cs = stdout.decode('utf-8')
-            errors = [stderr]
-        except:
-            errors = [traceback.format_exc()]
-
-        return {'result':(cs, errors)}
-
-# }}}
-
-class Compiler(QWebPage): # {{{
+class Compiler(QScriptEngine): # {{{
 
     '''
-    Never use this class in anything except the main thread. If you want to use
-    it from other threads, use the forked_compile method instead.
+    You can use this class in any thread, but make sure you instantiate it in
+    the main thread. Alternatively, construct a QCoreApplication in the main
+    thread, after which you can instantiate this class and use it in any
+    thread.
     '''
 
     def __init__(self):
-        check_qt()
-        QWebPage.__init__(self)
-        self.frame = self.mainFrame()
-        self.filename = self._src = ''
-        self.frame.evaluateJavaScript(CS_JS)
-        self.frame.addToJavaScriptWindowObject("cs_compiler", self)
-        self.errors = []
-
-    def shouldInterruptJavaScript(self):
-        return True
-
-    def javaScriptConsoleMessage(self, msg, lineno, sourceid):
-        sourceid = sourceid or self.filename or '<script>'
-        self.errors.append('%s:%s'%(sourceid, msg))
+        if QCoreApplication.instance() is None:
+            self.__app_ = QCoreApplication([])
+        QScriptEngine.__init__(self)
+        res = self.evaluate(CS_JS, 'coffee-script.js')
+        if res.isError():
+            raise Exception('Failed to run the coffee script compiler: %s'%
+                    unicode(res.toString()))
+        self.lock = Lock()
 
-    def __evalcs(self, raw, filename):
-        # This method is NOT thread safe
-        self.filename = filename
-        self.setProperty('source', raw)
-        self.errors = []
-        res = self.frame.evaluateJavaScript('''
-            raw = document.getElementById("raw");
-            raw = cs_compiler.source;
-            CoffeeScript.compile(raw);
-        ''')
-        ans = ''
-        if res.type() == res.String:
-            ans = unicode(res.toString())
-        return ans, list(self.errors)
-
     def __call__(self, raw, filename=None):
-        if not isinstance(raw, unicode):
-            raw = raw.decode('utf-8')
-        return self.__evalcs(raw, filename)
-
-def forked_compile(raw, fname):
-    # Entry point for the compile worker
-    try:
-        ans, errors = Compiler()(raw, fname)
-    except:
-        ans, errors = '', [traceback.format_exc()]
-    return ans, errors
+        with self.lock:
+            if not isinstance(raw, unicode):
+                raw = raw.decode('utf-8')
+            if not filename:
+                filename = '<string>'
+            go = self.globalObject()
+            go.setProperty('coffee_src', QScriptValue(raw),
+                    go.ReadOnly|go.Undeletable)
+            res = self.evaluate('this.CoffeeScript.compile(this.coffee_src)',
+                    filename)
+            if res.isError():
+                return '', [unicode(res.toString())]
+            return unicode(res.toString()), []
 
 
 # }}}
 
 def compile_coffeescript(raw, filename=None):
-    try:
-        cs, errors = fork_job('calibre.utils.serve_coffee',
-                'forked_compile', args=(raw, filename), timeout=5,
-                no_output=True)['result']
-    except Exception as e:
-        cs = None
-        errors = [getattr(e, 'orig_tb', traceback.format_exc())]
-
-    return cs, errors
+    return Compiler()(raw, filename)
 
 class HTTPRequestHandler(SimpleHTTPRequestHandler): # {{{
     '''
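With the QScriptEngine-based Compiler, compilation is a plain call on the instance, and compile_coffeescript() is now just a thin wrapper around it. A minimal usage sketch, assuming calibre's Python 2 / PyQt4 environment (the snippet and file name are illustrative):

    # Sketch: __call__ returns (javascript_text, error_list).
    from calibre.utils.serve_coffee import compile_coffeescript

    js, errors = compile_coffeescript("alert 'hello'", filename='example.coffee')
    for line in errors:
        print(line)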
@@ -317,7 +240,7 @@ class Handler(HTTPRequestHandler): # {{{
             mtime = time.time()
             with open(src, 'rb') as f:
                 raw = f.read()
-            cs, errors = compile_coffeescript(raw, src)
+            cs, errors = self.compiler(raw, src)
             for line in errors:
                 print(line, file=sys.stderr)
             if not cs:
@@ -351,6 +274,7 @@ class Server(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer): # {{{
 
 def serve(resources={}, port=8000, host='0.0.0.0'):
     Handler.special_resources = resources
+    Handler.compiler = Compiler()
     httpd = Server((host, port), Handler)
     print('serving %s at %s:%d with PID=%d'%(os.getcwdu(), host, port, os.getpid()))
     try: