GRiker 2012-01-20 16:13:14 -07:00
commit 18e83abe93
114 changed files with 36541 additions and 32829 deletions

View File

@ -5,7 +5,7 @@
# Also, each release can have new and improved recipes.
# - version: ?.?.?
# date: 2011-??-??
# date: 2012-??-??
#
# new features:
# - title:
@ -19,8 +19,68 @@
# new recipes:
# - title:
- version: 0.8.36
date: 2012-01-20
new features:
- title: "Decrease startup time for large libraries with at least one composite custom column by reading format info on demand"
- title: "When automatically deleting news older than x days, from the calibre library, only delete the book if it both has the tag News and the author calibre. This prevents accidental deletion of books tagged with News by the user."
- title: "Driver for Infibeam Pi 2"
- title: "Add a Tag Editor for tags like custom columns to the edit metadata dialog"
bug fixes:
- title: "E-book viewer: Fix regression in 0.8.35 that caused viewer to raise an error on books that did not define a language"
- title: "Content server: Fix grouping for categories based on custom columns."
tickets: [919011]
- title: "Edit metadata dialog: When setting the series from a format or via metadata download, ensure that the series index is not automatically changed, when closing the dialog."
tickets: [918751]
- title: "When reading metadata from Topaz (azw1) files, handle non ascii metadata correctly."
tickets: [917419]
- title: "CHM Input: Do not choke on CHM files with non ascii internal filenames on windows."
tickets: [917696]
- title: "Fix reading metadata from CHM files with non-ascii titles"
- title: "Fix HTML 5 parser choking on comments"
- title: "If calibre is started from a directory that does not exist, automatically use the home directory as the working directory, instead of crashing"
- title: "Fix iriver story HD Wi-Fi device and external SD card swapped"
tickets: [916364]
- title: "Content server: Fix ugly URLs for specific format download in the book details and permalink panels"
- title: "When adding FB2 files do not set the date field from the metadata in the file"
improved recipes:
- OReilly Premium
- Variety
- Blic
- New Journal of Physics
- Der Tagesspiegel
new recipes:
- title: Tweakers.net
author: Roedi06
- title: Village Voice
author: Barty
- title: Edge.org Conversations
author: levien
- title: Novi list - printed edition
author: Darko Miletic
- version: 0.8.35
date: 2011-01-13
date: 2012-01-13
new features:
- title: "Metadata plugboards: Allow creation of plugboards for email delivery."

View File

@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
almasryalyoum.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class AlMasryAlYoum(BasicNewsRecipe):
title = u'al-masry al-youm'
__author__ = 'Omm Mishmishah'
description = 'Independent News from Egypt'
masthead_url = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
cover_url = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
auto_cleanup = True
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = False
#delay = 1
use_embedded_content = False
encoding = 'utf8'
publisher = 'Independent News Egypt'
category = 'News, Egypt, World'
language = 'en_EG'
publication_type = 'newsportal'
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': False
}
keep_only_tags = [dict(attrs={'class':['article section']})]
remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
'inline-content story left', 'inline-content map left contracted', 'published',
'story-map', 'statepromo', 'topics', ]})]
remove_attributes = ['width','height']
feeds = [(u'English News', u'http://www.almasryalyoum.com/en/rss_feed_term/113/rss.xml'),
(u'News Features', u'http://www.almasryalyoum.com/en/rss_feed_term/115/rss.xml'),
(u'Culture', u'http://www.almasryalyoum.com/en/rss_feed_term/133/rss.xml'),
(u'Cinema', u'http://www.almasryalyoum.com/en/rss_feed_term/134/rss.xml')
]

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
blic.rs
'''
@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe):
def print_version(self, url):
return url + '/print'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def get_cover_url(self):
soup = self.index_to_soup('http://www.blic.rs/')
alink = soup.find('a', attrs={'id':'blic_naslovna_print'})
if alink:
return 'http://www.blic.rs' + alink['href']
return None

View File

@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):
use_embedded_content = False
remove_javascript = True
needs_subscription = True
needs_subscription = 'optional'
encoding= 'ISO-8859-1'
remove_tags_before = dict(name='font', attrs={'class':'date'})
@ -75,32 +75,30 @@ class ESPN(BasicNewsRecipe):
return soup
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.set_handle_refresh(False)
url = ('https://r.espn.go.com/members/v3_1/login')
raw = br.open(url).read()
raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
with TemporaryFile(suffix='.htm') as fname:
with open(fname, 'wb') as f:
f.write(raw)
br.open_local_file(fname)
if self.username and self.password:
br.set_handle_refresh(False)
url = ('https://r.espn.go.com/members/v3_1/login')
raw = br.open(url).read()
raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
with TemporaryFile(suffix='.htm') as fname:
with open(fname, 'wb') as f:
f.write(raw)
br.open_local_file(fname)
br.form = br.forms().next()
br.form.find_control(name='username', type='text').value = self.username
br.form['password'] = self.password
br.submit().read()
br.open('http://espn.go.com').read()
br.set_handle_refresh(True)
br.form = br.forms().next()
br.form.find_control(name='username', type='text').value = self.username
br.form['password'] = self.password
br.submit().read()
br.open('http://espn.go.com').read()
br.set_handle_refresh(True)
return br
def get_article_url(self, article):
return article.get('guid', None)
def print_version(self, url):
if 'eticket' in url:
return url.partition('&')[0].replace('story?', 'print?')
match = re.search(r'story\?(id=\d+)', url)
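
The pattern the hunk above applies is worth isolating: with needs_subscription = 'optional', the recipe must still work when no credentials are given, so the whole login sequence is guarded. A minimal sketch of that shape (the login URL and form field names here are placeholders, not ESPN's):

from calibre.web.feeds.news import BasicNewsRecipe

class OptionalLoginRecipe(BasicNewsRecipe):
    title = 'Example with optional login'
    needs_subscription = 'optional'  # calibre asks for, but does not require, credentials

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username and self.password:
            # Only attempt login when the user supplied credentials;
            # anonymous fetching proceeds with the plain browser.
            br.open('https://example.com/login')  # placeholder URL
            br.select_form(nr=0)
            br['user'] = self.username            # placeholder field names
            br['pass'] = self.password
            br.submit()
        return br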

recipes/klip_me.recipe (new file, 72 lines)
View File

@ -0,0 +1,72 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1299694372(BasicNewsRecipe):
title = u'Klipme'
__author__ = 'Ken Sun'
publisher = 'Klip.me'
category = 'info, custom, Klip.me'
oldest_article = 365
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
remove_tags = [
dict(name='div', attrs={'id':'text_controls_toggle'})
,dict(name='script')
,dict(name='div', attrs={'id':'text_controls'})
,dict(name='div', attrs={'id':'editing_controls'})
,dict(name='div', attrs={'class':'bar bottom'})
]
use_embedded_content = False
needs_subscription = True
INDEX = u'http://www.klip.me'
LOGIN = INDEX + u'/fav/signin?callback=/fav'
feeds = [
(u'Klip.me unread', u'http://www.klip.me/fav'),
(u'Klip.me started', u'http://www.klip.me/fav?s=starred')
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None:
br.open(self.LOGIN)
br.select_form(nr=0)
br['Email'] = self.username
if self.password is not None:
br['Passwd'] = self.password
br.submit()
return br
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('table',attrs={'class':['item','item new']}):
atag = item.a
if atag and atag.has_key('href'):
url = atag['href']
articles.append({
'url' :url
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def print_version(self, url):
return 'http://www.klip.me' + url
def populate_article_metadata(self, article, soup, first):
article.title = soup.find('title').contents[0].strip()
def postprocess_html(self, soup, first_fetch):
for link_tag in soup.findAll(attrs={"id" : "story"}):
link_tag.insert(0,'<h1>'+soup.find('title').contents[0].strip()+'</h1>')
print link_tag
return soup

View File

@ -1,16 +1,35 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
##
## Title: Microwave Journal RSS recipe
## Contact: AprilHare, Darko Miletic <darko.miletic at gmail.com>
##
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
## Copyright: 2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>
##
## Written: 2008
## Last Edited: Jan 2012
##
'''
01-19-2012: Added GrayScale Image conversion and Duplicate article removal
'''
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
__copyright__ = '2008-2012, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
__version__ = 'v0.5.0'
__date__ = '2012-01-19'
__author__ = 'Darko Miletic'
'''
newscientist.com
'''
import re
import urllib
from calibre.utils.magick import Image
from calibre.web.feeds.news import BasicNewsRecipe
class NewScientist(BasicNewsRecipe):
title = 'New Scientist - Online News w. subscription'
__author__ = 'Darko Miletic'
description = 'Science news and science articles from New Scientist.'
language = 'en'
publisher = 'Reed Business Information Ltd.'
@ -39,10 +58,19 @@ class NewScientist(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]
# Whether to omit duplicates of articles (typically arising when articles are indexed in
# more than one section). If True, only the first occurrence will be downloaded.
filterDuplicates = True
# Whether to convert images to grayscale for eInk readers.
Convert_Grayscale = False
url_list = [] # This list is used to check if an article had already been included.
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open('http://www.newscientist.com/')
if self.username is not None and self.password is not None:
if self.username is not None and self.password is not None:
br.open('https://www.newscientist.com/user/login')
data = urllib.urlencode({ 'source':'form'
,'redirectURL':''
@ -80,6 +108,10 @@ class NewScientist(BasicNewsRecipe):
return article.get('guid', None)
def print_version(self, url):
if self.filterDuplicates:
if url in self.url_list:
return
self.url_list.append(url)
return url + '?full=true&print=true'
def preprocess_html(self, soup):
@ -91,7 +123,7 @@ class NewScientist(BasicNewsRecipe):
item.name='p'
for item in soup.findAll(['xref','figref']):
tstr = item.string
item.replaceWith(tstr)
item.replaceWith(tstr)
for tg in soup.findAll('a'):
if tg.string == 'Home':
tg.parent.extract()
@ -101,3 +133,16 @@ class NewScientist(BasicNewsRecipe):
tg.replaceWith(tstr)
return soup
# Converts images to grayscale
def postprocess_html(self, soup, first):
if self.Convert_Grayscale:
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
if img < 0:
raise RuntimeError('Out of memory')
img.type = "GrayscaleType"
img.save(iurl)
return soup

View File

@ -1,8 +1,15 @@
# Talking Points is not grabbing everything.
# The look is right, but only the last one added?
import re
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
# Import the Python soup converter (BeautifulSoup), which makes parsing easier.
from calibre.ebooks.BeautifulSoup import BeautifulSoup
# strip ads and graphics
# Current Column lacks a title.
# Talking Points Memo - shorten title - Remove year and Bill's name
# The newsletter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
# Newsletters: Talking Points Memos covered by cat12
class OReillyPremium(BasicNewsRecipe):
title = u'OReilly Premium'
@ -19,7 +26,17 @@ class OReillyPremium(BasicNewsRecipe):
# Don't go down
recursions = 0
max_articles_per_feed = 2000
language = 'en'
debugMessages = True
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@ -31,6 +48,8 @@ class OReillyPremium(BasicNewsRecipe):
br.submit()
return br
# Returns the best-guess print url.
# The second parameter (pageURL) is returned if nothing is found.
def extractPrintURL(self, baseURL, pageURL, printString):
tagURL = pageURL
soup = self.index_to_soup(pageURL)
@ -38,7 +57,6 @@ class OReillyPremium(BasicNewsRecipe):
printText = soup.find('a', text=printString)
else :
print("Failed to find Print string "+printString+ " in "+pageURL)
if printText:
tag = printText.parent
tagURL = baseURL+tag['href']
@ -47,177 +65,111 @@ class OReillyPremium(BasicNewsRecipe):
def stripBadChars(self, inString) :
return inString.replace("\'", "")
# returns a qualifying article list
def parseNoSpinArchives(self, baseURL, soupURL, debugMessages):
articleList = []
soup = self.index_to_soup(soupURL)
for div in soup.findAll(True, attrs={'class':['blogBody'], 'style':['padding-top:10px;']}):
a = div.find('a', href=True)
if not a:
continue
# re == regex. [href] is the link
url = baseURL
url +=re.sub(r'\?.*', '', a['href'])
# Get print version
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
if printURL:
url = printURL
title = self.tag_to_string(a, use_alt=True).strip()
if debugMessages :
print("No Spin Archive Title:"+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
return articleList
def parseTVArchives(self, baseURL, soupURL, debugMessages):
# TV Archives page has some Ajax, so look for the static only.
articleList = []
soup = self.index_to_soup(soupURL)
if debugMessages :
print("In parseTVArchives")
for div in soup.findAll('a', {'class':['showLinks','homeLinks']}):
a = div
url = baseURL
url +=a['href']
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
if printURL:
url = printURL
title = self.tag_to_string(a, use_alt=True).strip()
title = self.stripBadChars(title)
if debugMessages :
print("TV Archive "+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
if debugMessages :
print("Leaving TV Parse ")
return articleList
# Get Daily Briefing Archives
def parseDailyBriefs(self, baseURL, soupURL, debugMessages) :
print("Starting daily briefs")
articleList = []
soup = self.index_to_soup(soupURL)
for div in soup.findAll(True, attrs={'class':['defaultHeaderSmallLinks']}):
# re == regex. [href] is the link
url = baseURL
url +=re.sub(r'\?.*', '', div['href'])
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
if printURL:
url = printURL
title = div.contents[0]
if debugMessages :
print("Daily Brief - title:"+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
print("Leaving daily briefs")
return articleList
# Get the weekly Stratfor intelligence report
def parseStratfor(self, baseURL, soupURL, debugMessages):
# http://www.billoreilly.com/blog?categoryID=5
articleList = []
soup = self.index_to_soup(soupURL)
if debugMessages :
print("In parseStratfor")
a = soup.find('a', {'class':['blogLinks']})
url = baseURL
url +=a['href']
title = self.tag_to_string(a, use_alt=True).strip()
if debugMessages :
print("url: "+url)
print("title:"+title)
# Get Stratfor contents so we can get the real title.
stratSoup = self.index_to_soup(url)
title = stratSoup.html.head.title.string
stratIndex = title.find('Stratfor.com:', 0)
if (stratIndex > -1) :
title = title[stratIndex+14:-1]
# Look for first blogBody <td class="blogBody"
stratBody = stratSoup.find('td', {'class':['blogBody']})
if debugMessages :
print("Strat content title:"+title)
print("Strat body: "+ stratBody.contents[0])
description = 'None'
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
if debugMessages :
print("Leaving Stratfor Parse ")
return articleList
def parseTalkingPoints(self, baseURL, soupURL, debugMessages) :
# Look for blogDate, which holds the date; the next blogBody has the title, and an anchor with class "homeBlogReadMore bold" has the URL.
articleList = []
soup = self.index_to_soup(soupURL)
if debugMessages :
print("Starting Talking Points")
topDate = soup.find("td", "blogBody")
if not topDate :
print("Failed to find date in Talking Points")
# This page has the contents in double-wrapped tables!
# tableParent = topDate.parent.parent
myTable = topDate.findParents('table')[0]
upOneTable = myTable.findParents('table')[0]
upTwo = upOneTable.findParents('table')[0]
# Now navigate rows of upTwo
if debugMessages :
print("Entering rows")
for rows in upTwo.findChildren("tr", recursive=False):
# Inside top level table, each row is an article
rowTable = rows.find("table")
articleTable = rowTable.find("table")
articleTable = rows.find("tr")
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
blogDate = articleTable.find("a","blogDate").contents[0]
# Skip to second blogBody for this.
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
# re == regex. [href] is the link
url = baseURL
url +=re.sub(r'\?.*', '', blogURL)
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
if debugMessages :
print("Talking Points Memo title "+title+" at url: "+url)
def parseGeneric(self, baseURL):
# Does a generic parsing of the articles. There are six categories (0-5)
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
# NoSpin and TV are generic
fullReturn = []
for i in range(len(self.catList)) :
articleList = []
soup = self.index_to_soup(self.catList[i][1])
# Set defaults
description = 'None'
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
print("Exiting parseTalkingPoints\n")
return articleList
# Problem: 0-2 create many in an array
# 3-5 create one.
# So no for-div for 3-5
def parseCurrentColumn(self, baseURL, soupURL, debugMessages) :
# Only needed to get the column title. Otherwise it's all good already; there's only one column
articleList = []
soup = self.index_to_soup(soupURL)
titleSpan = soup.find('span', {'class':['defaultHeader']})
title = titleSpan.contents[0]
# Get Print URL since it's available
printURL = self.extractPrintURL(baseURL, soupURL, "Print This Article")
if printURL:
print("Found print URL")
url = printURL
if debugMessages :
print("url: "+url)
print("title:"+title)
description = 'None'
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
if debugMessages :
print("Leaving Stratfor Parse ")
return articleList
if i < 3 :
for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
print(div)
if i == 1:
a = div.find('a', href=True)
else :
a = div
print(a)
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
if not a:
continue
# url = baseURL+re.sub(r'\?.*', '', a['href'])
url = baseURL+a['href']
if i < 2 :
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = self.tag_to_string(a, use_alt=True).strip()
elif i == 2 :
# Daily Briefs
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = div.contents[0]
if self.debugMessages :
print(title+" @ "+url)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
elif i == 3 : # Stratfor
a = soup.find('a', self.catList[i][3])
if a is None :
continue
url = baseURL+a['href']
title = self.tag_to_string(a, use_alt=True).strip()
# Get Stratfor contents so we can get the real title.
stratSoup = self.index_to_soup(url)
title = stratSoup.html.head.title.string
stratIndex = title.find('Stratfor.com:', 0)
if (stratIndex > -1) :
title = title[stratIndex+14:-1]
# Look for first blogBody <td class="blogBody"
# Changed 12 Jan 2012 - new page format
#stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
#stratBody = stratSoup.find('td', {'class':['blogBody']})
elif i == 4 : # Talking Points
topDate = soup.find("td", "blogBody")
if not topDate :
print("Failed to find date in Talking Points")
# This page has the contents in double-wrapped tables!
myTable = topDate.findParents('table')[0]
if myTable is not None:
upOneTable = myTable.findParents('table')[0]
if upOneTable is not None:
upTwo = upOneTable.findParents('table')[0]
if upTwo is None:
continue
# Now navigate rows of upTwo
if self.debugMessages :
print("Entering rows")
for rows in upTwo.findChildren("tr", recursive=False):
# Inside top level table, each row is an article
rowTable = rows.find("table")
articleTable = rowTable.find("table")
# This looks wrong.
articleTable = rows.find("tr")
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
blogDate = articleTable.find("a","blogDate").contents[0]
# Skip to second blogBody for this.
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
url = baseURL+re.sub(r'\?.*', '', blogURL)
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
if self.debugMessages :
print("Talking Points Memo title "+title+" at url: "+url)
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
else : # Current Column
titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
if titleSpan is None :
continue
title = titleSpan.contents[0]
url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
if i == 3 or i == 5 :
if self.debugMessages :
print(self.catList[i][0]+" Title:"+title+" at url: "+url)
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
self.catList[i][3] = articleList
fullReturn.append((self.catList[i][0], articleList))
return fullReturn
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
# Returns a list of tuples: ('feed title', list of articles)
@ -231,27 +183,8 @@ class OReillyPremium(BasicNewsRecipe):
# this is used instead of BasicNewsRecipe.parse_feeds().
def parse_index(self):
# Parse the page into Python Soup
debugMessages = True
baseURL = "https://www.billoreilly.com"
def feed_title(div):
return ''.join(div.findAll(text=True, recursive=False)).strip()
# [] is list, {} is empty mapping.
articleList = []
ans = []
showList = self.parseTVArchives(baseURL, 'https://www.billoreilly.com/show?action=tvShowArchive', debugMessages)
articleList = self.parseNoSpinArchives(baseURL, 'https://www.billoreilly.com/blog?categoryID=7', debugMessages)
stratList = self.parseStratfor(baseURL, 'http://www.billoreilly.com/blog?categoryID=5', debugMessages)
dailyBriefs = self.parseDailyBriefs(baseURL, 'http://www.billoreilly.com/blog?categoryID=11', debugMessages)
talkingPoints = self.parseTalkingPoints(baseURL, 'https://www.billoreilly.com/blog?categoryID=12', debugMessages)
currentColumn = self.parseCurrentColumn(baseURL, 'https://www.billoreilly.com/currentcolumn', debugMessages)
# Below, { x:y, a:b } creates a dictionary. We return a tuple of a title and list of dict...
# Lists are constructed with square brackets, separating items with commas: [a, b, c]. Tuples are constructed by the comma operator (not within square brackets), with or without enclosing parentheses, but an empty tuple must have the enclosing parentheses, such as a, b, c or (). A single item tuple must have a trailing comma, such as (d,).
# The TOC shows only the first two feeds, whether they are Talking Points and No Spin News, TV Shows and Stratfor Weekly, or Daily Briefing and Current Column.
# So all work individually; no idea why only the first two appear in the TOC now.
ans = [("Talking Points Memos", talkingPoints),("No Spin News", articleList),("TV Shows", showList),("Stratfor Weekly",stratList), ("Daily Briefing", dailyBriefs),("Current Column", currentColumn)]
if debugMessages :
print ans
return ans
return self.parseGeneric(baseURL)
def preprocess_html(self, soup):
refresh = soup.find('meta', {'http-equiv':'refresh'})
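
The refactor above replaces six copy-paste parse methods with one table-driven loop over catList. A miniature of the idea, with illustrative placeholder categories and markup rather than the real billoreilly.com structure:

CATS = [
    # (feed name, index URL, tag to find, attrs to match)
    ('News',   'http://example.com/news',   'a',    {'class': 'story'}),
    ('Column', 'http://example.com/column', 'span', {'class': 'header'}),
]

def parse_generic(index_to_soup):
    feeds = []
    for name, url, tag, attrs in CATS:
        soup = index_to_soup(url)
        articles = []
        for el in soup.findAll(tag, attrs=attrs):
            articles.append(dict(title=el.string or '', url=url,
                                 date='', description='', content=''))
        feeds.append((name, articles))
    return feeds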

View File

@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
thedailynewsegypt.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheDailyNewsEG(BasicNewsRecipe):
title = u'The Daily News Egypt'
__author__ = 'Omm Mishmishah'
description = 'News from Egypt'
masthead_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'
cover_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'
auto_cleanup = True
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = False
#delay = 1
use_embedded_content = False
encoding = 'utf8'
publisher = 'The Daily News Egypt'
category = 'News, Egypt, World'
language = 'en_EG'
publication_type = 'newsportal'
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': False
}
keep_only_tags = [dict(attrs={'class':['article section']})]
remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
'inline-content story left', 'inline-content map left contracted', 'published',
'story-map', 'statepromo', 'topics', ]})]
remove_attributes = ['width','height']
feeds = [(u'The Daily News Egypt', u'http://www.thedailynewsegypt.com/rss.php?sectionid=all')]

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Tweakers(BasicNewsRecipe):
title = u'Tweakers.net - with Reactions'
__author__ = 'Roedi06'
language = 'nl'
oldest_article = 7
max_articles_per_feed = 100
cover_url = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif'
keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'}),
{'id':'reacties'},
]
remove_tags = [dict(name='div', attrs={'id' : ['utracker']}),
{'id' : ['channelNav']},
{'id' : ['contentArea']},
{'class' : ['breadCrumb']},
{'class' : ['nextPrevious ellipsis']},
{'class' : ['advertorial']},
{'class' : ['sidebar']},
{'class' : ['filterBox']},
{'id' : ['toggleButtonTxt']},
{'id' : ['socialButtons']},
{'class' : ['button']},
{'class' : ['textadTop']},
{'class' : ['commentLink']},
{'title' : ['Reageer op deze reactie']},
{'class' : ['pageIndex']},
{'class' : ['reactieHeader collapsed']},
]
no_stylesheets=True
preprocess_regexps = [
(re.compile(r'<hr*?>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'<p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'</p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'<a.*?>'), lambda h1: '<b><u>'),
(re.compile(r'</a>'), lambda h2: '</u></b>'),
(re.compile(r'<span class="new">', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'</span>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'), lambda match : ' - moderated 0<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'), lambda match : ' - moderated +1<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'), lambda match : ' - moderated +2<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'), lambda match : ' - moderated +3<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'),
(re.compile(r'<div class="moderation">.*?</div>'), lambda h1: ''),
]
extra_css = '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } \
.reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } \
.quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }'
feeds = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')]
def print_version(self, url):
return url + '?max=200'

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.variety.com
'''
@ -14,11 +14,11 @@ class Variety(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
encoding = 'utf8'
publisher = 'Red Business Information'
category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
language = 'en'
masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
masthead_url = 'http://images1.variety.com/graphics/variety/Variety_logo_green_tm.gif'
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
conversion_options = {
@ -30,17 +30,10 @@ class Variety(BasicNewsRecipe):
remove_tags = [dict(name=['object','link','map'])]
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
keep_only_tags = [dict(name='div', attrs={'class':'art control'})]
feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
def print_version(self, url):
rpt = url.rpartition('?')[0]
artid = rpt.rpartition('/')[2]
catidr = url.rpartition('categoryid=')[2]
catid = catidr.partition('&')[0]
return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
def preprocess_html(self, soup):
return self.adeify_images(soup)
rpt = url.rpartition('.html')[0]
return rpt + '?printerfriendly=true'

View File

@ -0,0 +1,46 @@
#!/usr/bin/env python
import re
from calibre.web.feeds.news import BasicNewsRecipe
class VillageVoice(BasicNewsRecipe):
title = 'Village Voice'
feeds = [
("Complete Issue", "http://villagevoice.com/syndication/issue"),
("News", "http://villagevoice.com/syndication/section/news"),
("Music", "http://villagevoice.com/syndication/section/music"),
("Movies", "http://villagevoice.com/syndication/section/film"),
#("Restaurants", "http://villagevoice.com/syndication/section/dining"),
#("Music Events", "http://villagevoice.com/syndication/events?type=music"),
#("Calendar Events", "http://villagevoice.com/syndication/events"),
#("Promotional Events", "http://villagevoice.com/syndication/promoEvents"),
#("Restaurant Guide", "http://villagevoice.com/syndication/restaurants/search")
]
auto_cleanup = True
max_articles_per_feed = 50
masthead_url = "http://assets.villagevoice.com/img/citylogo.png"
language = 'en'
__author__ = 'Barty'
seen_urls = []
# Village Voice breaks the article up into multiple pages, so
# parse page and grab the print url
url_regex = re.compile(r'\/content\/printVersion\/\d+',re.I)
def print_version(self, url):
if url in self.seen_urls:
return None
self.seen_urls.append( url)
soup = self.index_to_soup(url)
atag = soup.find('a',attrs={'href':self.url_regex})
if atag is None:
self.log('Warning: no print url found for '+url)
else:
m = self.url_regex.search(atag['href'])
if m:
url = 'http://www.villagevoice.com'+m.group(0)
return url
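
The recipe above locates the print link by matching an anchor's href against a compiled regex, which calibre's bundled BeautifulSoup accepts directly as an attribute value. A self-contained sketch of that lookup:

import re
from calibre.ebooks.BeautifulSoup import BeautifulSoup

soup = BeautifulSoup('<a href="/content/printVersion/123">Print</a>')
rx = re.compile(r'/content/printVersion/\d+', re.I)
atag = soup.find('a', attrs={'href': rx})
print(atag['href'])  # /content/printVersion/123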

View File

@ -3,7 +3,7 @@
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>..:: calibre {library} ::.. {title}</title>
<meta http-equiv="X-UA-Compatible" content="IE=100" />
<link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
@ -58,7 +58,7 @@
method="post" title="Donate to support the development of calibre">
<div>
<input type="hidden" name="cmd" value="_s-xclick"></input>
<input type="hidden" name="hosted_button_id" value="3028915"></input>
<input type="hidden" name="hosted_button_id" value="MZQCP8EESW4H4"></input>
<input type="image"
src="{prefix}/static/button-donate.png"
name="submit"></input>

View File

@ -26,7 +26,11 @@ def login_to_google(username, password):
br.form['Email'] = username
br.form['Passwd'] = password
raw = br.submit().read()
if b'<title>Account overview - Account Settings</title>' not in raw:
if re.search(br'<title>.*?Account Settings</title>', raw) is None:
x = re.search(br'(?is)<title>.*?</title>', raw)
if x is not None:
print ('Title of post login page: %s'%x.group())
#open('/tmp/goog.html', 'wb').write(raw)
raise ValueError(('Failed to login to google with credentials: %s %s'
'\nGoogle sometimes requires verification when logging in from a '
'new IP address. Use lynx to login and supply the verification, '
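
The change above loosens the post-login check from one exact <title> string to a byte regex, since Google varies the page title. A tiny standalone check of the new pattern, on a hypothetical response body:

import re
raw = b'<title>Overview - Account Settings</title>'  # hypothetical response body
print(re.search(br'<title>.*?Account Settings</title>', raw) is not None)  # True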

View File

@ -18,14 +18,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-01-08 20:03+0000\n"
"Last-Translator: Simeon <Unknown>\n"
"PO-Revision-Date: 2012-01-14 02:30+0000\n"
"Last-Translator: Wolfgang Rohdewald <wolfgang@rohdewald.de>\n"
"Language-Team: German <debian-l10n-german@lists.debian.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-01-09 04:49+0000\n"
"X-Generator: Launchpad (build 14640)\n"
"X-Launchpad-Export-Date: 2012-01-15 05:18+0000\n"
"X-Generator: Launchpad (build 14664)\n"
"Language: de\n"
#. name for aaa

File diff suppressed because it is too large.

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 35)
numeric_version = (0, 8, 36)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -14,6 +14,7 @@ from functools import wraps, partial
from calibre.db.locking import create_locks, RecordLock
from calibre.db.fields import create_field
from calibre.db.tables import VirtualTable
from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import now
@ -127,14 +128,8 @@ class Cache(object):
if not formats:
good_formats = None
else:
good_formats = []
for f in formats:
try:
mi.format_metadata[f] = self._format_metadata(book_id, f)
except:
pass
else:
good_formats.append(f)
mi.format_metadata = FormatMetadata(self, id, formats)
good_formats = FormatsList(formats, mi.format_metadata)
mi.formats = good_formats
mi.has_cover = _('Yes') if self._field_for('cover', book_id,
default_value=False) else ''

src/calibre/db/lazy.py (new file, 99 lines)
View File

@ -0,0 +1,99 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import weakref
from functools import wraps
from collections import MutableMapping, MutableSequence
'''
Avoid doing stats on all files in a book when getting metadata for that book.
Speeds up calibre startup with large libraries/libraries on a network share,
with a composite custom column.
'''
# Lazy format metadata retrieval {{{
def resolved(f):
@wraps(f)
def wrapper(self, *args, **kwargs):
if getattr(self, '_must_resolve', True):
self._resolve()
self._must_resolve = False
return f(self, *args, **kwargs)
return wrapper
class MutableBase(object):
@resolved
def __str__(self):
return str(self._values)
@resolved
def __repr__(self):
return repr(self._values)
@resolved
def __unicode__(self):
return unicode(self._values)
@resolved
def __len__(self):
return len(self._values)
@resolved
def __iter__(self):
return iter(self._values)
@resolved
def __contains__(self, key):
return key in self._values
@resolved
def __getitem__(self, fmt):
return self._values[fmt]
@resolved
def __setitem__(self, key, val):
self._values[key] = val
@resolved
def __delitem__(self, key):
del self._values[key]
class FormatMetadata(MutableBase, MutableMapping):
def __init__(self, db, id_, formats):
self._dbwref = weakref.ref(db)
self._id = id_
self._formats = formats
def _resolve(self):
db = self._dbwref()
self._values = {}
for f in self._formats:
try:
self._values[f] = db.format_metadata(self._id, f)
except:
pass
class FormatsList(MutableBase, MutableSequence):
def __init__(self, formats, format_metadata):
self._formats = formats
self._format_metadata = format_metadata
def _resolve(self):
self._values = [f for f in self._formats if f in self._format_metadata]
@resolved
def insert(self, idx, val):
self._values.insert(idx, val)
# }}}
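
A sketch of how the classes above behave, using a hypothetical stand-in for the database object (only the one method FormatMetadata calls is stubbed); nothing is stat-ed until the mapping is first touched:

from calibre.db.lazy import FormatMetadata

class FakeDB(object):  # hypothetical stub, not a real calibre database
    calls = 0
    def format_metadata(self, book_id, fmt):
        FakeDB.calls += 1  # in real use this stats a file on disk
        return {'size': 1024}

db = FakeDB()
fm = FormatMetadata(db, 1, ['EPUB', 'MOBI'])
print(FakeDB.calls)    # 0 -- construction does no work
print(fm['EPUB'])      # first access runs _resolve() once for all formats
print(FakeDB.calls)    # 2 -- one call per format, cached from now on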

View File

@ -162,7 +162,7 @@ class ANDROID(USBMS):
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO']
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -175,7 +175,7 @@ class ANDROID(USBMS):
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET']
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',

View File

@ -11,6 +11,7 @@ from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
from calibre.constants import filesystem_encoding
class CHMInput(InputFormatPlugin):
@ -36,6 +37,8 @@ class CHMInput(InputFormatPlugin):
log.debug('Processing CHM...')
with TemporaryDirectory('_chm2oeb') as tdir:
if not isinstance(tdir, unicode):
tdir = tdir.decode(filesystem_encoding)
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)

View File

@ -6,13 +6,14 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
import re, codecs
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import string_to_authors, MetaInformation
from calibre.utils.logging import default_log
from calibre.ptempfile import TemporaryFile
from calibre import force_unicode
def _clean(s):
return s.replace(u'\u00a0', u' ')
@ -138,6 +139,13 @@ def get_metadata_from_reader(rdr):
resolve_entities=True)[0])
title = rdr.title
try:
x = rdr.GetEncoding()
codecs.lookup(x)
enc = x
except:
enc = 'cp1252'
title = force_unicode(title, enc)
authors = _get_authors(home)
mi = MetaInformation(title, authors)
publisher = _get_publisher(home)
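
The try/except above is a small encoding-validation idiom: trust the encoding the CHM file declares only if Python's codec registry knows it, otherwise fall back to cp1252. Isolated, it looks like this:

import codecs

def safe_encoding(declared, fallback='cp1252'):
    try:
        codecs.lookup(declared)  # raises LookupError for unknown names
        return declared
    except (LookupError, TypeError):  # TypeError covers declared=None
        return fallback

print(safe_encoding('utf-8'))        # utf-8
print(safe_encoding('no-such-enc'))  # cp1252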

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, re
import os, re, codecs
from calibre import guess_type as guess_mimetype
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
@ -99,8 +99,17 @@ class CHMReader(CHMFile):
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
html_files = set([])
try:
x = self.GetEncoding()
codecs.lookup(x)
enc = x
except:
enc = 'cp1252'
for path in self.Contents():
lpath = os.path.join(output_dir, path)
fpath = path
if not isinstance(path, unicode):
fpath = path.decode(enc)
lpath = os.path.join(output_dir, fpath)
self._ensure_dir(lpath)
try:
data = self.GetFile(path)
@ -123,6 +132,7 @@ class CHMReader(CHMFile):
self.log.warn('%r filename too long, skipping'%path)
continue
raise
if debug_dump:
import shutil
shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))

View File

@ -8,6 +8,7 @@ import StringIO, sys
from struct import pack
from calibre.ebooks.metadata import MetaInformation
from calibre import force_unicode
class StreamSlicer(object):
@ -245,7 +246,9 @@ class MetadataUpdater(object):
def get_metadata(self):
''' Return MetaInformation with title, author'''
self.get_original_metadata()
return MetaInformation(self.metadata['Title'], [self.metadata['Authors']])
title = force_unicode(self.metadata['Title'], 'utf-8')
authors = force_unicode(self.metadata['Authors'], 'utf-8').split(';')
return MetaInformation(title, authors)
def get_original_metadata(self):
offset = self.base + self.topaz_headers['metadata']['blocks'][0]['offset']
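
The fix above decodes the raw Topaz header fields before handing them to MetaInformation; force_unicode is calibre's decode-with-fallback helper. A one-line illustration with hypothetical header bytes:

from calibre import force_unicode
raw_title = b'Caf\xc3\xa9'                # hypothetical bytes from an azw1 header
print(force_unicode(raw_title, 'utf-8'))  # Café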

View File

@ -13,6 +13,7 @@
3. Much more comprehensive testing/error handling
4. Properly encodes/decodes assertions
5. Handles points in the padding of elements consistently
6. Has a utility method to calculate the CFI for the current viewport position robustly
To check if this script is compatible with the current browser, call
window.cfi.is_compatible(); it will throw an exception if not compatible.
@ -72,7 +73,7 @@ get_current_time = (target) -> # {{{
fstr(ans)
# }}}
window_scroll_pos = (win) -> # {{{
window_scroll_pos = (win=window) -> # {{{
if typeof(win.pageXOffset) == 'number'
x = win.pageXOffset
y = win.pageYOffset
@ -86,7 +87,7 @@ window_scroll_pos = (win) -> # {{{
return [x, y]
# }}}
viewport_to_document = (x, y, doc) -> # {{{
viewport_to_document = (x, y, doc=window?.document) -> # {{{
until doc == window.document
# We are in a frame
frame = doc.defaultView.frameElement
@ -101,7 +102,7 @@ viewport_to_document = (x, y, doc) -> # {{{
return [x, y]
# }}}
# Equivalent for caretRangeFromPoint for non WebKit browsers {{{
# Convert point to character offset {{{
range_has_point = (range, x, y) ->
for rect in range.getClientRects()
if (rect.left <= x <= rect.right) and (rect.top <= y <= rect.bottom)
@ -157,7 +158,8 @@ class CanonicalFragmentIdentifier
is_compatible(): Throws an error if the browser is not compatible with
this script
at(x, y): which maps a point to a CFI, if possible
at(x, y): Maps a point to a CFI, if possible
at_current(): Returns the CFI corresponding to the current viewport scroll location
scroll_to(cfi): which scrolls the browser to a point corresponding to the
given cfi, and returns the x and y co-ordinates of the point.
@ -559,11 +561,73 @@ class CanonicalFragmentIdentifier
null
# }}}
current_cfi: () -> # {{{
at_current: () -> # {{{
[winx, winy] = window_scroll_pos()
[winw, winh] = [window.innerWidth, window.innerHeight]
max = Math.max
winw = max(winw, 400)
winh = max(winh, 600)
deltay = Math.floor(winh/50)
deltax = Math.floor(winw/25)
miny = max(-winy, -winh)
maxy = winh
minx = max(-winx, -winw)
maxx = winw
dist = (p1, p2) ->
Math.sqrt(Math.pow(p1[0]-p2[0], 2), Math.pow(p1[1]-p2[1], 2))
get_cfi = (ox, oy) ->
try
cfi = this.at(ox, oy)
point = this.point(cfi)
catch err
cfi = null
if point.range != null
r = point.range
rect = r.getClientRects()[0]
x = (point.a*rect.left + (1-point.a)*rect.right)
y = (rect.top + rect.bottom)/2
[x, y] = viewport_to_document(x, y, r.startContainer.ownerDocument)
else
node = point.node
r = node.getBoundingClientRect()
[x, y] = viewport_to_document(r.left, r.top, node.ownerDocument)
if typeof(point.x) == 'number' and node.offsetWidth
x += (point.x*node.offsetWidth)/100
if typeof(point.y) == 'number' and node.offsetHeight
y += (point.y*node.offsetHeight)/100
if dist(viewport_to_document(ox, oy), [x, y]) > 50
cfi = null
return cfi
x_loop = (cury) ->
for direction in [-1, 1]
delta = deltax * direction
curx = 0
until (direction < 0 and curx < minx) or (direction > 0 and curx > maxx)
cfi = get_cfi(curx, cury)
if cfi
return cfi
curx += delta
null
for direction in [-1, 1]
delta = deltay * direction
cury = 0
until (direction < 0 and cury < miny) or (direction > 0 and cury > maxy)
cfi = x_loop(cury, -1)
if cfi
return cfi
cury += delta
# TODO: Return the CFI corresponding to the <body> tag
null
# }}}
if window?

View File

@ -23,6 +23,7 @@
indignation and dislike men who are so beguiled and demoralized by
the charms of pleasure of the moment, so blinded by desire, that
they cannot foresee</p>
<p><img src="marker.png" width="300" height="300" alt="Test image"/></p>
</body>
</html>

View File

@ -1,7 +1,7 @@
<!DOCTYPE html>
<html>
<head>
<title>Testing EPUB CFI</title>
<title>Testing cfi.coffee</title>
<script type="text/javascript" src="cfi.coffee"></script>
<script type="text/javascript" src="cfi-test.coffee"></script>
<style type="text/css">
@ -46,7 +46,8 @@
</head>
<body>
<div id="container">
<h1 id="first-h1">Testing EPUB CFI</h1>
<h1 id="first-h1">Testing cfi.coffee</h1>
<p>Click anywhere and the location will be marked with a marker, whose position is set via a CFI.</p>
<p><a id="reset" href="/">Reset CFI to None</a></p>
<h2>A div with scrollbars</h2>
<p>Scroll down and click on some elements. Make sure to hit both

View File

@ -103,7 +103,7 @@ def html5_parse(data, max_nesting_depth=100):
xmlns_declaration = '{%s}'%XMLNS_NS
non_html5_namespaces = {}
seen_namespaces = set()
for elem in tuple(data.iter()):
for elem in tuple(data.iter(tag=etree.Element)):
elem.attrib.pop('xmlns', None)
namespaces = {}
for x in tuple(elem.attrib):
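
The one-line change above matters because lxml's iter() yields every node, including comments and processing instructions, which are not regular elements and choke on attribute manipulation; passing tag=etree.Element restricts iteration to real elements. A standalone demonstration:

from lxml import etree

root = etree.fromstring('<html><!-- a comment --><body></body></html>')
print(len(list(root.iter())))                    # 3 -- includes the comment node
print(len(list(root.iter(tag=etree.Element))))   # 2 -- only html and body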

View File

@ -462,7 +462,7 @@ class Scheduler(QObject):
delta = timedelta(days=self.oldest)
try:
ids = list(self.db.tags_older_than(_('News'),
delta))
delta, must_have_authors=['calibre']))
except:
# Happens if library is being switched
ids = []

View File

@ -362,7 +362,7 @@
<item>
<widget class="QLabel" name="label_7">
<property name="text">
<string>&amp;Delete downloaded news older than:</string>
<string>Delete downloaded news &amp;older than:</string>
</property>
<property name="buddy">
<cstring>old_news</cstring>

View File

@ -573,6 +573,9 @@ class SeriesIndexEdit(QDoubleSpinBox):
import traceback
traceback.print_exc()
def reset_original(self):
self.original_series_name = self.series_edit.current_val
def break_cycles(self):
try:
self.series_edit.currentIndexChanged.disconnect()

View File

@ -376,6 +376,7 @@ class MetadataSingleDialogBase(ResizableDialog):
if not mi.is_null('series') and mi.series.strip():
self.series.current_val = mi.series
if mi.series_index is not None:
self.series_index.reset_original()
self.series_index.current_val = float(mi.series_index)
if not mi.is_null('languages'):
langs = [canonicalize_lang(x) for x in mi.languages]

View File

@ -325,6 +325,7 @@ class Preferences(QMainWindow):
return
rc = self.showing_widget.restart_critical
self.committed = True
do_restart = False
if must_restart:
self.must_restart = True
msg = _('Some of the changes you made require a restart.'
@ -335,12 +336,24 @@ class Preferences(QMainWindow):
'set any more preferences, until you restart.')
warning_dialog(self, _('Restart needed'), msg, show=True,
d = warning_dialog(self, _('Restart needed'), msg,
show_copy_button=False)
b = d.bb.addButton(_('Restart calibre now'), d.bb.AcceptRole)
b.setIcon(QIcon(I('lt.png')))
d.do_restart = False
def rf():
d.do_restart = True
b.clicked.connect(rf)
d.set_details('')
d.exec_()
b.clicked.disconnect()
do_restart = d.do_restart
self.showing_widget.refresh_gui(self.gui)
self.hide_plugin()
if self.close_after_initial or (must_restart and rc):
if self.close_after_initial or (must_restart and rc) or do_restart:
self.close()
if do_restart:
self.gui.quit(restart=True)
def cancel(self, *args):

View File

@ -73,6 +73,9 @@ class JavaScriptLoader(object):
src = self.get(x)
evaljs(src)
if not lang:
lang = 'en'
def lang_name(l):
l = l.lower()
l = lang_as_iso639_1(l)

View File

@ -40,6 +40,7 @@ from calibre.utils.magick.draw import save_cover_data_to
from calibre.utils.recycle_bin import delete_file, delete_tree
from calibre.utils.formatter_functions import load_user_template_functions
from calibre.db.errors import NoSuchFormat
from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.utils.localization import (canonicalize_lang,
calibre_langcode_to_name)
@ -81,7 +82,6 @@ class Tag(object):
def __repr__(self):
return str(self)
class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
An ebook metadata database that stores references to ebook files on disk.
@ -170,6 +170,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
except:
traceback.print_exc()
self.field_metadata = FieldMetadata()
self.format_filename_cache = defaultdict(dict)
self._library_id_ = None
# Create the lock to be used to guard access to the metadata writer
# queues. This must be an RLock, not a Lock
@ -310,6 +311,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if not self.is_second_db:
load_user_template_functions(self.prefs.get('user_template_functions', []))
# Load the format filename cache
self.format_filename_cache = defaultdict(dict)
for book_id, fmt, name in self.conn.get(
'SELECT book,format,name FROM data'):
self.format_filename_cache[book_id][fmt.upper() if fmt else ''] = name
self.conn.executescript('''
DROP TRIGGER IF EXISTS author_insert_trg;
CREATE TEMP TRIGGER author_insert_trg
@ -599,7 +606,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
fname = self.construct_file_name(id)
changed = False
for format in formats:
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
name = self.format_filename_cache[id].get(format.upper(), None)
if name and name != fname:
changed = True
break
@ -944,14 +951,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
good_formats = None
else:
formats = sorted(formats.split(','))
good_formats = []
for f in formats:
try:
mi.format_metadata[f] = self.format_metadata(id, f)
except:
pass
else:
good_formats.append(f)
mi.format_metadata = FormatMetadata(self, id, formats)
good_formats = FormatsList(formats, mi.format_metadata)
mi.formats = good_formats
tags = row[fm['tags']]
if tags:
@ -1145,12 +1146,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def format_files(self, index, index_is_id=False):
id = index if index_is_id else self.id(index)
try:
formats = self.conn.get('SELECT name,format FROM data WHERE book=?', (id,))
formats = map(lambda x:(x[0], x[1]), formats)
return formats
except:
return []
return [(v, k) for k, v in self.format_filename_cache[id].iteritems()]
def formats(self, index, index_is_id=False, verify_formats=True):
''' Return available formats as a comma separated list or None if there are no available formats '''
@ -1236,7 +1232,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
id = index if index_is_id else self.id(index)
try:
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
name = self.format_filename_cache[id][format.upper()]
except:
return None
if name:
@ -1333,11 +1329,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def add_format(self, index, format, stream, index_is_id=False, path=None,
notify=True, replace=True):
id = index if index_is_id else self.id(index)
if format:
self.format_metadata_cache[id].pop(format.upper(), None)
if not format: format = ''
self.format_metadata_cache[id].pop(format.upper(), None)
name = self.format_filename_cache[id].get(format.upper(), None)
if path is None:
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
if name and not replace:
return False
name = self.construct_file_name(id)
@ -1355,6 +1351,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
(id, format.upper(), size, name))
self.conn.commit()
self.format_filename_cache[id][format.upper()] = name
self.refresh_ids([id])
if notify:
self.notify('metadata', [id])
@ -1402,9 +1399,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def remove_format(self, index, format, index_is_id=False, notify=True,
commit=True, db_only=False):
id = index if index_is_id else self.id(index)
if format:
self.format_metadata_cache[id].pop(format.upper(), None)
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
if not format: format = ''
self.format_metadata_cache[id].pop(format.upper(), None)
name = self.format_filename_cache[id].pop(format.upper(), None)
if name:
if not db_only:
try:
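
The hunks above replace repeated 'SELECT name FROM data' queries with a dict cache keyed on book id and upper-cased format. The construction logic, isolated with hypothetical rows:

from collections import defaultdict

# Hypothetical rows in the shape of 'SELECT book,format,name FROM data'
rows = [(1, 'epub', 'One'), (1, 'MOBI', 'One'), (2, None, 'Two')]

format_filename_cache = defaultdict(dict)
for book_id, fmt, name in rows:
    # Formats are normalized to upper case; a missing format maps to ''
    format_filename_cache[book_id][fmt.upper() if fmt else ''] = name

print(format_filename_cache[1].get('EPUB'))  # One
print(format_filename_cache[2][''])          # Two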
@ -1925,7 +1922,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
############# End get_categories
def tags_older_than(self, tag, delta, must_have_tag=None):
def tags_older_than(self, tag, delta, must_have_tag=None,
must_have_authors=None):
'''
Return the ids of all books having the tag ``tag`` that are older
than the specified time. Tag comparison is case insensitive.
@ -1934,6 +1932,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
the tag are returned.
:param must_have_tag: If not None the list of matches will be
restricted to books that have this tag
:param must_have_authors: A list of authors. If not None the list of
matches will be restricted to books that have these authors (case
insensitive).
'''
tag = tag.lower().strip()
mht = must_have_tag.lower().strip() if must_have_tag else None
@ -1941,9 +1942,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
tindex = self.FIELD_MAP['timestamp']
gindex = self.FIELD_MAP['tags']
iindex = self.FIELD_MAP['id']
aindex = self.FIELD_MAP['authors']
mah = must_have_authors
if mah is not None:
mah = [x.replace(',', '|').lower() for x in mah]
mah = ','.join(mah)
for r in self.data._data:
if r is not None:
if delta is None or (now - r[tindex]) > delta:
if mah:
authors = r[aindex] or ''
if authors.lower() != mah:
continue
tags = r[gindex]
if tags:
tags = [x.strip() for x in tags.lower().split(',')]
@ -3128,6 +3138,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
stream.seek(0)
mi = get_metadata(stream, format, use_libprs_metadata=False,
force_read_metadata=True)
# Force the author to calibre, as the auto-delete of old news checks for
# both author == calibre and the tag News
mi.authors = ['calibre']
stream.seek(0)
if mi.series_index is None:
mi.series_index = self.get_next_series_num_for(mi.series)
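
The new must_have_authors filter in tags_older_than normalizes the requested authors the same way the field cache stores them (commas become pipes, lower case, joined with commas) before comparing against the row. Just that transform, with hypothetical input:

mah = ['calibre', 'Doe, Jane']  # hypothetical must_have_authors argument
mah = [x.replace(',', '|').lower() for x in mah]
mah = ','.join(mah)
print(mah)  # calibre,doe| jane -- the form compared against the authors field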

View File

@ -497,7 +497,8 @@ class BrowseServer(object):
xml(s, True),
xml(_('Loading, please wait'))+'&hellip;',
unicode(c),
xml(u'/browse/category_group/%s/%s'%(category,
xml(u'/browse/category_group/%s/%s'%(
hexlify(category.encode('utf-8')),
hexlify(s.encode('utf-8'))), True),
self.opts.url_prefix)
for s, c in category_groups.items()]
@ -531,6 +532,13 @@ class BrowseServer(object):
sort = None
if sort not in ('rating', 'name', 'popularity'):
sort = 'name'
try:
category = unhexlify(category)
if isbytestring(category):
category = category.decode('utf-8')
except:
raise cherrypy.HTTPError(404, 'invalid category')
categories = self.categories_cache()
if category not in categories:
raise cherrypy.HTTPError(404, 'category not found')
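
The fix above hex-encodes category names so arbitrary Unicode survives in URL path segments, then decodes on the way back in. The round trip in isolation, with a hypothetical non-ASCII category:

from binascii import hexlify, unhexlify

category = u'S\xfc\xdfe B\xfccher'  # hypothetical non-ASCII category name
token = hexlify(category.encode('utf-8'))
print(token)                                         # hex digits only: URL-safe
print(unhexlify(token).decode('utf-8') == category)  # True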

Binary image file changed (not shown): 1.6 KiB → 1.5 KiB
Binary image file changed (not shown): 1.3 KiB → 1.7 KiB
Binary image file changed (not shown): 733 B → 2.3 KiB

62 more file diffs suppressed because they are too large.

Some files were not shown because too many files have changed in this diff.