Commit 18e83abe93, contained in calibre release 0.8.36
@@ -5,7 +5,7 @@
# Also, each release can have new and improved recipes.

# - version: ?.?.?
-#   date: 2011-??-??
+#   date: 2012-??-??
#
# new features:
#   - title:

@@ -19,8 +19,68 @@
# new recipes:
#   - title:

- version: 0.8.36
  date: 2012-01-20

  new features:
    - title: "Decrease startup time for large libraries with at least one composite custom column by reading format info on demand"

    - title: "When automatically deleting news older than x days from the calibre library, only delete the book if it has both the tag News and the author calibre. This prevents accidental deletion of books the user has tagged with News."

    - title: "Driver for the Infibeam Pi 2"

    - title: "Add a Tag Editor for tags-like custom columns to the edit metadata dialog"

  bug fixes:
    - title: "E-book viewer: Fix regression in 0.8.35 that caused the viewer to raise an error on books that did not define a language"

    - title: "Content server: Fix grouping for categories based on custom columns."
      tickets: [919011]

    - title: "Edit metadata dialog: When setting the series from a format or via metadata download, ensure that the series index is not automatically changed when closing the dialog."
      tickets: [918751]

    - title: "When reading metadata from Topaz (azw1) files, handle non-ASCII metadata correctly."
      tickets: [917419]

    - title: "CHM Input: Do not choke on CHM files with non-ASCII internal filenames on Windows."
      tickets: [917696]

    - title: "Fix reading metadata from CHM files with non-ASCII titles"

    - title: "Fix HTML 5 parser choking on comments"

    - title: "If calibre is started from a directory that does not exist, automatically use the home directory as the working directory, instead of crashing"

    - title: "Fix iriver Story HD Wi-Fi device and external SD card being swapped"
      tickets: [916364]

    - title: "Content server: Fix ugly URLs for specific format download in the book details and permalink panels"

    - title: "When adding FB2 files, do not set the date field from the metadata in the file"

  improved recipes:
    - OReilly Premium
    - Variety
    - Blic
    - New Journal of Physics
    - Der Tagesspiegel

  new recipes:
    - title: Tweakers.net
      author: Roedi06

    - title: Village Voice
      author: Barty

    - title: Edge.org Conversations
      author: levien

    - title: Novi list - printed edition
      author: Darko Miletic

- version: 0.8.35
-  date: 2011-01-13
+  date: 2012-01-13

  new features:
    - title: "Metadata plugboards: Allow creation of plugboards for email delivery."
recipes/al_masry_al_youm.recipe (new file, 50 lines)
@@ -0,0 +1,50 @@
__license__   = 'GPL v3'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
abc.net.au/news
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class TheDailyNewsEG(BasicNewsRecipe):
    title          = u'al-masry al-youm'
    __author__     = 'Omm Mishmishah'
    description    = 'Independent News from Egypt'
    masthead_url   = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
    cover_url      = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'

    auto_cleanup   = True
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = False
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'Independent News Egypt'
    category = 'News, Egypt, World'
    language = 'en_EG'
    publication_type = 'newsportal'
    # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    # Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
    preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments'         : description
        ,'tags'            : category
        ,'language'        : language
        ,'publisher'       : publisher
        ,'linearize_tables': False
    }

    keep_only_tags = [dict(attrs={'class':['article section']})]

    remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
        'inline-content story left', 'inline-content map left contracted', 'published',
        'story-map', 'statepromo', 'topics', ]})]

    remove_attributes = ['width','height']

    feeds = [(u'English News', u'http://www.almasryalyoum.com/en/rss_feed_term/113/rss.xml'),
             (u'News Features', u'http://www.almasryalyoum.com/en/rss_feed_term/115/rss.xml'),
             (u'Culture', u'http://www.almasryalyoum.com/en/rss_feed_term/133/rss.xml'),
             (u'Cinema', u'http://www.almasryalyoum.com/en/rss_feed_term/134/rss.xml')
            ]
@@ -1,6 +1,6 @@

__license__   = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
blic.rs
'''

@@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe):
    def print_version(self, url):
        return url + '/print'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.blic.rs/')
        alink = soup.find('a', attrs={'id':'blic_naslovna_print'})
        if alink:
            return 'http://www.blic.rs' + alink['href']
        return None
@@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):

    use_embedded_content   = False
    remove_javascript = True
-   needs_subscription = True
+   needs_subscription = 'optional'
    encoding= 'ISO-8859-1'

    remove_tags_before = dict(name='font', attrs={'class':'date'})

@@ -75,32 +75,30 @@ class ESPN(BasicNewsRecipe):
        return soup

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
-       br.set_handle_refresh(False)
-       url = ('https://r.espn.go.com/members/v3_1/login')
-       raw = br.open(url).read()
-       raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
-       with TemporaryFile(suffix='.htm') as fname:
-           with open(fname, 'wb') as f:
-               f.write(raw)
-           br.open_local_file(fname)
+       if self.username and self.password:
+           br.set_handle_refresh(False)
+           url = ('https://r.espn.go.com/members/v3_1/login')
+           raw = br.open(url).read()
+           raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
+           with TemporaryFile(suffix='.htm') as fname:
+               with open(fname, 'wb') as f:
+                   f.write(raw)
+               br.open_local_file(fname)

-       br.form = br.forms().next()
-       br.form.find_control(name='username', type='text').value = self.username
-       br.form['password'] = self.password
-       br.submit().read()
-       br.open('http://espn.go.com').read()
-       br.set_handle_refresh(True)
+           br.form = br.forms().next()
+           br.form.find_control(name='username', type='text').value = self.username
+           br.form['password'] = self.password
+           br.submit().read()
+           br.open('http://espn.go.com').read()
+           br.set_handle_refresh(True)
        return br

    def get_article_url(self, article):
        return article.get('guid', None)

    def print_version(self, url):

        if 'eticket' in url:
            return url.partition('&')[0].replace('story?', 'print?')
        match = re.search(r'story\?(id=\d+)', url)
recipes/klip_me.recipe (new file, 72 lines)
@@ -0,0 +1,72 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    title = u'Klipme'
    __author__ = 'Ken Sun'
    publisher = 'Klip.me'
    category = 'info, custom, Klip.me'
    oldest_article = 365
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    remove_tags = [
        dict(name='div', attrs={'id':'text_controls_toggle'})
        ,dict(name='script')
        ,dict(name='div', attrs={'id':'text_controls'})
        ,dict(name='div', attrs={'id':'editing_controls'})
        ,dict(name='div', attrs={'class':'bar bottom'})
    ]
    use_embedded_content = False
    needs_subscription = True
    INDEX = u'http://www.klip.me'
    LOGIN = INDEX + u'/fav/signin?callback=/fav'

    feeds = [
        (u'Klip.me unread', u'http://www.klip.me/fav'),
        (u'Klip.me started', u'http://www.klip.me/fav?s=starred')
    ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['Email'] = self.username
            if self.password is not None:
                br['Passwd'] = self.password
            br.submit()
        return br

    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('table', attrs={'class':['item','item new']}):
                atag = item.a
                if atag and atag.has_key('href'):
                    url = atag['href']
                    articles.append({
                        'url': url
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

    def print_version(self, url):
        return 'http://www.klip.me' + url

    def populate_article_metadata(self, article, soup, first):
        article.title = soup.find('title').contents[0].strip()

    def postprocess_html(self, soup, first_fetch):
        for link_tag in soup.findAll(attrs={"id" : "story"}):
            link_tag.insert(0, '<h1>'+soup.find('title').contents[0].strip()+'</h1>')
            print link_tag

        return soup
@@ -1,16 +1,35 @@
__license__   = 'GPL v3'
__copyright__ = '2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
##
## Title:        Microwave Journal RSS recipe
## Contact:      AprilHare, Darko Miletic <darko.miletic at gmail.com>
##
## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
## Copyright:    2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>
##
## Written:      2008
## Last Edited:  Jan 2012
##

'''
01-19-2012: Added GrayScale Image conversion and duplicate article removal
'''

__license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
__copyright__ = '2008-2012, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
__version__   = 'v0.5.0'
__date__      = '2012-01-19'
__author__    = 'Darko Miletic'

'''
newscientist.com
'''

import re
import urllib
from calibre.utils.magick import Image
from calibre.web.feeds.news import BasicNewsRecipe

class NewScientist(BasicNewsRecipe):
    title = 'New Scientist - Online News w. subscription'
    __author__ = 'Darko Miletic'
    description = 'Science news and science articles from New Scientist.'
    language = 'en'
    publisher = 'Reed Business Information Ltd.'

@@ -39,10 +58,19 @@ class NewScientist(BasicNewsRecipe):

    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]

    # Whether to omit duplicates of articles (typically arising when articles are indexed in
    # more than one section). If True, only the first occurrence will be downloaded.
    filterDuplicates = True

    # Whether to convert images to grayscale for eInk readers.
    Convert_Grayscale = False

    url_list = [] # This list is used to check if an article has already been included.

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open('http://www.newscientist.com/')
        if self.username is not None and self.password is not None:
            br.open('https://www.newscientist.com/user/login')
            data = urllib.urlencode({ 'source':'form'
                                     ,'redirectURL':''

@@ -80,6 +108,10 @@ class NewScientist(BasicNewsRecipe):
        return article.get('guid', None)

    def print_version(self, url):
        if self.filterDuplicates:
            if url in self.url_list:
                return
            self.url_list.append(url)
        return url + '?full=true&print=true'

    def preprocess_html(self, soup):

@@ -91,7 +123,7 @@ class NewScientist(BasicNewsRecipe):
            item.name='p'
        for item in soup.findAll(['xref','figref']):
            tstr = item.string
            item.replaceWith(tstr)
        for tg in soup.findAll('a'):
            if tg.string == 'Home':
                tg.parent.extract()

@@ -101,3 +133,16 @@ class NewScientist(BasicNewsRecipe):
            tg.replaceWith(tstr)
        return soup

    # Convert images to grayscale
    def postprocess_html(self, soup, first):
        if self.Convert_Grayscale:
            # process all the images
            for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
                iurl = tag['src']
                img = Image()
                img.open(iurl)
                if img < 0:
                    raise RuntimeError('Out of memory')
                img.type = "GrayscaleType"
                img.save(iurl)
        return soup
@ -1,8 +1,15 @@
|
||||
# Talking Points is not grabbing everything.
|
||||
# The look is right, but only the last one added?
|
||||
import re
|
||||
import time
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
# Allows the Python soup converter, which makes parsing easier.
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
# strip ads and graphics
|
||||
# Current Column lacks a title.
|
||||
# Talking Points Memo - shorten title - Remove year and Bill's name
|
||||
# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
|
||||
# Newsletters: Talking Points Memos covered by cat12
|
||||
|
||||
class OReillyPremium(BasicNewsRecipe):
|
||||
title = u'OReilly Premium'
|
||||
@ -19,7 +26,17 @@ class OReillyPremium(BasicNewsRecipe):
|
||||
# Don't go down
|
||||
recursions = 0
|
||||
max_articles_per_feed = 2000
|
||||
language = 'en'
|
||||
|
||||
debugMessages = True
|
||||
|
||||
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
|
||||
catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
|
||||
["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
|
||||
["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
|
||||
["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
|
||||
["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
|
||||
["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
|
||||
]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
@ -31,6 +48,8 @@ class OReillyPremium(BasicNewsRecipe):
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
# Returns the best-guess print url.
|
||||
# The second parameter (pageURL) is returned if nothing is found.
|
||||
def extractPrintURL(self, baseURL, pageURL, printString):
|
||||
tagURL = pageURL
|
||||
soup = self.index_to_soup(pageURL)
|
||||
@ -38,7 +57,6 @@ class OReillyPremium(BasicNewsRecipe):
|
||||
printText = soup.find('a', text=printString)
|
||||
else :
|
||||
print("Failed to find Print string "+printString+ " in "+pageURL)
|
||||
|
||||
if printText:
|
||||
tag = printText.parent
|
||||
tagURL = baseURL+tag['href']
|
||||
@ -47,177 +65,111 @@ class OReillyPremium(BasicNewsRecipe):
|
||||
def stripBadChars(self, inString) :
|
||||
return inString.replace("\'", "")
|
||||
|
||||
|
||||
# returns a qualifying article list
|
||||
def parseNoSpinArchives(self, baseURL, soupURL, debugMessages):
|
||||
articleList = []
|
||||
soup = self.index_to_soup(soupURL)
|
||||
for div in soup.findAll(True, attrs={'class':['blogBody'], 'style':['padding-top:10px;']}):
|
||||
a = div.find('a', href=True)
|
||||
if not a:
|
||||
continue
|
||||
# re == regex. [href] is the link
|
||||
url = baseURL
|
||||
url +=re.sub(r'\?.*', '', a['href'])
|
||||
# Get print version
|
||||
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
|
||||
if printURL:
|
||||
url = printURL
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
if debugMessages :
|
||||
print("No Spin Archive Title:"+title+" at url: "+url)
|
||||
description = 'None'
|
||||
pubdate = time.strftime('%a, %d %b')
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
return articleList
|
||||
|
||||
|
||||
def parseTVArchives(self, baseURL, soupURL, debugMessages):
|
||||
# TV Archives page has some Ajax, so look for the static only.
|
||||
articleList = []
|
||||
soup = self.index_to_soup(soupURL)
|
||||
if debugMessages :
|
||||
print("In parseTVArchives")
|
||||
for div in soup.findAll('a', {'class':['showLinks','homeLinks']}):
|
||||
a = div
|
||||
url = baseURL
|
||||
url +=a['href']
|
||||
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
|
||||
if printURL:
|
||||
url = printURL
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
title = self.stripBadChars(title)
|
||||
if debugMessages :
|
||||
print("TV Archive "+title+" at url: "+url)
|
||||
description = 'None'
|
||||
pubdate = time.strftime('%a, %d %b')
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
if debugMessages :
|
||||
print("Leaving TV Parse ")
|
||||
return articleList
|
||||
|
||||
# Get Daily Briefing Archives
|
||||
def parseDailyBriefs(self, baseURL, soupURL, debugMessages) :
|
||||
print("Starting daily briefs")
|
||||
articleList = []
|
||||
soup = self.index_to_soup(soupURL)
|
||||
for div in soup.findAll(True, attrs={'class':['defaultHeaderSmallLinks']}):
|
||||
# re == regex. [href] is the link
|
||||
url = baseURL
|
||||
url +=re.sub(r'\?.*', '', div['href'])
|
||||
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
|
||||
if printURL:
|
||||
url = printURL
|
||||
title = div.contents[0]
|
||||
if debugMessages :
|
||||
print("Daily Brief - title:"+title+" at url: "+url)
|
||||
description = 'None'
|
||||
pubdate = time.strftime('%a, %d %b')
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
print("Leaving daily briefs")
|
||||
return articleList
|
||||
|
||||
# Get the weekly Stratfor intelligence report
|
||||
def parseStratfor(self, baseURL, soupURL, debugMessages):
|
||||
# http://www.billoreilly.com/blog?categoryID=5
|
||||
articleList = []
|
||||
soup = self.index_to_soup(soupURL)
|
||||
if debugMessages :
|
||||
print("In parseStratfor")
|
||||
a = soup.find('a', {'class':['blogLinks']})
|
||||
url = baseURL
|
||||
url +=a['href']
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
if debugMessages :
|
||||
print("url: "+url)
|
||||
print("title:"+title)
|
||||
# Get Stratfor contents so we can get the real title.
|
||||
stratSoup = self.index_to_soup(url)
|
||||
title = stratSoup.html.head.title.string
|
||||
stratIndex = title.find('Stratfor.com:', 0)
|
||||
if (stratIndex > -1) :
|
||||
title = title[stratIndex+14:-1]
|
||||
# Look for first blogBody <td class="blogBody"
|
||||
stratBody = stratSoup.find('td', {'class':['blogBody']})
|
||||
if debugMessages :
|
||||
print("Strat content title:"+title)
|
||||
print("Strat body: "+ stratBody.contents[0])
|
||||
description = 'None'
|
||||
pubdate = time.strftime('%a, %d %b')
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
if debugMessages :
|
||||
print("Leaving Stratfor Parse ")
|
||||
return articleList
|
||||
|
||||
def parseTalkingPoints(self, baseURL, soupURL, debugMessages) :
|
||||
# Look for blogDate. That's got the date... Then the next blogBody has the title. and then an anchor with class "homeBlogReadMore bold" has the URL.
|
||||
articleList = []
|
||||
soup = self.index_to_soup(soupURL)
|
||||
if debugMessages :
|
||||
print("Starting Talking Points")
|
||||
topDate = soup.find("td", "blogBody")
|
||||
if not topDate :
|
||||
print("Failed to find date in Talking Points")
|
||||
# This page has the contents in double-wrapped tables!
|
||||
# tableParent = topDate.parent.parent
|
||||
myTable = topDate.findParents('table')[0]
|
||||
upOneTable = myTable.findParents('table')[0]
|
||||
upTwo = upOneTable.findParents('table')[0]
|
||||
# Now navigate rows of upTwo
|
||||
if debugMessages :
|
||||
print("Entering rows")
|
||||
for rows in upTwo.findChildren("tr", recursive=False):
|
||||
# Inside top level table, each row is an article
|
||||
rowTable = rows.find("table")
|
||||
articleTable = rowTable.find("table")
|
||||
articleTable = rows.find("tr")
|
||||
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
|
||||
blogDate = articleTable.find("a","blogDate").contents[0]
|
||||
# Skip to second blogBody for this.
|
||||
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
|
||||
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
|
||||
# re == regex. [href] is the link
|
||||
url = baseURL
|
||||
url +=re.sub(r'\?.*', '', blogURL)
|
||||
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
|
||||
if debugMessages :
|
||||
print("Talking Points Memo title "+title+" at url: "+url)
|
||||
def parseGeneric(self, baseURL):
|
||||
# Does a generic parsing of the articles. There are six categories (0-5)
|
||||
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
|
||||
# NoSpin and TV are generic
|
||||
fullReturn = []
|
||||
for i in range(len(self.catList)) :
|
||||
articleList = []
|
||||
soup = self.index_to_soup(self.catList[i][1])
|
||||
# Set defaults
|
||||
description = 'None'
|
||||
pubdate = time.strftime('%a, %d %b')
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
print("Exiting parseTalkingPoints\n")
|
||||
return articleList
|
||||
# Problem: 0-2 create many in an array
|
||||
# 3-5 create one.
|
||||
# So no for-div for 3-5
|
||||
|
||||
def parseCurrentColumn(self, baseURL, soupURL, debugMessages) :
|
||||
# Only needed to get the column title. Otherwise it's all good already; there's only one column
|
||||
articleList = []
|
||||
soup = self.index_to_soup(soupURL)
|
||||
titleSpan = soup.find('span', {'class':['defaultHeader']})
|
||||
title = titleSpan.contents[0]
|
||||
# Get Print URL since it's available
|
||||
printURL = self.extractPrintURL(baseURL, soupURL, "Print This Article")
|
||||
if printURL:
|
||||
print("Found print URL")
|
||||
url = printURL
|
||||
if debugMessages :
|
||||
print("url: "+url)
|
||||
print("title:"+title)
|
||||
description = 'None'
|
||||
pubdate = time.strftime('%a, %d %b')
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
if debugMessages :
|
||||
print("Leaving Stratfor Parse ")
|
||||
return articleList
|
||||
if i < 3 :
|
||||
for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
|
||||
print(div)
|
||||
if i == 1:
|
||||
a = div.find('a', href=True)
|
||||
else :
|
||||
a = div
|
||||
print(a)
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
if not a:
|
||||
continue
|
||||
# url = baseURL+re.sub(r'\?.*', '', a['href'])
|
||||
url = baseURL+a['href']
|
||||
if i < 2 :
|
||||
url = self.extractPrintURL(baseURL, url, "Print this entry")
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
elif i == 2 :
|
||||
# Daily Briefs
|
||||
url = self.extractPrintURL(baseURL, url, "Print this entry")
|
||||
title = div.contents[0]
|
||||
if self.debugMessages :
|
||||
print(title+" @ "+url)
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
|
||||
elif i == 3 : # Stratfor
|
||||
a = soup.find('a', self.catList[i][3])
|
||||
if a is None :
|
||||
continue
|
||||
url = baseURL+a['href']
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
# Get Stratfor contents so we can get the real title.
|
||||
stratSoup = self.index_to_soup(url)
|
||||
title = stratSoup.html.head.title.string
|
||||
stratIndex = title.find('Stratfor.com:', 0)
|
||||
if (stratIndex > -1) :
|
||||
title = title[stratIndex+14:-1]
|
||||
# Look for first blogBody <td class="blogBody"
|
||||
# Changed 12 Jan 2012 - new page format
|
||||
#stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
|
||||
#stratBody = stratSoup.find('td', {'class':['blogBody']})
|
||||
elif i == 4 : # Talking Points
|
||||
topDate = soup.find("td", "blogBody")
|
||||
if not topDate :
|
||||
print("Failed to find date in Talking Points")
|
||||
# This page has the contents in double-wrapped tables!
|
||||
myTable = topDate.findParents('table')[0]
|
||||
if myTable is not None:
|
||||
upOneTable = myTable.findParents('table')[0]
|
||||
if upOneTable is not None:
|
||||
upTwo = upOneTable.findParents('table')[0]
|
||||
if upTwo is None:
|
||||
continue
|
||||
# Now navigate rows of upTwo
|
||||
if self.debugMessages :
|
||||
print("Entering rows")
|
||||
for rows in upTwo.findChildren("tr", recursive=False):
|
||||
# Inside top level table, each row is an article
|
||||
rowTable = rows.find("table")
|
||||
articleTable = rowTable.find("table")
|
||||
# This looks wrong.
|
||||
articleTable = rows.find("tr")
|
||||
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
|
||||
blogDate = articleTable.find("a","blogDate").contents[0]
|
||||
# Skip to second blogBody for this.
|
||||
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
|
||||
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
|
||||
url = baseURL+re.sub(r'\?.*', '', blogURL)
|
||||
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
|
||||
if self.debugMessages :
|
||||
print("Talking Points Memo title "+title+" at url: "+url)
|
||||
pubdate = time.strftime('%a, %d %b')
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
|
||||
else : # Current Column
|
||||
titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
|
||||
if titleSpan is None :
|
||||
continue
|
||||
title = titleSpan.contents[0]
|
||||
url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
|
||||
if i == 3 or i == 5 :
|
||||
if self.debugMessages :
|
||||
print(self.catList[i][0]+" Title:"+title+" at url: "+url)
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
self.catList[i][3] = articleList
|
||||
fullReturn.append((self.catList[i][0], articleList))
|
||||
return fullReturn
|
||||
|
||||
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
|
||||
# returns a list of tuple ('feed title', list of articles)
|
||||
@ -231,27 +183,8 @@ class OReillyPremium(BasicNewsRecipe):
|
||||
# this is used instead of BasicNewsRecipe.parse_feeds().
|
||||
def parse_index(self):
|
||||
# Parse the page into Python Soup
|
||||
debugMessages = True
|
||||
baseURL = "https://www.billoreilly.com"
|
||||
def feed_title(div):
|
||||
return ''.join(div.findAll(text=True, recursive=False)).strip()
|
||||
# [] is list, {} is empty mapping.
|
||||
articleList = []
|
||||
ans = []
|
||||
showList = self.parseTVArchives(baseURL, 'https://www.billoreilly.com/show?action=tvShowArchive', debugMessages)
|
||||
articleList = self.parseNoSpinArchives(baseURL, 'https://www.billoreilly.com/blog?categoryID=7', debugMessages)
|
||||
stratList = self.parseStratfor(baseURL, 'http://www.billoreilly.com/blog?categoryID=5', debugMessages)
|
||||
dailyBriefs = self.parseDailyBriefs(baseURL, 'http://www.billoreilly.com/blog?categoryID=11', debugMessages)
|
||||
talkingPoints = self.parseTalkingPoints(baseURL, 'https://www.billoreilly.com/blog?categoryID=12', debugMessages)
|
||||
currentColumn = self.parseCurrentColumn(baseURL, 'https://www.billoreilly.com/currentcolumn', debugMessages)
|
||||
# Below, { x:y, a:b } creates a dictionary. We return a tuple of a title and list of dict...
|
||||
# Lists are constructed with square brackets, separating items with commas: [a, b, c]. Tuples are constructed by the comma operator (not within square brackets), with or without enclosing parentheses, but an empty tuple must have the enclosing parentheses, such as a, b, c or (). A single item tuple must have a trailing comma, such as (d,).
|
||||
# Shows first two if Talking Points and No Spin News. Also if they are TV Shows and Stratfor Weekly, also if Daily Briefing and Current Column
|
||||
# So all work individually. No idea why only getting first two in TOC now.
|
||||
ans = [("Talking Points Memos", talkingPoints),("No Spin News", articleList),("TV Shows", showList),("Stratfor Weekly",stratList), ("Daily Briefing", dailyBriefs),("Current Column", currentColumn)]
|
||||
if debugMessages :
|
||||
print ans
|
||||
return ans
|
||||
return self.parseGeneric(baseURL)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
refresh = soup.find('meta', {'http-equiv':'refresh'})
|
||||
|
recipes/the_daily_news_egypt.recipe (new file, 46 lines)
@@ -0,0 +1,46 @@
__license__   = 'GPL v3'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
abc.net.au/news
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class TheDailyNewsEG(BasicNewsRecipe):
    title          = u'The Daily News Egypt'
    __author__     = 'Omm Mishmishah'
    description    = 'News from Egypt'
    masthead_url   = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'
    cover_url      = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'

    auto_cleanup   = True
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = False
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'The Daily News Egypt'
    category = 'News, Egypt, World'
    language = 'en_EG'
    publication_type = 'newsportal'
    # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    # Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
    preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments'         : description
        ,'tags'            : category
        ,'language'        : language
        ,'publisher'       : publisher
        ,'linearize_tables': False
    }

    keep_only_tags = [dict(attrs={'class':['article section']})]

    remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
        'inline-content story left', 'inline-content map left contracted', 'published',
        'story-map', 'statepromo', 'topics', ]})]

    remove_attributes = ['width','height']

    feeds = [(u'The Daily News Egypt', u'http://www.thedailynewsegypt.com/rss.php?sectionid=all')]
66
recipes/tweakers_net.recipe
Normal file
66
recipes/tweakers_net.recipe
Normal file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Tweakers(BasicNewsRecipe):
|
||||
title = u'Tweakers.net - with Reactions'
|
||||
__author__ = 'Roedi06'
|
||||
language = 'nl'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
cover_url = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'}),
|
||||
{'id':'reacties'},
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id' : ['utracker']}),
|
||||
{'id' : ['channelNav']},
|
||||
{'id' : ['contentArea']},
|
||||
{'class' : ['breadCrumb']},
|
||||
{'class' : ['nextPrevious ellipsis']},
|
||||
{'class' : ['advertorial']},
|
||||
{'class' : ['sidebar']},
|
||||
{'class' : ['filterBox']},
|
||||
{'id' : ['toggleButtonTxt']},
|
||||
{'id' : ['socialButtons']},
|
||||
{'class' : ['button']},
|
||||
{'class' : ['textadTop']},
|
||||
{'class' : ['commentLink']},
|
||||
{'title' : ['Reageer op deze reactie']},
|
||||
{'class' : ['pageIndex']},
|
||||
{'class' : ['reactieHeader collapsed']},
|
||||
]
|
||||
no_stylesheets=True
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<hr*?>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'<p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'</p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'<a.*?>'), lambda h1: '<b><u>'),
|
||||
(re.compile(r'</a>'), lambda h2: '</u></b>'),
|
||||
(re.compile(r'<span class="new">', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'</span>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'), lambda match : ' - moderated 0<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'),
|
||||
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'), lambda match : ' - moderated +1<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'),
|
||||
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'), lambda match : ' - moderated +2<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'),
|
||||
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'), lambda match : ' - moderated +3<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'),
|
||||
(re.compile(r'<div class="moderation">.*?</div>'), lambda h1: ''),
|
||||
]
|
||||
|
||||
extra_css = '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } \
|
||||
.reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } \
|
||||
.quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }'
|
||||
|
||||
|
||||
feeds = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?max=200'
|
||||
|
@@ -1,5 +1,5 @@
__license__   = 'GPL v3'
-__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.variety.com
'''

@@ -14,11 +14,11 @@ class Variety(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
-   encoding = 'cp1252'
+   encoding = 'utf8'
    publisher = 'Red Business Information'
    category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
    language = 'en'
-   masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
+   masthead_url = 'http://images1.variety.com/graphics/variety/Variety_logo_green_tm.gif'
    extra_css = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '

    conversion_options = {

@@ -30,17 +30,10 @@ class Variety(BasicNewsRecipe):

    remove_tags = [dict(name=['object','link','map'])]

-   keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+   keep_only_tags = [dict(name='div', attrs={'class':'art control'})]

    feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]

    def print_version(self, url):
-       rpt = url.rpartition('?')[0]
-       artid = rpt.rpartition('/')[2]
-       catidr = url.rpartition('categoryid=')[2]
-       catid = catidr.partition('&')[0]
-       return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
+       rpt = url.rpartition('.html')[0]
+       return rpt + '?printerfriendly=true'

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
recipes/villagevoice.recipe (new file, 46 lines)
@@ -0,0 +1,46 @@
#!/usr/bin/env python

import re
from calibre.web.feeds.news import BasicNewsRecipe

class VillageVoice(BasicNewsRecipe):

    title = 'Village Voice'
    feeds = [
        ("Complete Issue", "http://villagevoice.com/syndication/issue"),
        ("News", "http://villagevoice.com/syndication/section/news"),
        ("Music", "http://villagevoice.com/syndication/section/music"),
        ("Movies", "http://villagevoice.com/syndication/section/film"),
        #("Restaurants", "http://villagevoice.com/syndication/section/dining"),
        #("Music Events", "http://villagevoice.com/syndication/events?type=music"),
        #("Calendar Events", "http://villagevoice.com/syndication/events"),
        #("Promotional Events", "http://villagevoice.com/syndication/promoEvents"),
        #("Restaurant Guide", "http://villagevoice.com/syndication/restaurants/search")
    ]

    auto_cleanup = True
    max_articles_per_feed = 50
    masthead_url = "http://assets.villagevoice.com/img/citylogo.png"
    language = 'en'
    __author__ = 'Barty'

    seen_urls = []

    # village voice breaks the article up into multiple pages, so
    # parse page and grab the print url

    url_regex = re.compile(r'\/content\/printVersion\/\d+', re.I)

    def print_version(self, url):
        if url in self.seen_urls:
            return None
        self.seen_urls.append(url)
        soup = self.index_to_soup(url)
        atag = soup.find('a', attrs={'href':self.url_regex})
        if atag is None:
            self.log('Warning: no print url found for '+url)
        else:
            m = self.url_regex.search(atag['href'])
            if m:
                url = 'http://www.villagevoice.com'+m.group(0)
        return url
@ -3,7 +3,7 @@
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<title>..:: calibre {library} ::.. {title}</title>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=100" />
|
||||
<link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
|
||||
@ -58,7 +58,7 @@
|
||||
method="post" title="Donate to support the development of calibre">
|
||||
<div>
|
||||
<input type="hidden" name="cmd" value="_s-xclick"></input>
|
||||
<input type="hidden" name="hosted_button_id" value="3028915"></input>
|
||||
<input type="hidden" name="hosted_button_id" value="MZQCP8EESW4H4"></input>
|
||||
<input type="image"
|
||||
src="{prefix}/static/button-donate.png"
|
||||
name="submit"></input>
|
||||
|
@ -26,7 +26,11 @@ def login_to_google(username, password):
|
||||
br.form['Email'] = username
|
||||
br.form['Passwd'] = password
|
||||
raw = br.submit().read()
|
||||
if b'<title>Account overview - Account Settings</title>' not in raw:
|
||||
if re.search(br'<title>.*?Account Settings</title>', raw) is None:
|
||||
x = re.search(br'(?is)<title>.*?</title>', raw)
|
||||
if x is not None:
|
||||
print ('Title of post login page: %s'%x.group())
|
||||
#open('/tmp/goog.html', 'wb').write(raw)
|
||||
raise ValueError(('Failed to login to google with credentials: %s %s'
|
||||
'\nGoogle sometimes requires verification when logging in from a '
|
||||
'new IP address. Use lynx to login and supply the verification, '
|
||||
|
@ -18,14 +18,14 @@ msgstr ""
|
||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||
"devel@lists.alioth.debian.org>\n"
|
||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||
"PO-Revision-Date: 2012-01-08 20:03+0000\n"
|
||||
"Last-Translator: Simeon <Unknown>\n"
|
||||
"PO-Revision-Date: 2012-01-14 02:30+0000\n"
|
||||
"Last-Translator: Wolfgang Rohdewald <wolfgang@rohdewald.de>\n"
|
||||
"Language-Team: German <debian-l10n-german@lists.debian.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"X-Launchpad-Export-Date: 2012-01-09 04:49+0000\n"
|
||||
"X-Generator: Launchpad (build 14640)\n"
|
||||
"X-Launchpad-Export-Date: 2012-01-15 05:18+0000\n"
|
||||
"X-Generator: Launchpad (build 14664)\n"
|
||||
"Language: de\n"
|
||||
|
||||
#. name for aaa
|
||||
|
(File diff suppressed because it is too large.)
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__   = u'calibre'
-numeric_version = (0, 8, 35)
+numeric_version = (0, 8, 36)
__version__   = u'.'.join(map(unicode, numeric_version))
__author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

@@ -14,6 +14,7 @@ from functools import wraps, partial
from calibre.db.locking import create_locks, RecordLock
from calibre.db.fields import create_field
from calibre.db.tables import VirtualTable
+from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import now

@@ -127,14 +128,8 @@ class Cache(object):
        if not formats:
            good_formats = None
        else:
-           good_formats = []
-           for f in formats:
-               try:
-                   mi.format_metadata[f] = self._format_metadata(book_id, f)
-               except:
-                   pass
-               else:
-                   good_formats.append(f)
+           mi.format_metadata = FormatMetadata(self, id, formats)
+           good_formats = FormatsList(formats, mi.format_metadata)
        mi.formats = good_formats
        mi.has_cover = _('Yes') if self._field_for('cover', book_id,
                default_value=False) else ''
src/calibre/db/lazy.py (new file, 99 lines)
@@ -0,0 +1,99 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import weakref
from functools import wraps
from collections import MutableMapping, MutableSequence

'''
Avoid doing stats on all files in a book when getting metadata for that book.
Speeds up calibre startup with large libraries/libraries on a network share,
with a composite custom column.
'''

# Lazy format metadata retrieval {{{
def resolved(f):
    @wraps(f)
    def wrapper(self, *args, **kwargs):
        if getattr(self, '_must_resolve', True):
            self._resolve()
            self._must_resolve = False
        return f(self, *args, **kwargs)
    return wrapper

class MutableBase(object):

    @resolved
    def __str__(self):
        return str(self._values)

    @resolved
    def __repr__(self):
        return repr(self._values)

    @resolved
    def __unicode__(self):
        return unicode(self._values)

    @resolved
    def __len__(self):
        return len(self._values)

    @resolved
    def __iter__(self):
        return iter(self._values)

    @resolved
    def __contains__(self, key):
        return key in self._values

    @resolved
    def __getitem__(self, fmt):
        return self._values[fmt]

    @resolved
    def __setitem__(self, key, val):
        self._values[key] = val

    @resolved
    def __delitem__(self, key):
        del self._values[key]


class FormatMetadata(MutableBase, MutableMapping):

    def __init__(self, db, id_, formats):
        self._dbwref = weakref.ref(db)
        self._id = id_
        self._formats = formats

    def _resolve(self):
        db = self._dbwref()
        self._values = {}
        for f in self._formats:
            try:
                self._values[f] = db.format_metadata(self._id, f)
            except:
                pass

class FormatsList(MutableBase, MutableSequence):

    def __init__(self, formats, format_metadata):
        self._formats = formats
        self._format_metadata = format_metadata

    def _resolve(self):
        self._values = [f for f in self._formats if f in self._format_metadata]

    @resolved
    def insert(self, idx, val):
        self._values.insert(idx, val)

# }}}
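The point of the two classes above is that nothing expensive happens until the format information is actually read. A minimal sketch of that behaviour, assuming a stand-in database object (FakeDB and the dictionary it returns are invented for illustration; in calibre the real Cache object is what supplies format_metadata()):

    # Stand-in for the calibre database cache: counts how often it is asked
    # for per-format metadata.
    class FakeDB(object):
        calls = 0
        def format_metadata(self, book_id, fmt):
            FakeDB.calls += 1
            return {'size': 1024, 'fmt': fmt}

    db = FakeDB()
    fm = FormatMetadata(db, 1, ['EPUB', 'MOBI'])  # constructing it does no work
    assert FakeDB.calls == 0
    print(fm['EPUB']['size'])   # first access calls _resolve(), which loads all formats in one pass
    assert FakeDB.calls == 2
    fl = FormatsList(['EPUB', 'MOBI'], fm)
    print(list(fl))             # ['EPUB', 'MOBI'], filtered against the already-resolved mapping

This is what lets the Cache change shown earlier simply assign mi.format_metadata = FormatMetadata(self, id, formats): books whose format details are never inspected no longer pay for a stat of every file at startup.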
|
@ -162,7 +162,7 @@ class ANDROID(USBMS):
|
||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
|
||||
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO']
|
||||
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP']
|
||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||
@ -175,7 +175,7 @@ class ANDROID(USBMS):
|
||||
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
|
||||
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
|
||||
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET']
|
||||
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||
|
@ -11,6 +11,7 @@ from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.constants import filesystem_encoding
|
||||
|
||||
class CHMInput(InputFormatPlugin):
|
||||
|
||||
@ -36,6 +37,8 @@ class CHMInput(InputFormatPlugin):
|
||||
|
||||
log.debug('Processing CHM...')
|
||||
with TemporaryDirectory('_chm2oeb') as tdir:
|
||||
if not isinstance(tdir, unicode):
|
||||
tdir = tdir.decode(filesystem_encoding)
|
||||
html_input = plugin_for_input_format('html')
|
||||
for opt in html_input.options:
|
||||
setattr(options, opt.option.name, opt.recommended_value)
|
||||
|
@ -6,13 +6,14 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
import re, codecs
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.metadata import string_to_authors, MetaInformation
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre.ptempfile import TemporaryFile
|
||||
from calibre import force_unicode
|
||||
|
||||
def _clean(s):
|
||||
return s.replace(u'\u00a0', u' ')
|
||||
@ -138,6 +139,13 @@ def get_metadata_from_reader(rdr):
|
||||
resolve_entities=True)[0])
|
||||
|
||||
title = rdr.title
|
||||
try:
|
||||
x = rdr.GetEncoding()
|
||||
codecs.lookup(x)
|
||||
enc = x
|
||||
except:
|
||||
enc = 'cp1252'
|
||||
title = force_unicode(title, enc)
|
||||
authors = _get_authors(home)
|
||||
mi = MetaInformation(title, authors)
|
||||
publisher = _get_publisher(home)
|
||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||
' and Alex Bramley <a.bramley at gmail.com>.'
|
||||
|
||||
import os, re
|
||||
import os, re, codecs
|
||||
|
||||
from calibre import guess_type as guess_mimetype
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
|
||||
@ -99,8 +99,17 @@ class CHMReader(CHMFile):
|
||||
|
||||
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
|
||||
html_files = set([])
|
||||
try:
|
||||
x = self.GetEncoding()
|
||||
codecs.lookup(x)
|
||||
enc = x
|
||||
except:
|
||||
enc = 'cp1252'
|
||||
for path in self.Contents():
|
||||
lpath = os.path.join(output_dir, path)
|
||||
fpath = path
|
||||
if not isinstance(path, unicode):
|
||||
fpath = path.decode(enc)
|
||||
lpath = os.path.join(output_dir, fpath)
|
||||
self._ensure_dir(lpath)
|
||||
try:
|
||||
data = self.GetFile(path)
|
||||
@ -123,6 +132,7 @@ class CHMReader(CHMFile):
|
||||
self.log.warn('%r filename too long, skipping'%path)
|
||||
continue
|
||||
raise
|
||||
|
||||
if debug_dump:
|
||||
import shutil
|
||||
shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
|
||||
|
@ -8,6 +8,7 @@ import StringIO, sys
|
||||
from struct import pack
|
||||
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre import force_unicode
|
||||
|
||||
class StreamSlicer(object):
|
||||
|
||||
@ -245,7 +246,9 @@ class MetadataUpdater(object):
|
||||
def get_metadata(self):
|
||||
''' Return MetaInformation with title, author'''
|
||||
self.get_original_metadata()
|
||||
return MetaInformation(self.metadata['Title'], [self.metadata['Authors']])
|
||||
title = force_unicode(self.metadata['Title'], 'utf-8')
|
||||
authors = force_unicode(self.metadata['Authors'], 'utf-8').split(';')
|
||||
return MetaInformation(title, authors)
|
||||
|
||||
def get_original_metadata(self):
|
||||
offset = self.base + self.topaz_headers['metadata']['blocks'][0]['offset']
|
||||
|
@ -13,6 +13,7 @@
|
||||
3. Much more comprehensive testing/error handling
|
||||
4. Properly encodes/decodes assertions
|
||||
5. Handles points in the padding of elements consistently
|
||||
6. Has a utility method to calculate the CFI for the current viewport position robustly
|
||||
|
||||
To check if this script is compatible with the current browser, call
|
||||
window.cfi.is_compatible() it will throw an exception if not compatible.
|
||||
@ -72,7 +73,7 @@ get_current_time = (target) -> # {{{
|
||||
fstr(ans)
|
||||
# }}}
|
||||
|
||||
window_scroll_pos = (win) -> # {{{
|
||||
window_scroll_pos = (win=window) -> # {{{
|
||||
if typeof(win.pageXOffset) == 'number'
|
||||
x = win.pageXOffset
|
||||
y = win.pageYOffset
|
||||
@ -86,7 +87,7 @@ window_scroll_pos = (win) -> # {{{
|
||||
return [x, y]
|
||||
# }}}
|
||||
|
||||
viewport_to_document = (x, y, doc) -> # {{{
|
||||
viewport_to_document = (x, y, doc=window?.document) -> # {{{
|
||||
until doc == window.document
|
||||
# We are in a frame
|
||||
frame = doc.defaultView.frameElement
|
||||
@ -101,7 +102,7 @@ viewport_to_document = (x, y, doc) -> # {{{
|
||||
return [x, y]
|
||||
# }}}
|
||||
|
||||
# Equivalent for caretRangeFromPoint for non WebKit browsers {{{
|
||||
# Convert point to character offset {{{
|
||||
range_has_point = (range, x, y) ->
|
||||
for rect in range.getClientRects()
|
||||
if (rect.left <= x <= rect.right) and (rect.top <= y <= rect.bottom)
|
||||
@ -157,7 +158,8 @@ class CanonicalFragmentIdentifier
|
||||
is_compatible(): Throws an error if the browser is not compatible with
|
||||
this script
|
||||
|
||||
at(x, y): which maps a point to a CFI, if possible
|
||||
at(x, y): Maps a point to a CFI, if possible
|
||||
at_current(): Returns the CFI corresponding to the current viewport scroll location
|
||||
|
||||
scroll_to(cfi): which scrolls the browser to a point corresponding to the
|
||||
given cfi, and returns the x and y co-ordinates of the point.
|
||||
@ -559,11 +561,73 @@ class CanonicalFragmentIdentifier
|
||||
null
|
||||
# }}}
|
||||
|
||||
current_cfi: () -> # {{{
|
||||
at_current: () -> # {{{
|
||||
[winx, winy] = window_scroll_pos()
|
||||
[winw, winh] = [window.innerWidth, window.innerHeight]
|
||||
max = Math.max
|
||||
winw = max(winw, 400)
|
||||
winh = max(winh, 600)
|
||||
deltay = Math.floor(winh/50)
|
||||
deltax = Math.floor(winw/25)
|
||||
miny = max(-winy, -winh)
|
||||
maxy = winh
|
||||
minx = max(-winx, -winw)
|
||||
maxx = winw
|
||||
|
||||
dist = (p1, p2) ->
|
||||
Math.sqrt(Math.pow(p1[0]-p2[0], 2), Math.pow(p1[1]-p2[1], 2))
|
||||
|
||||
get_cfi = (ox, oy) ->
|
||||
try
|
||||
cfi = this.at(ox, oy)
|
||||
point = this.point(cfi)
|
||||
catch err
|
||||
cfi = null
|
||||
|
||||
if point.range != null
|
||||
r = point.range
|
||||
rect = r.getClientRects()[0]
|
||||
|
||||
x = (point.a*rect.left + (1-point.a)*rect.right)
|
||||
y = (rect.top + rect.bottom)/2
|
||||
[x, y] = viewport_to_document(x, y, r.startContainer.ownerDocument)
|
||||
else
|
||||
node = point.node
|
||||
r = node.getBoundingClientRect()
|
||||
[x, y] = viewport_to_document(r.left, r.top, node.ownerDocument)
|
||||
if typeof(point.x) == 'number' and node.offsetWidth
|
||||
x += (point.x*node.offsetWidth)/100
|
||||
if typeof(point.y) == 'number' and node.offsetHeight
|
||||
y += (point.y*node.offsetHeight)/100
|
||||
|
||||
if dist(viewport_to_document(ox, oy), [x, y]) > 50
|
||||
cfi = null
|
||||
|
||||
return cfi
|
||||
|
||||
x_loop = (cury) ->
|
||||
for direction in [-1, 1]
|
||||
delta = deltax * direction
|
||||
curx = 0
|
||||
until (direction < 0 and curx < minx) or (direction > 0 and curx > maxx)
|
||||
cfi = get_cfi(curx, cury)
|
||||
if cfi
|
||||
return cfi
|
||||
curx += delta
|
||||
null
|
||||
|
||||
for direction in [-1, 1]
|
||||
delta = deltay * direction
|
||||
cury = 0
|
||||
until (direction < 0 and cury < miny) or (direction > 0 and cury > maxy)
|
||||
cfi = x_loop(cury, -1)
|
||||
if cfi
|
||||
return cfi
|
||||
cury += delta
|
||||
|
||||
# TODO: Return the CFI corresponding to the <body> tag
|
||||
null
|
||||
|
||||
# }}}
|
||||
|
||||
if window?
|
||||
|
@@ -23,6 +23,7 @@
indignation and dislike men who are so beguiled and demoralized by
the charms of pleasure of the moment, so blinded by desire, that
they cannot foresee</p>
<p><img src="marker.png" width="300" height="300" alt="Test image"/></p>

</body>
</html>
@@ -1,7 +1,7 @@
<!DOCTYPE html>
<html>
<head>
<title>Testing EPUB CFI</title>
<title>Testing cfi.coffee</title>
<script type="text/javascript" src="cfi.coffee"></script>
<script type="text/javascript" src="cfi-test.coffee"></script>
<style type="text/css">
@@ -46,7 +46,8 @@
</head>
<body>
<div id="container">
<h1 id="first-h1">Testing EPUB CFI</h1>
<h1 id="first-h1">Testing cfi.coffee</h1>
<p>Click anywhere and the location will be marked with a marker, whose position is set via a CFI.</p>
<p><a id="reset" href="/">Reset CFI to None</a></p>
<h2>A div with scrollbars</h2>
<p>Scroll down and click on some elements. Make sure to hit both
@@ -103,7 +103,7 @@ def html5_parse(data, max_nesting_depth=100):
xmlns_declaration = '{%s}'%XMLNS_NS
non_html5_namespaces = {}
seen_namespaces = set()
for elem in tuple(data.iter()):
for elem in tuple(data.iter(tag=etree.Element)):
elem.attrib.pop('xmlns', None)
namespaces = {}
for x in tuple(elem.attrib):
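The only change here is the iteration filter: with no filter, lxml's iter() also yields comment and processing-instruction nodes, whose tag is a function rather than a string; passing tag=etree.Element keeps them out of the namespace-rewriting loop. A small, self-contained illustration (not calibre code):

    from lxml import etree

    root = etree.fromstring('<html><!-- a comment --><body id="b">text</body></html>')

    # Unfiltered iteration yields the comment node as well as the elements.
    node_types = [type(node).__name__ for node in root.iter()]
    print(node_types)            # ['_Element', '_Comment', '_Element']

    # Filtering on the Element factory yields only real elements.
    element_tags = [node.tag for node in root.iter(tag=etree.Element)]
    print(element_tags)          # ['html', 'body']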
@@ -462,7 +462,7 @@ class Scheduler(QObject):
delta = timedelta(days=self.oldest)
try:
ids = list(self.db.tags_older_than(_('News'),
delta))
delta, must_have_authors=['calibre']))
except:
# Happens if library is being switched
ids = []
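The added must_have_authors=['calibre'] argument is what restricts the automatic cleanup to issues calibre generated itself. A hedged sketch of the call pattern, assuming a db object that exposes tags_older_than() with the signature shown in the database hunk further down:

    from datetime import timedelta

    def old_news_ids(db, days):
        # Only books tagged News *and* authored by 'calibre' are candidates for
        # automatic deletion; user books that merely carry the News tag survive.
        try:
            return list(db.tags_older_than('News', timedelta(days=days),
                                           must_have_authors=['calibre']))
        except Exception:
            # The library may be in the middle of being switched; skip this run.
            return []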
@@ -362,7 +362,7 @@
<item>
<widget class="QLabel" name="label_7">
<property name="text">
<string>&Delete downloaded news older than:</string>
<string>Delete downloaded news &older than:</string>
</property>
<property name="buddy">
<cstring>old_news</cstring>
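In Qt the ampersand in a label marks the mnemonic: Alt plus the following letter moves focus to the label's buddy widget, so this change swaps the shortcut from Alt+D to Alt+O, presumably to avoid a clash with another control in the dialog. A minimal PyQt sketch of the same label/buddy mechanism (PyQt5 used here purely for illustration):

    import sys
    from PyQt5.QtWidgets import QApplication, QWidget, QLabel, QSpinBox, QHBoxLayout

    app = QApplication(sys.argv)
    w = QWidget()
    layout = QHBoxLayout(w)

    # '&o' makes Alt+O the mnemonic for this label.
    label = QLabel('Delete downloaded news &older than:')
    old_news = QSpinBox()                  # plays the role of the old_news widget
    label.setBuddy(old_news)               # same role as the <cstring>old_news</cstring> entry

    layout.addWidget(label)
    layout.addWidget(old_news)
    w.show()
    app.exec_()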
@@ -573,6 +573,9 @@ class SeriesIndexEdit(QDoubleSpinBox):
import traceback
traceback.print_exc()

def reset_original(self):
self.original_series_name = self.series_edit.current_val

def break_cycles(self):
try:
self.series_edit.currentIndexChanged.disconnect()
@@ -376,6 +376,7 @@ class MetadataSingleDialogBase(ResizableDialog):
if not mi.is_null('series') and mi.series.strip():
self.series.current_val = mi.series
if mi.series_index is not None:
self.series_index.reset_original()
self.series_index.current_val = float(mi.series_index)
if not mi.is_null('languages'):
langs = [canonicalize_lang(x) for x in mi.languages]
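Calling reset_original() immediately before assigning the downloaded value re-bases the spin box's notion of the original series (see the SeriesIndexEdit hunk above), presumably so the programmatic update is not later treated as a user-initiated series change. A toy sketch of that remember-the-baseline pattern; it is not the real calibre widget:

    class SeriesIndexField(object):
        def __init__(self, series='', index=1.0):
            self.original_series = series
            self.series = series
            self.index = index

        def reset_original(self):
            # Re-base the comparison point, e.g. after metadata was downloaded.
            self.original_series = self.series

        def apply_downloaded(self, series, index):
            self.series = series
            self.reset_original()          # programmatic change, not a user edit
            self.index = float(index)

        def user_changed_series(self):
            # Only a genuine user change should trigger any automatic adjustment.
            return self.series != self.original_series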
@@ -325,6 +325,7 @@ class Preferences(QMainWindow):
return
rc = self.showing_widget.restart_critical
self.committed = True
do_restart = False
if must_restart:
self.must_restart = True
msg = _('Some of the changes you made require a restart.'
@@ -335,12 +336,24 @@ class Preferences(QMainWindow):
'set any more preferences, until you restart.')


warning_dialog(self, _('Restart needed'), msg, show=True,
d = warning_dialog(self, _('Restart needed'), msg,
show_copy_button=False)
b = d.bb.addButton(_('Restart calibre now'), d.bb.AcceptRole)
b.setIcon(QIcon(I('lt.png')))
d.do_restart = False
def rf():
d.do_restart = True
b.clicked.connect(rf)
d.set_details('')
d.exec_()
b.clicked.disconnect()
do_restart = d.do_restart
self.showing_widget.refresh_gui(self.gui)
self.hide_plugin()
if self.close_after_initial or (must_restart and rc):
if self.close_after_initial or (must_restart and rc) or do_restart:
self.close()
if do_restart:
self.gui.quit(restart=True)


def cancel(self, *args):
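The rewritten block keeps the warning but also offers an immediate restart: a custom button is added to the dialog's button box, a flag records whether it was clicked, and the caller then quits with restart=True. A generic PyQt sketch of the same idea using QMessageBox; warning_dialog and gui.quit are calibre-specific, so they are only mimicked here:

    import sys
    from PyQt5.QtWidgets import QApplication, QMessageBox

    def ask_restart(parent=None):
        box = QMessageBox(QMessageBox.Warning, 'Restart needed',
                          'Some of the changes you made require a restart.', parent=parent)
        box.addButton(QMessageBox.Ok)
        restart = box.addButton('Restart calibre now', QMessageBox.AcceptRole)
        box.exec_()
        # True only if the user explicitly chose the restart button.
        return box.clickedButton() is restart

    if __name__ == '__main__':
        app = QApplication(sys.argv)
        if ask_restart():
            print('caller would now quit and relaunch the application')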
@@ -73,6 +73,9 @@ class JavaScriptLoader(object):
src = self.get(x)
evaljs(src)

if not lang:
lang = 'en'

def lang_name(l):
l = l.lower()
l = lang_as_iso639_1(l)
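The added guard just falls back to English when no language is supplied before the code normalizes it to an ISO 639-1 code. A hedged sketch of that normalization step; the small mapping below merely stands in for calibre's lang_as_iso639_1 helper:

    ISO639_2_TO_1 = {'eng': 'en', 'deu': 'de', 'ger': 'de', 'fra': 'fr', 'fre': 'fr'}

    def normalize_lang(lang):
        # Default to English when the book does not declare a language at all.
        if not lang:
            lang = 'en'
        lang = lang.lower()
        # Collapse known 3-letter codes to their 2-letter ISO 639-1 form.
        return ISO639_2_TO_1.get(lang, lang)

    print(normalize_lang(None), normalize_lang('GER'), normalize_lang('pt'))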
@@ -40,6 +40,7 @@ from calibre.utils.magick.draw import save_cover_data_to
from calibre.utils.recycle_bin import delete_file, delete_tree
from calibre.utils.formatter_functions import load_user_template_functions
from calibre.db.errors import NoSuchFormat
from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.utils.localization import (canonicalize_lang,
calibre_langcode_to_name)

@@ -81,7 +82,6 @@ class Tag(object):
def __repr__(self):
return str(self)


class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
An ebook metadata database that stores references to ebook files on disk.
@@ -170,6 +170,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
except:
traceback.print_exc()
self.field_metadata = FieldMetadata()
self.format_filename_cache = defaultdict(dict)
self._library_id_ = None
# Create the lock to be used to guard access to the metadata writer
# queues. This must be an RLock, not a Lock
@@ -310,6 +311,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if not self.is_second_db:
load_user_template_functions(self.prefs.get('user_template_functions', []))

# Load the format filename cache
self.format_filename_cache = defaultdict(dict)
for book_id, fmt, name in self.conn.get(
'SELECT book,format,name FROM data'):
self.format_filename_cache[book_id][fmt.upper() if fmt else ''] = name

self.conn.executescript('''
DROP TRIGGER IF EXISTS author_insert_trg;
CREATE TEMP TRIGGER author_insert_trg
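The startup block added here replaces many small per-book 'SELECT name FROM data WHERE book=? AND format=?' lookups with one pass over the data table, cached as {book_id: {FORMAT: filename}}. A standalone sqlite3 sketch of the same idea; the table and column names follow the hunk, everything else is illustrative:

    import sqlite3
    from collections import defaultdict

    def build_format_filename_cache(conn):
        # One query up front instead of one query per (book, format) lookup later.
        cache = defaultdict(dict)
        for book_id, fmt, name in conn.execute('SELECT book,format,name FROM data'):
            cache[book_id][fmt.upper() if fmt else ''] = name
        return cache

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE data (book INTEGER, format TEXT, name TEXT)')
    conn.executemany('INSERT INTO data VALUES (?,?,?)',
                     [(1, 'epub', 'Book One - Author'), (1, 'mobi', 'Book One - Author')])

    cache = build_format_filename_cache(conn)
    print(cache[1].get('EPUB'))   # cached lookup, no further SQL needed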
@@ -599,7 +606,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
fname = self.construct_file_name(id)
changed = False
for format in formats:
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
name = self.format_filename_cache[id].get(format.upper(), None)
if name and name != fname:
changed = True
break
@@ -944,14 +951,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
good_formats = None
else:
formats = sorted(formats.split(','))
good_formats = []
for f in formats:
try:
mi.format_metadata[f] = self.format_metadata(id, f)
except:
pass
else:
good_formats.append(f)
mi.format_metadata = FormatMetadata(self, id, formats)
good_formats = FormatsList(formats, mi.format_metadata)
mi.formats = good_formats
tags = row[fm['tags']]
if tags:
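Swapping the eager loop for FormatMetadata and FormatsList defers the per-format metadata lookups until something actually asks for them, so building a book's Metadata object no longer pays for formats nobody inspects. The real classes live in calibre.db.lazy; the sketch below only shows the general shape of such a lazy mapping:

    from collections.abc import Mapping

    class LazyFormatMetadata(Mapping):
        def __init__(self, db, book_id, formats):
            self.db, self.book_id = db, book_id
            self.formats = list(formats)
            self._cache = {}

        def __getitem__(self, fmt):
            # The expensive lookup happens only on first access to a given format.
            if fmt not in self._cache:
                self._cache[fmt] = self.db.format_metadata(self.book_id, fmt)
            return self._cache[fmt]

        def __iter__(self):
            return iter(self.formats)

        def __len__(self):
            return len(self.formats)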
@@ -1145,12 +1146,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

def format_files(self, index, index_is_id=False):
id = index if index_is_id else self.id(index)
try:
formats = self.conn.get('SELECT name,format FROM data WHERE book=?', (id,))
formats = map(lambda x:(x[0], x[1]), formats)
return formats
except:
return []
return [(v, k) for k, v in self.format_filename_cache[id].iteritems()]

def formats(self, index, index_is_id=False, verify_formats=True):
''' Return available formats as a comma separated list or None if there are no available formats '''
@@ -1236,7 +1232,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
id = index if index_is_id else self.id(index)
try:
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
name = self.format_filename_cache[id][format.upper()]
except:
return None
if name:
@@ -1333,11 +1329,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def add_format(self, index, format, stream, index_is_id=False, path=None,
notify=True, replace=True):
id = index if index_is_id else self.id(index)
if format:
self.format_metadata_cache[id].pop(format.upper(), None)
if not format: format = ''
self.format_metadata_cache[id].pop(format.upper(), None)
name = self.format_filename_cache[id].get(format.upper(), None)
if path is None:
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
if name and not replace:
return False
name = self.construct_file_name(id)
@@ -1355,6 +1351,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
(id, format.upper(), size, name))
self.conn.commit()
self.format_filename_cache[id][format.upper()] = name
self.refresh_ids([id])
if notify:
self.notify('metadata', [id])
@@ -1402,9 +1399,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def remove_format(self, index, format, index_is_id=False, notify=True,
commit=True, db_only=False):
id = index if index_is_id else self.id(index)
if format:
self.format_metadata_cache[id].pop(format.upper(), None)
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
if not format: format = ''
self.format_metadata_cache[id].pop(format.upper(), None)
name = self.format_filename_cache[id].pop(format.upper(), None)
if name:
if not db_only:
try:
@@ -1925,7 +1922,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

############# End get_categories

def tags_older_than(self, tag, delta, must_have_tag=None):
def tags_older_than(self, tag, delta, must_have_tag=None,
must_have_authors=None):
'''
Return the ids of all books having the tag ``tag`` that are older than
than the specified time. tag comparison is case insensitive.
@@ -1934,6 +1932,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
the tag are returned.
:param must_have_tag: If not None the list of matches will be
restricted to books that have this tag
:param must_have_authors: A list of authors. If not None the list of
matches will be restricted to books that have these authors (case
insensitive).
'''
tag = tag.lower().strip()
mht = must_have_tag.lower().strip() if must_have_tag else None
@@ -1941,9 +1942,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
tindex = self.FIELD_MAP['timestamp']
gindex = self.FIELD_MAP['tags']
iindex = self.FIELD_MAP['id']
aindex = self.FIELD_MAP['authors']
mah = must_have_authors
if mah is not None:
mah = [x.replace(',', '|').lower() for x in mah]
mah = ','.join(mah)
for r in self.data._data:
if r is not None:
if delta is None or (now - r[tindex]) > delta:
if mah:
authors = r[aindex] or ''
if authors.lower() != mah:
continue
tags = r[gindex]
if tags:
tags = [x.strip() for x in tags.lower().split(',')]
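The author filter works on the flat authors string in the cached row: each wanted author has embedded commas converted to '|', the list is joined with ',' and the result is compared case-insensitively, mirroring how mah is built above. A small sketch of that matching rule under the assumption that the row stores its authors in exactly that joined form:

    def authors_match(row_authors, must_have_authors):
        if not must_have_authors:
            return True
        # Mirror the hunk: ',' inside a name becomes '|', names are joined with ','.
        wanted = ','.join(a.replace(',', '|').lower() for a in must_have_authors)
        return (row_authors or '').lower() == wanted

    print(authors_match('calibre', ['calibre']))     # True: eligible for cleanup
    print(authors_match('Jane Doe', ['calibre']))    # False: user's book is left alone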
@@ -3128,6 +3138,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
stream.seek(0)
mi = get_metadata(stream, format, use_libprs_metadata=False,
force_read_metadata=True)
# Force the author to calibre as the auto delete of old news checks for
# both the author==calibre and the tag News
mi.authors = ['calibre']
stream.seek(0)
if mi.series_index is None:
mi.series_index = self.get_next_series_num_for(mi.series)
@@ -497,7 +497,8 @@ class BrowseServer(object):
xml(s, True),
xml(_('Loading, please wait'))+'…',
unicode(c),
xml(u'/browse/category_group/%s/%s'%(category,
xml(u'/browse/category_group/%s/%s'%(
hexlify(category.encode('utf-8')),
hexlify(s.encode('utf-8'))), True),
self.opts.url_prefix)
for s, c in category_groups.items()]
@@ -531,6 +532,13 @@ class BrowseServer(object):
sort = None
if sort not in ('rating', 'name', 'popularity'):
sort = 'name'
try:
category = unhexlify(category)
if isbytestring(category):
category = category.decode('utf-8')
except:
raise cherrypy.HTTPError(404, 'invalid category')

categories = self.categories_cache()
if category not in categories:
raise cherrypy.HTTPError(404, 'category not found')
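Hex-encoding the category and group name before they are embedded in the /browse/category_group/ URL keeps arbitrary, including non-ASCII, values URL-safe, and the handler in the second hunk simply reverses it with unhexlify before validating the category. A minimal round-trip sketch with binascii; the URL layout follows the hunk, the rest is illustrative:

    from binascii import hexlify, unhexlify

    def category_group_url(category, group):
        # Encode both path components so '/', spaces and non-ASCII survive the URL.
        return '/browse/category_group/%s/%s' % (
            hexlify(category.encode('utf-8')).decode('ascii'),
            hexlify(group.encode('utf-8')).decode('ascii'))

    def decode_component(component):
        try:
            return unhexlify(component).decode('utf-8')
        except Exception:
            raise ValueError('invalid category')   # the server answers with a 404 here

    url = category_group_url('#genre', 'Science Fiction')
    print(url)
    print(decode_component(url.rsplit('/', 2)[-2]))   # -> '#genre'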
Three binary image files in this commit are not shown (sizes change from 1.6 KiB to 1.5 KiB, 1.3 KiB to 1.7 KiB, and 733 B to 2.3 KiB). A number of additional file diffs were suppressed because they are too large, and not all changed files are listed here.