mirror of https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Give me something to read and Let's get Critical by Barty
This commit is contained in:
parent d52a9ded1f
commit c512404062
90 recipes/givemesomethingtoread.recipe Normal file
@@ -0,0 +1,90 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe


class GiveMeSomethingToRead(BasicNewsRecipe):
    title = u'Give Me Something To Read'
    description = 'Curation / aggregation of articles on diverse topics'
    language = 'en'
    __author__ = 'barty on mobileread.com forum'
    max_articles_per_feed = 100
    no_stylesheets = False
    timefmt = ' [%a, %d %b, %Y]'
    oldest_article = 365
    auto_cleanup = True

    INDEX = 'http://givemesomethingtoread.com'
    CATEGORIES = [
        # comment out categories you don't want
        # (user friendly name, system name, max number of articles to load)
        ('The Arts', 'arts', 25),
        ('Science', 'science', 30),
        ('Technology', 'technology', 30),
        ('Politics', 'politics', 20),
        ('Media', 'media', 30),
        ('Crime', 'crime', 15),
        ('Other articles', '', 10)
    ]

    def parse_index(self):
        self.cover_url = 'http://thegretchenshow.files.wordpress.com/2009/12/well-read-cat-small.jpg'
        feeds = []
        seen_urls = set([])
        # captures the source domain of an external article URL
        regex = re.compile(r'http://(www\.)?([^/:]+)', re.I)

        for category in self.CATEGORIES:
            (cat_name, tag, max_articles) = category
            tagurl = '' if tag == '' else '/tagged/' + tag
            self.log('Reading category:', cat_name)

            articles = []
            pageno = 1

            while len(articles) < max_articles and pageno < 100:
                page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
                pageno += 1

                self.log('\tReading page:', page)
                try:
                    soup = self.index_to_soup(page)
                except:
                    break

                headers = soup.findAll('h2')
                if len(headers) == 0:
                    break

                for header in headers:
                    atag = header.find('a')
                    url = atag['href']
                    # skip promotionals and duplicates
                    if url.startswith('http://givemesomethingtoread') or url.startswith('/') or url in seen_urls:
                        continue
                    seen_urls.add(url)
                    title = self.tag_to_string(header)
                    self.log('\tFound article:', title)
                    #self.log('\t', url)
                    desc = header.parent.find('blockquote')
                    desc = self.tag_to_string(desc) if desc else ''
                    m = regex.match(url)
                    if m:
                        # prefix the description with the source domain
                        desc = "[%s] %s" % (m.group(2), desc)
                    #self.log('\t', desc)
                    date = ''
                    p = header.parent.previousSibling
                    # navigate up to find h3, which contains the date
                    while p:
                        if hasattr(p, 'name') and p.name == 'h3':
                            date = self.tag_to_string(p)
                            break
                        p = p.previousSibling
                    articles.append({'title': title, 'url': url, 'description': desc, 'date': date})
                    if len(articles) >= max_articles:
                        break

            if articles:
                feeds.append((cat_name, articles))

        return feeds
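Rather than parsing RSS, this recipe builds its index by walking the site's tag pages until max_articles is reached. A minimal standalone sketch of the URL scheme that loop generates (two tuples copied from CATEGORIES above; first three pages only):

    INDEX = 'http://givemesomethingtoread.com'
    for cat_name, tag, max_articles in [('Science', 'science', 30), ('Other articles', '', 10)]:
        tagurl = '' if tag == '' else '/tagged/' + tag
        for pageno in range(1, 4):
            page = "%s%s/page/%d" % (INDEX, tagurl, pageno) if pageno > 1 else INDEX + tagurl
            print('%s -> %s' % (cat_name, page))

The recipe itself can be exercised from the command line with something like ebook-convert givemesomethingtoread.recipe out.epub --test.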
94 recipes/letsgetcritical.recipe Normal file
@@ -0,0 +1,94 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe


class LetsGetCritical(BasicNewsRecipe):
    title = u"Let's Get Critical"
    description = 'Curation / aggregation of criticisms of the arts and culture'
    language = 'en'
    __author__ = 'barty on mobileread.com forum'
    max_articles_per_feed = 100
    no_stylesheets = False
    timefmt = ' [%a, %d %b, %Y]'
    oldest_article = 365
    auto_cleanup = True

    INDEX = 'http://www.letsgetcritical.org'
    CATEGORIES = [
        # comment out categories you don't want
        # (user friendly name, system name, max number of articles to load)
        ('Architecture', 'architecture', 30),
        ('Art', 'art', 30),
        ('Books', 'books', 30),
        ('Design', 'design', 30),
        ('Digital', 'digital', 30),
        ('Food', 'food', 30),
        ('Movies', 'movies', 30),
        ('Music', 'music', 30),
        ('Television', 'television', 30),
        ('Other articles', '', 10)
    ]

    def parse_index(self):
        self.cover_url = 'http://www.letsgetcritical.org/wp-content/themes/lets_get_critical/images/lgc.jpg'
        feeds = []
        seen_urls = set([])
        # captures the source domain of an external article URL
        regex = re.compile(r'http://(www\.)?([^/:]+)', re.I)

        for category in self.CATEGORIES:
            (cat_name, tag, max_articles) = category
            tagurl = '' if tag == '' else '/category/' + tag.lower()
            self.log('Reading category:', cat_name)

            articles = []
            pageno = 1

            while len(articles) < max_articles and pageno < 100:
                page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
                pageno += 1

                self.log('\tReading page:', page)
                try:
                    soup = self.index_to_soup(page)
                except:
                    break

                posts = soup.findAll('div', attrs={'class': 'post_multi'})
                if len(posts) == 0:
                    break

                for post in posts:
                    dt = post.find('div', attrs={'class': 'title'})
                    atag = dt.find('a')
                    url = atag['href']
                    # skip promotionals and duplicates
                    if url.startswith('http://letsgetcritical') or url.startswith('/') or url in seen_urls:
                        continue
                    seen_urls.add(url)
                    title = self.tag_to_string(atag)
                    self.log('\tFound article:', title)
                    self.log('\t', url)
                    desc = post.find('blockquote')
                    desc = self.tag_to_string(desc) if desc else ''
                    m = regex.match(url)
                    if m:
                        desc = "[%s] %s" % (m.group(2), desc)
                    #self.log('\t', desc)
                    date = ''
                    p = post.previousSibling
                    # navigate up the siblings to find the date div
                    while p:
                        if hasattr(p, 'name') and p.get('class') == 'singledate':
                            date = self.tag_to_string(p)
                            break
                        p = p.previousSibling
                    articles.append({'title': title, 'url': url, 'description': desc, 'date': date})
                    if len(articles) >= max_articles:
                        break

            if articles:
                feeds.append((cat_name, articles))

        return feeds
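For context, parse_index() in both new recipes must return the structure BasicNewsRecipe expects: a list of (feed title, list of article dicts). An illustrative value with made-up field contents (the '[domain]' prefix is what the regex match above produces):

    feeds = [
        ('Books', [
            {'title': 'An example review', 'url': 'http://example.com/review',
             'description': '[example.com] Pull quote from the post...', 'date': 'Jan 12, 2012'},
        ]),
    ]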
@@ -6,11 +6,7 @@ www.nin.co.rs
 '''

 import re
-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
-from contextlib import closing
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-from calibre import entity_to_unicode

 class Nin(BasicNewsRecipe):
     title = 'NIN online'
@@ -81,7 +77,7 @@ class Nin(BasicNewsRecipe):
         return cover_url

     feeds = [(u'NIN Online', u'http://www.nin.co.rs/misc/rss.php?feed=RSS2.0')]

     def get_article_url(self, article):
         url = BasicNewsRecipe.get_article_url(self, article)
         return url.replace('.co.yu', '.co.rs')
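The retained get_article_url override in the Nin recipe does nothing more than rewrite the defunct .co.yu feed domain; with a made-up article URL:

    print('http://www.nin.co.yu/pages/article.php?id=1'.replace('.co.yu', '.co.rs'))
    # -> http://www.nin.co.rs/pages/article.php?id=1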
@@ -11,7 +11,7 @@ import re
 import urllib2

 from contextlib import closing
-from lxml import etree, html
+from lxml import etree
 from PyQt4.Qt import QUrl

 from calibre import browser, url_slash_cleaner, prints
@@ -25,18 +25,18 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class LitResStore(BasicStoreConfig, StorePlugin):
     shop_url = u'http://www.litres.ru'
     #http://robot.litres.ru/pages/biblio_book/?art=174405

     def open(self, parent=None, detail_item=None, external=False):

         aff_id = u'?' + _get_affiliate_id()

         url = self.shop_url + aff_id
         detail_url = None
         if detail_item:
             # http://www.litres.ru/pages/biblio_book/?art=157074
             detail_url = self.shop_url + u'/pages/biblio_book/' + aff_id +\
                 u'&art=' + urllib2.quote(detail_item)

         if external or self.config.get('open_external', False):
             open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
         else:
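For reference, how open() assembles the detail URL, as a standalone Python 2 sketch; the affiliate id here is a placeholder, since _get_affiliate_id() is not part of this diff:

    import urllib2

    shop_url = u'http://www.litres.ru'
    aff_id = u'?' + u'affid=XXXX'   # placeholder -- the real value comes from _get_affiliate_id()
    detail_item = u'157074'
    detail_url = shop_url + u'/pages/biblio_book/' + aff_id + u'&art=' + urllib2.quote(detail_item)
    print(detail_url)
    # http://www.litres.ru/pages/biblio_book/?affid=XXXX&art=157074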
@@ -44,28 +44,28 @@ class LitResStore(BasicStoreConfig, StorePlugin):
             d.setWindowTitle(self.name)
             d.set_tags(self.config.get('tags', ''))
             d.exec_()


     def search(self, query, max_results=10, timeout=60):
         search_url = u'http://robot.litres.ru/pages/catalit_browser/?checkpoint=2000-01-02&'\
             'search=%s&limit=0,%s'
         search_url = search_url % (urllib2.quote(query), max_results)

         counter = max_results
         br = browser()
         br.addheaders.append( ['Accept-Encoding','gzip'] )

         with closing(br.open(search_url, timeout=timeout)) as r:
             ungzipResponse(r,br)
             raw= xml_to_unicode(r.read(), strip_encoding_pats=True, assume_utf8=True)[0]

             parser = etree.XMLParser(recover=True, no_network=True)
             doc = etree.fromstring(raw, parser=parser)
             for data in doc.xpath('//*[local-name() = "fb2-book"]'):
                 if counter <= 0:
                     break
                 counter -= 1

                 try:
                     sRes = self.create_search_result(data)
                 except Exception as e:
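The '//*[local-name() = "fb2-book"]' XPath is presumably there because the catalog XML comes back namespaced; local-name() matches elements regardless of namespace. A self-contained sketch against a made-up document:

    from lxml import etree

    raw = '<catalog xmlns="http://example.org/ns"><fb2-book hub_id="157074"/></catalog>'
    doc = etree.fromstring(raw, parser=etree.XMLParser(recover=True, no_network=True))
    print(len(doc.xpath('//*[local-name() = "fb2-book"]')))   # 1
    print(len(doc.xpath('//fb2-book')))                       # 0 -- namespace-blind path misses it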
@@ -75,10 +75,10 @@ class LitResStore(BasicStoreConfig, StorePlugin):

     def get_details(self, search_result, timeout=60):
         pass

     def create_search_result(self, data):
         xp_template = 'normalize-space(@{0})'

         sRes = SearchResult()
         sRes.drm = SearchResult.DRM_UNLOCKED
         sRes.detail_item = data.xpath(xp_template.format('hub_id'))
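The xp_template = 'normalize-space(@{0})' pattern makes each xpath() call evaluate an XPath string function, so every field comes back as a whitespace-trimmed string rather than a node list. A small sketch (attribute value made up):

    from lxml import etree

    xp_template = 'normalize-space(@{0})'
    el = etree.fromstring('<fb2-book hub_id="  157074  "/>')
    print(repr(el.xpath(xp_template.format('hub_id'))))   # '157074'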
@@ -92,7 +92,7 @@ class LitResStore(BasicStoreConfig, StorePlugin):
         # cover vs cover_preview
         sRes.cover_url = data.xpath(xp_template.format('cover_preview'))
         sRes.price = format_price_in_RUR(sRes.price)

         types = data.xpath('//fb2-book//files/file/@type')
         fmt_set = _parse_ebook_formats(' '.join(types))
         sRes.formats = ', '.join(fmt_set)
@@ -134,8 +134,8 @@ def _get_affiliate_id():
 def _parse_ebook_formats(formatsStr):
     '''
     Creates a set with displayable names of the formats

     :param formatsStr: string with comma separated book formats
         as provided by ozon.ru
     :return: a list with displayable book formats
     '''
@@ -166,4 +166,4 @@ def _parse_ebook_formats(formatsStr):
         formats.add('LRF')
     if 'jar' in formatsUnstruct:
         formats.add('JAR')
     return formats
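Only the tail of _parse_ebook_formats is visible in this hunk; an illustrative re-sketch of the membership-test pattern it uses (hypothetical helper, not the real function, which handles more formats):

    def parse_formats_sketch(formatsStr):
        # illustrative only -- mirrors the 'lrf'/'jar' checks visible above
        formats = set()
        formatsUnstruct = formatsStr.lower()
        if 'lrf' in formatsUnstruct:
            formats.add('LRF')
        if 'jar' in formatsUnstruct:
            formats.add('JAR')
        return formats

    print(parse_formats_sketch('fb2.zip, lrf, jar'))   # a set containing 'LRF' and 'JAR'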