mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Get books: Fix Gutenberg store and improvements to chitanka.info and e-knigi.net
This commit is contained in:
commit
e53539285a
@ -6,7 +6,8 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2011, Alex Stanev <alex@stanev.org>'
|
__copyright__ = '2011, Alex Stanev <alex@stanev.org>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import urllib
|
import re
|
||||||
|
import urllib2
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
@ -39,54 +40,24 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
|
|||||||
d.exec_()
|
d.exec_()
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
|
# check for Cyrillic symbols before performing search
|
||||||
|
uquery = unicode(query.strip(), 'utf-8')
|
||||||
|
reObj = re.search(u'^[а-яА-Я\\d]{4,}[а-яА-Я\\d\\s]*$', uquery)
|
||||||
|
if not reObj:
|
||||||
|
return
|
||||||
|
|
||||||
base_url = 'http://chitanka.info'
|
base_url = 'http://chitanka.info'
|
||||||
url = base_url + '/search?q=' + urllib.quote(query)
|
url = base_url + '/search?q=' + urllib2.quote(query)
|
||||||
counter = max_results
|
counter = max_results
|
||||||
|
|
||||||
# search for book title
|
# search for book title
|
||||||
br = browser()
|
br = browser()
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
try:
|
||||||
f = unicode(f.read(), 'utf-8')
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f)
|
|
||||||
|
|
||||||
for data in doc.xpath('//ul[@class="superlist booklist"]/li'):
|
|
||||||
if counter <= 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip()
|
|
||||||
if not id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
counter -= 1
|
|
||||||
|
|
||||||
s = SearchResult()
|
|
||||||
s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip()
|
|
||||||
s.title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip()
|
|
||||||
s.author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip()
|
|
||||||
s.detail_item = id
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '')
|
|
||||||
s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '')
|
|
||||||
s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '')
|
|
||||||
s.formats = 'FB2, EPUB, TXT, SFB'
|
|
||||||
yield s
|
|
||||||
|
|
||||||
# search for author names
|
|
||||||
for data in doc.xpath('//ul[@class="superlist"][1]/li'):
|
|
||||||
author_url = ''.join(data.xpath('.//a[contains(@href,"/person/")]/@href'))
|
|
||||||
if counter <= 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
br2 = browser()
|
|
||||||
with closing(br2.open(base_url + author_url, timeout=timeout)) as f:
|
|
||||||
if counter <= 0:
|
|
||||||
break
|
|
||||||
f = unicode(f.read(), 'utf-8')
|
f = unicode(f.read(), 'utf-8')
|
||||||
doc2 = html.fromstring(f)
|
doc = html.fromstring(f)
|
||||||
|
|
||||||
# search for book title
|
for data in doc.xpath('//ul[@class="superlist booklist"]/li'):
|
||||||
for data in doc2.xpath('//ul[@class="superlist booklist"]/li'):
|
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -107,3 +78,51 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
|
|||||||
s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '')
|
s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '')
|
||||||
s.formats = 'FB2, EPUB, TXT, SFB'
|
s.formats = 'FB2, EPUB, TXT, SFB'
|
||||||
yield s
|
yield s
|
||||||
|
except urllib2.HTTPError, e:
|
||||||
|
if e.code == 404:
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
|
# search for author names
|
||||||
|
for data in doc.xpath('//ul[@class="superlist"][1]/li/dl/dt'):
|
||||||
|
author_url = ''.join(data.xpath('.//a[contains(@href,"/person/")]/@href'))
|
||||||
|
if author_url == '':
|
||||||
|
continue
|
||||||
|
if counter <= 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
br2 = browser()
|
||||||
|
with closing(br2.open(base_url + author_url, timeout=timeout)) as f:
|
||||||
|
if counter <= 0:
|
||||||
|
break
|
||||||
|
f = unicode(f.read(), 'utf-8')
|
||||||
|
doc2 = html.fromstring(f)
|
||||||
|
|
||||||
|
# search for book title
|
||||||
|
for data in doc2.xpath('//ul[@class="superlist booklist"]/li'):
|
||||||
|
if counter <= 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip()
|
||||||
|
if not id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip()
|
||||||
|
author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip()
|
||||||
|
if title.lower().find(query.lower()) == -1 and author.lower().find(query.lower()) == -1:
|
||||||
|
continue
|
||||||
|
|
||||||
|
counter -= 1
|
||||||
|
|
||||||
|
s = SearchResult()
|
||||||
|
s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip()
|
||||||
|
s.title = title
|
||||||
|
s.author = author
|
||||||
|
s.detail_item = id
|
||||||
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
|
s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '')
|
||||||
|
s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '')
|
||||||
|
s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '')
|
||||||
|
s.formats = 'FB2, EPUB, TXT, SFB'
|
||||||
|
yield s
|
||||||
|
@ -6,6 +6,7 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2011, Alex Stanev <alex@stanev.org>'
|
__copyright__ = '2011, Alex Stanev <alex@stanev.org>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import re
|
||||||
import random
|
import random
|
||||||
import urllib2
|
import urllib2
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
@ -45,8 +46,14 @@ class eKnigiStore(BasicStoreConfig, StorePlugin):
|
|||||||
d.exec_()
|
d.exec_()
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
|
# check for Cyrillic symbols before performing search
|
||||||
|
uquery = unicode(query.strip(), 'utf-8')
|
||||||
|
reObj = re.search(u'^[а-яА-Я\\d]{2,}[а-яА-Я\\d\\s]*$', uquery)
|
||||||
|
if not reObj:
|
||||||
|
return
|
||||||
|
|
||||||
base_url = 'http://e-knigi.net'
|
base_url = 'http://e-knigi.net'
|
||||||
url = base_url + '/virtuemart?page=shop.browse&search_category=0&search_limiter=anywhere&limitstart=0&limit=' + str(max_results) + '&keyword=' + urllib2.quote(query)
|
url = base_url + '/virtuemart?page=shop.browse&search_category=0&search_limiter=anywhere&keyword=' + urllib2.quote(query)
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
@ -75,12 +82,18 @@ class eKnigiStore(BasicStoreConfig, StorePlugin):
|
|||||||
if not id:
|
if not id:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
title = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@title')).strip()
|
||||||
|
author = ''.join(data.xpath('.//div[@style="float:left;width:90%"]/b/text()')).strip().replace('Автор: ', '')
|
||||||
|
|
||||||
|
if title.lower().find(query.lower()) == -1 and author.lower().find(query.lower()) == -1:
|
||||||
|
continue
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
s = SearchResult()
|
s = SearchResult()
|
||||||
s.cover_url = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@src')).strip()
|
s.cover_url = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@src')).strip()
|
||||||
s.title = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@title')).strip()
|
s.title = title
|
||||||
s.author = ''.join(data.xpath('.//div[@style="float:left;width:90%"]/b/text()')).strip().replace('Автор: ', '')
|
s.author = author
|
||||||
s.price = ''.join(data.xpath('.//span[@class="productPrice"]/text()')).strip()
|
s.price = ''.join(data.xpath('.//span[@class="productPrice"]/text()')).strip()
|
||||||
s.detail_item = base_url + id
|
s.detail_item = base_url + id
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
|
@ -45,7 +45,7 @@ class GutenbergStore(BasicStoreConfig, StorePlugin):
|
|||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())
|
doc = html.fromstring(f.read())
|
||||||
for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title")]'):
|
for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title") and not(contains(@class, "toplink"))]'):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -69,7 +69,7 @@ class GutenbergStore(BasicStoreConfig, StorePlugin):
|
|||||||
yield s
|
yield s
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
def get_details(self, search_result, timeout):
|
||||||
url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item + '.mobile')
|
url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item)
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
with closing(br.open(url, timeout=timeout)) as nf:
|
with closing(br.open(url, timeout=timeout)) as nf:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user