This commit is contained in:
Kovid Goyal 2021-02-21 07:56:11 +05:30
commit fb9930a439
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -1,13 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
store_version = 1 # Needed for dynamic plugin loading
store_version = 2 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, Alex Stanev <alex@stanev.org>'
__docformat__ = 'restructuredtext en'
import re
from contextlib import closing
try:
from urllib.parse import quote
@ -27,6 +26,45 @@ from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
def parse_book_page(doc, base_url, counter):
for data in doc.xpath('//div[@class="booklist"]/div/div'):
if counter <= 0:
break
id = ''.join(data.xpath('.//div[@class="media-body"]/a[@class="booklink"]/@href')).strip()
if not id:
continue
counter -= 1
s = SearchResult()
s.cover_url = 'http:' + ''.join(
data.xpath('.//div[@class="media-left"]/a[@class="booklink"]/div/img/@src')).strip()
s.title = ''.join(data.xpath('.//div[@class="media-body"]/a[@class="booklink"]/i/text()')).strip()
alternative_headline = data.xpath('.//div[@class="media-body"]/div[@itemprop="alternativeHeadline"]/text()')
if len(alternative_headline) > 0:
s.title = "{} ({})".format(s.title, ''.join(alternative_headline).strip())
s.author = ', '.join(data.xpath('.//div[@class="media-body"]/div[@class="bookauthor"]/span/a/text()')).strip(', ')
s.detail_item = id
s.drm = SearchResult.DRM_UNLOCKED
s.downloads['FB2'] = base_url + ''.join(data.xpath(
'.//div[@class="media-body"]/div[@class="download-links"]/div/a[contains(@class,"dl-fb2")]/@href')).strip().replace(
'.zip', '')
s.downloads['EPUB'] = base_url + ''.join(data.xpath(
'.//div[@class="media-body"]/div[@class="download-links"]/div/a[contains(@class,"dl-epub")]/@href')).strip().replace(
'.zip', '')
s.downloads['TXT'] = base_url + ''.join(data.xpath(
'.//div[@class="media-body"]/div[@class="download-links"]/div/a[contains(@class,"dl-txt")]/@href')).strip().replace(
'.zip', '')
s.formats = 'FB2, EPUB, TXT'
yield s
return counter
class ChitankaStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
@ -46,12 +84,10 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
d.exec_()
def search(self, query, max_results=10, timeout=60):
# check for cyrillic symbols before performing search
if isinstance(query, bytes):
query = query.decode('utf-8')
uquery = query.strip()
reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
if not reObj:
if len(query) < 3:
return
base_url = 'http://chitanka.info'
@ -64,73 +100,26 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
with closing(br.open(url, timeout=timeout)) as f:
f = f.read().decode('utf-8')
doc = html.fromstring(f)
for data in doc.xpath('//ul[@class="superlist booklist"]/li'):
counter = yield from parse_book_page(doc, base_url, counter)
if counter <= 0:
break
id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip()
if not id:
continue
counter -= 1
s = SearchResult()
s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip()
s.title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip()
s.author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip()
s.detail_item = id
s.drm = SearchResult.DRM_UNLOCKED
s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '')
s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '')
s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '')
s.formats = 'FB2, EPUB, TXT, SFB'
yield s
except HTTPError as e:
if e.code == 404:
return
else:
raise
# search for author names
for data in doc.xpath('//ul[@class="superlist"][1]/li/dl/dt'):
author_url = ''.join(data.xpath('.//a[contains(@href,"/person/")]/@href'))
if author_url == '':
continue
if counter <= 0:
break
br2 = browser()
with closing(br2.open(base_url + author_url, timeout=timeout)) as f:
if counter <= 0:
break
f = f.read().decode('utf-8')
doc2 = html.fromstring(f)
# search for book title
for data in doc2.xpath('//ul[@class="superlist booklist"]/li'):
doc = html.fromstring(f)
counter = yield from parse_book_page(doc, base_url, counter)
if counter <= 0:
break
id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip()
if not id:
continue
title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip()
author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip()
if title.lower().find(query.lower()) == -1 and author.lower().find(query.lower()) == -1:
continue
counter -= 1
s = SearchResult()
s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip()
s.title = title
s.author = author
s.detail_item = id
s.drm = SearchResult.DRM_UNLOCKED
s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '')
s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '')
s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '')
s.formats = 'FB2, EPUB, TXT, SFB'
yield s
except HTTPError as e:
if e.code == 404:
return
else:
raise