Merge from trunk

Charles Haley 2011-11-24 21:58:01 +01:00
commit 390ed2e982
81 changed files with 43713 additions and 38319 deletions

View File

@@ -0,0 +1,90 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GiveMeSomethingToRead(BasicNewsRecipe):
title = u'Give Me Something To Read'
description = 'Curation / aggregation of articles on diverse topics'
language = 'en'
__author__ = 'barty on mobileread.com forum'
max_articles_per_feed = 100
no_stylesheets = False
timefmt = ' [%a, %d %b, %Y]'
oldest_article = 365
auto_cleanup = True
INDEX = 'http://givemesomethingtoread.com'
CATEGORIES = [
# comment out categories you don't want
# (user friendly name, system name, max number of articles to load)
('The Arts','arts',25),
('Science','science',30),
('Technology','technology',30),
('Politics','politics',20),
('Media','media',30),
('Crime','crime',15),
('Other articles','',10)
]
def parse_index(self):
self.cover_url = 'http://thegretchenshow.files.wordpress.com/2009/12/well-read-cat-small.jpg'
feeds = []
seen_urls = set([])
regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
tagurl = '' if tag=='' else '/tagged/'+tag
self.log('Reading category:', cat_name)
articles = []
pageno = 1
while len(articles) < max_articles and pageno < 100:
page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
pageno += 1
self.log('\tReading page:', page)
try:
soup = self.index_to_soup(page)
except:
break
headers = soup.findAll('h2')
                if len(headers) == 0:
break
for header in headers:
atag = header.find('a')
url = atag['href']
                    # skip promotional posts and duplicates
if url.startswith('http://givemesomethingtoread') or url.startswith('/') or url in seen_urls:
continue
seen_urls.add(url)
title = self.tag_to_string(header)
self.log('\tFound article:', title)
#self.log('\t', url)
desc = header.parent.find('blockquote')
desc = self.tag_to_string(desc) if desc else ''
m = regex.match( url)
if m:
desc = "[%s] %s" % (m.group(2), desc)
#self.log('\t', desc)
date = ''
p = header.parent.previousSibling
                    # walk back through previous siblings to find the h3 that contains the date
while p:
if hasattr(p,'name') and p.name == 'h3':
date = self.tag_to_string(p)
break
p = p.previousSibling
articles.append({'title':title,'url':url,'description':desc,'date':date})
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds
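For reference, a parse_index() implementation like the one above must return a list of (section title, article list) tuples. A minimal hand-built sketch of the structure this recipe assembles (all values are illustrative):

    feeds = [
        ('The Arts', [
            {'title': 'An essay',
             'url': 'http://example.com/essay',
             'description': '[example.com] A short excerpt',
             'date': 'Nov 24, 2011'},
        ]),
    ]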

View File

@@ -0,0 +1,94 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class LetsGetCritical(BasicNewsRecipe):
title = u"Let's Get Critical"
description = 'Curation / aggregation of criticisms of the arts and culture '
language = 'en'
__author__ = 'barty on mobileread.com forum'
max_articles_per_feed = 100
no_stylesheets = False
timefmt = ' [%a, %d %b, %Y]'
oldest_article = 365
auto_cleanup = True
INDEX = 'http://www.letsgetcritical.org'
CATEGORIES = [
# comment out categories you don't want
# (user friendly name, system name, max number of articles to load)
('Architecture','architecture',30),
('Art','art',30),
('Books','books',30),
('Design','design',30),
('Digital','digital',30),
('Food','food',30),
('Movies','movies',30),
('Music','music',30),
('Television','television',30),
('Other articles','',10)
]
def parse_index(self):
self.cover_url = 'http://www.letsgetcritical.org/wp-content/themes/lets_get_critical/images/lgc.jpg'
feeds = []
seen_urls = set([])
regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
tagurl = '' if tag=='' else '/category/'+tag.lower()
self.log('Reading category:', cat_name)
articles = []
pageno = 1
while len(articles) < max_articles and pageno < 100:
page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
pageno += 1
self.log('\tReading page:', page)
try:
soup = self.index_to_soup(page)
except:
break
posts = soup.findAll('div',attrs={'class':'post_multi'})
if len(posts) == 0:
break
for post in posts:
dt = post.find('div',attrs={'class':'title'})
atag = dt.find('a')
url = atag['href']
                    # skip promotional posts and duplicates
if url.startswith('http://letsgetcritical') or url.startswith('/') or url in seen_urls:
continue
seen_urls.add(url)
title = self.tag_to_string(atag)
self.log('\tFound article:', title)
self.log('\t', url)
desc = post.find('blockquote')
desc = self.tag_to_string(desc) if desc else ''
m = regex.match( url)
if m:
desc = "[%s] %s" % (m.group(2), desc)
#self.log('\t', desc)
date = ''
p = post.previousSibling
                    # walk back through previous siblings to find the date container
while p:
                        if getattr(p, 'name', None) and p.get('class') == 'singledate':
date = self.tag_to_string(p)
break
p = p.previousSibling
articles.append({'title':title,'url':url,'description':desc,'date':date})
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds
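The date lookup above walks backwards through the post's previous siblings until it reaches the 'singledate' div. A standalone sketch of that walk, assuming the BeautifulSoup 3 bundled with calibre (the markup is hypothetical):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<div class="singledate">Nov 24</div>'
                         '<div class="post_multi">...</div>')
    p = soup.find('div', attrs={'class': 'post_multi'}).previousSibling
    while p:
        # NavigableStrings carry no 'name', so getattr keeps the test safe
        if getattr(p, 'name', None) and p.get('class') == 'singledate':
            print(p.string)  # -> Nov 24
            break
        p = p.previousSibling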

View File

@@ -6,11 +6,7 @@ www.nin.co.rs
'''
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from contextlib import closing
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre import entity_to_unicode
class Nin(BasicNewsRecipe):
title = 'NIN online'
@@ -81,7 +77,7 @@ class Nin(BasicNewsRecipe):
return cover_url
feeds = [(u'NIN Online', u'http://www.nin.co.rs/misc/rss.php?feed=RSS2.0')]
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
return url.replace('.co.yu', '.co.rs')

View File

@@ -8,13 +8,13 @@ radikal.com.tr
from calibre.web.feeds.news import BasicNewsRecipe
class Radikal_tr(BasicNewsRecipe):
title = 'Radikal Ekleri'
__author__ = 'Darko Mileticden uyarlama'
description = 'Politic-Cultural Articles from Turkey'
title = 'Radikal - Turkey'
__author__ = 'Darko Miletic'
description = 'News from Turkey'
publisher = 'radikal'
category = 'news, politics, Turkey'
oldest_article = 14
max_articles_per_feed = 120
oldest_article = 7
max_articles_per_feed = 150
no_stylesheets = True
encoding = 'cp1254'
use_embedded_content = False
@@ -37,7 +37,12 @@ class Radikal_tr(BasicNewsRecipe):
feeds = [
(u'Radikal Iki' , u'http://www.radikal.com.tr/d/rss/Rss_42.xml')
(u'Yazarlar' , u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml')
,(u'Turkiye' , u'http://www.radikal.com.tr/d/rss/Rss_97.xml' )
,(u'Politika' , u'http://www.radikal.com.tr/d/rss/Rss_98.xml' )
,(u'Dis Haberler', u'http://www.radikal.com.tr/d/rss/Rss_100.xml' )
,(u'Ekonomi' , u'http://www.radikal.com.tr/d/rss/Rss_101.xml' )
,(u'Radikal Iki' , u'http://www.radikal.com.tr/d/rss/Rss_42.xml')
,(u'Radikal Hayat' , u'http://www.radikal.com.tr/d/rss/Rss_41.xml' )
,(u'Radikal Kitap' , u'http://www.radikal.com.tr/d/rss/Rss_40.xml' )
]

View File

@@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-09-27 14:31+0000\n"
"PO-Revision-Date: 2011-11-13 15:24+0000\n"
"PO-Revision-Date: 2011-11-22 16:45+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-11-14 05:15+0000\n"
"X-Generator: Launchpad (build 14277)\n"
"X-Launchpad-Export-Date: 2011-11-23 05:19+0000\n"
"X-Generator: Launchpad (build 14336)\n"
"Language: ca\n"
#. name for aaa
@@ -9164,71 +9164,71 @@ msgstr "Hewa"
#. name for han
msgid "Hangaza"
msgstr ""
msgstr "Hangaza"
#. name for hao
msgid "Hakö"
msgstr ""
msgstr "Hako"
#. name for hap
msgid "Hupla"
msgstr ""
msgstr "Hupla"
#. name for haq
msgid "Ha"
msgstr ""
msgstr "Ha"
#. name for har
msgid "Harari"
msgstr ""
msgstr "Harari"
#. name for has
msgid "Haisla"
msgstr ""
msgstr "Haisla"
#. name for hat
msgid "Creole; Haitian"
msgstr ""
msgstr "Crioll haitià"
#. name for hau
msgid "Hausa"
msgstr ""
msgstr "Hausa"
#. name for hav
msgid "Havu"
msgstr ""
msgstr "Havu"
#. name for haw
msgid "Hawaiian"
msgstr ""
msgstr "Hawaià"
#. name for hax
msgid "Haida; Southern"
msgstr ""
msgstr "Haida; meridional"
#. name for hay
msgid "Haya"
msgstr ""
msgstr "Haya"
#. name for haz
msgid "Hazaragi"
msgstr ""
msgstr "Hazaragi"
#. name for hba
msgid "Hamba"
msgstr ""
msgstr "Hamba"
#. name for hbb
msgid "Huba"
msgstr ""
msgstr "Huba"
#. name for hbn
msgid "Heiban"
msgstr ""
msgstr "Heiban"
#. name for hbo
msgid "Hebrew; Ancient"
msgstr ""
msgstr "Hebreu antic"
#. name for hbs
msgid "Serbo-Croatian"
@@ -9236,7 +9236,7 @@ msgstr "Serbocroat"
#. name for hbu
msgid "Habu"
msgstr ""
msgstr "Habu"
#. name for hca
msgid "Creole Hindi; Andaman"
@@ -9244,11 +9244,11 @@ msgstr "Hindi crioll; Andaman"
#. name for hch
msgid "Huichol"
msgstr ""
msgstr "Huichol"
#. name for hdn
msgid "Haida; Northern"
msgstr ""
msgstr "Haida; septentrional"
#. name for hds
msgid "Honduras Sign Language"
@@ -9256,7 +9256,7 @@ msgstr "Llenguatge de signes hondureny"
#. name for hdy
msgid "Hadiyya"
msgstr ""
msgstr "Hadia"
#. name for hea
msgid "Miao; Northern Qiandong"
@@ -9268,59 +9268,59 @@ msgstr "Hebreu"
#. name for hed
msgid "Herdé"
msgstr ""
msgstr "Herdé"
#. name for heg
msgid "Helong"
msgstr ""
msgstr "Helong"
#. name for heh
msgid "Hehe"
msgstr ""
msgstr "Hehe"
#. name for hei
msgid "Heiltsuk"
msgstr ""
msgstr "Heiltsuk"
#. name for hem
msgid "Hemba"
msgstr ""
msgstr "Hemba"
#. name for her
msgid "Herero"
msgstr ""
msgstr "Herero"
#. name for hgm
msgid "Hai//om"
msgstr ""
msgstr "Hai om"
#. name for hgw
msgid "Haigwai"
msgstr ""
msgstr "Haigwai"
#. name for hhi
msgid "Hoia Hoia"
msgstr ""
msgstr "Hoia Hoia"
#. name for hhr
msgid "Kerak"
msgstr ""
msgstr "Kerak"
#. name for hhy
msgid "Hoyahoya"
msgstr ""
msgstr "Hoyahoya"
#. name for hia
msgid "Lamang"
msgstr ""
msgstr "Lamang"
#. name for hib
msgid "Hibito"
msgstr ""
msgstr "Hibito"
#. name for hid
msgid "Hidatsa"
msgstr ""
msgstr "Hidatsa"
#. name for hif
msgid "Hindi; Fiji"
@@ -9328,23 +9328,23 @@ msgstr "Hindi; Fiji"
#. name for hig
msgid "Kamwe"
msgstr ""
msgstr "Kamwe"
#. name for hih
msgid "Pamosu"
msgstr ""
msgstr "Hinihon"
#. name for hii
msgid "Hinduri"
msgstr ""
msgstr "Hinduri"
#. name for hij
msgid "Hijuk"
msgstr ""
msgstr "Hijuk"
#. name for hik
msgid "Seit-Kaitetu"
msgstr ""
msgstr "Seit-Kaitetu"
#. name for hil
msgid "Hiligaynon"
@@ -24696,7 +24696,7 @@ msgstr ""
#. name for tcs
msgid "Creole; Torres Strait"
msgstr ""
msgstr "Crioll; Torres Estret"
#. name for tct
msgid "T'en"

File diff suppressed because it is too large

View File

@@ -1383,6 +1383,17 @@ class StoreLibreDEStore(StoreBase):
formats = ['EPUB', 'PDF']
affiliate = True
class StoreLitResStore(StoreBase):
name = 'LitRes'
description = u'ebooks from LitRes.ru'
actual_plugin = 'calibre.gui2.store.stores.litres_plugin:LitResStore'
author = 'Roman Mukhin'
drm_free_only = False
headquarters = 'RU'
formats = ['EPUB', 'TXT', 'RTF', 'HTML', 'FB2', 'LRF', 'PDF', 'MOBI', 'LIT', 'ISILO3', 'JAR', 'RB', 'PRC']
affiliate = True
class StoreManyBooksStore(StoreBase):
name = 'ManyBooks'
description = u'Public domain and creative commons works from many sources.'
@@ -1567,6 +1578,7 @@ plugins += [
StoreKoboStore,
StoreLegimiStore,
StoreLibreDEStore,
StoreLitResStore,
StoreManyBooksStore,
StoreMobileReadStore,
StoreNextoStore,

View File

@@ -64,6 +64,7 @@ class ANDROID(USBMS):
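            # Sony Ericsson (USB vendor id 0xfce)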
0xfce : {
0xd12e : [0x0100],
0xe14f : [0x0226],
0x614f : [0x0226, 0x100],
},
# Google

View File

@@ -376,8 +376,8 @@ class MobiMLizer(object):
istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
istate.bgcolor = style['background-color']
istate.fgcolor = style['color']
istate.strikethrough = style['text-decoration'] == 'line-through'
istate.underline = style['text-decoration'] == 'underline'
istate.strikethrough = style.effective_text_decoration == 'line-through'
istate.underline = style.effective_text_decoration == 'underline'
ff = style['font-family'].lower() if style['font-family'] else ''
if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'):
istate.family = 'monospace'

View File

@@ -714,6 +714,26 @@ class Style(object):
self._lineHeight = result
return self._lineHeight
@property
def effective_text_decoration(self):
'''
Browsers do this creepy thing with text-decoration where even though the
property is not inherited, it looks like it is because containing
blocks apply it. The actual algorithm is utterly ridiculous, see
http://reference.sitepoint.com/css/text-decoration
This matters for MOBI output, where text-decoration is mapped to <u>
and <st> tags. Trying to implement the actual algorithm is too much
work, so we just use a simple fake that should cover most cases.
'''
css = self._style.get('text-decoration', None)
pcss = None
parent = self._get_parent()
if parent is not None:
pcss = parent._style.get('text-decoration', None)
if css in ('none', None) and pcss not in (None, 'none'):
return pcss
return css
@property
def marginTop(self):
return self._unit_convert(
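A hand-traced illustration of the fallback effective_text_decoration implements (a minimal sketch, not calibre API; the values are hypothetical):

    # a child element declares no text-decoration while its containing
    # block declares 'underline'
    css, pcss = None, 'underline'
    # mirror the property's fallback: use the parent value when the
    # element itself says nothing (or 'none')
    effective = pcss if css in ('none', None) and pcss not in (None, 'none') else css
    assert effective == 'underline'  # the child renders underlined, as in browsers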

View File

@@ -0,0 +1,169 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
__docformat__ = 'restructuredtext en'
import random
import re
import urllib2
from contextlib import closing
from lxml import etree
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner, prints
from calibre.ebooks.chardet import xml_to_unicode
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class LitResStore(BasicStoreConfig, StorePlugin):
shop_url = u'http://www.litres.ru'
#http://robot.litres.ru/pages/biblio_book/?art=174405
def open(self, parent=None, detail_item=None, external=False):
aff_id = u'?' + _get_affiliate_id()
url = self.shop_url + aff_id
detail_url = None
if detail_item:
# http://www.litres.ru/pages/biblio_book/?art=157074
detail_url = self.shop_url + u'/pages/biblio_book/' + aff_id +\
u'&art=' + urllib2.quote(detail_item)
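            # e.g. http://www.litres.ru/pages/biblio_book/?lfrom=3623565&art=157074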
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
else:
d = WebStoreDialog(self.gui, url, parent, detail_url)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
search_url = u'http://robot.litres.ru/pages/catalit_browser/?checkpoint=2000-01-02&'\
'search=%s&limit=0,%s'
search_url = search_url % (urllib2.quote(query), max_results)
counter = max_results
br = browser()
br.addheaders.append( ['Accept-Encoding','gzip'] )
with closing(br.open(search_url, timeout=timeout)) as r:
ungzipResponse(r,br)
raw= xml_to_unicode(r.read(), strip_encoding_pats=True, assume_utf8=True)[0]
parser = etree.XMLParser(recover=True, no_network=True)
doc = etree.fromstring(raw, parser=parser)
for data in doc.xpath('//*[local-name() = "fb2-book"]'):
if counter <= 0:
break
counter -= 1
try:
sRes = self.create_search_result(data)
except Exception as e:
                    prints('ERROR: cannot parse search result #%s: %s'%(max_results - counter, e))
continue
yield sRes
def get_details(self, search_result, timeout=60):
pass
def create_search_result(self, data):
xp_template = 'normalize-space(@{0})'
sRes = SearchResult()
sRes.drm = SearchResult.DRM_UNLOCKED
sRes.detail_item = data.xpath(xp_template.format('hub_id'))
sRes.title = data.xpath('string(.//title-info/book-title/text()|.//publish-info/book-name/text())')
#aut = concat('.//title-info/author/first-name', ' ')
authors = data.xpath('.//title-info/author/first-name/text()|'\
'.//title-info/author/middle-name/text()|'\
'.//title-info/author/last-name/text()')
sRes.author = u' '.join(map(unicode, authors))
sRes.price = data.xpath(xp_template.format('price'))
# cover vs cover_preview
sRes.cover_url = data.xpath(xp_template.format('cover_preview'))
sRes.price = format_price_in_RUR(sRes.price)
types = data.xpath('//fb2-book//files/file/@type')
fmt_set = _parse_ebook_formats(' '.join(types))
sRes.formats = ', '.join(fmt_set)
return sRes
def format_price_in_RUR(price):
'''
    Try to format the price according to the Russian locale: '12 212,34 руб.'
    @param price: price in a format like 25.99
    @return: the formatted price if possible, otherwise the original value
@rtype: unicode
'''
if price and re.match("^\d*?\.\d*?$", price):
try:
price = u'{:,.2F} руб.'.format(float(price))
price = price.replace(',', ' ').replace('.', ',', 1)
except:
pass
return price
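# e.g. format_price_in_RUR(u'12212.34') -> u'12 212,34 руб.'
# (an input that does not look like a decimal price is returned unchanged)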
def ungzipResponse(r,b):
headers = r.info()
if headers['Content-Encoding']=='gzip':
import gzip
gz = gzip.GzipFile(fileobj=r, mode='rb')
data = gz.read()
gz.close()
#headers["Content-type"] = "text/html; charset=utf-8"
r.set_data( data )
b.set_response(r)
def _get_affiliate_id():
aff_id = u'3623565'
# Use Kovid's affiliate id 30% of the time.
if random.randint(1, 10) in (1, 2, 3):
aff_id = u'4084465'
return u'lfrom=' + aff_id
def _parse_ebook_formats(formatsStr):
'''
    Creates a set of displayable format names
    :param formatsStr: string with the book formats
    as provided by litres.ru
    :return: a set of displayable book formats
'''
formatsUnstruct = formatsStr.lower()
formats = set()
if 'fb2' in formatsUnstruct:
formats.add('FB2')
if 'html' in formatsUnstruct:
formats.add('HTML')
if 'txt' in formatsUnstruct:
formats.add('TXT')
if 'rtf' in formatsUnstruct:
formats.add('RTF')
if 'pdf' in formatsUnstruct:
formats.add('PDF')
if 'prc' in formatsUnstruct:
formats.add('PRC')
if 'lit' in formatsUnstruct:
        formats.add('LIT')
if 'epub' in formatsUnstruct:
formats.add('ePub')
if 'rb' in formatsUnstruct:
formats.add('RB')
if 'isilo3' in formatsUnstruct:
formats.add('ISILO3')
if 'lrf' in formatsUnstruct:
formats.add('LRF')
if 'jar' in formatsUnstruct:
formats.add('JAR')
return formats
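A usage sketch for the parser above; the input string is only illustrative of the file/@type values the LitRes catalog returns:

    print(sorted(_parse_ebook_formats('fb2.zip html txt pdf epub')))
    # -> ['FB2', 'HTML', 'PDF', 'TXT', 'ePub']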

70 more file diffs suppressed because they are too large