mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
269d9093c0
87
recipes/folhadesaopaulo_sub.recipe
Normal file
87
recipes/folhadesaopaulo_sub.recipe
Normal file
@ -0,0 +1,87 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class FSP(BasicNewsRecipe):
    """Recipe for the printed edition of Folha de S. Paulo (UOL subscribers).

    Logs in through the UOL login form and builds the feed list by walking
    the anchors of the edition index page (``INDEX``).
    """

    title = u'Folha de S\xE3o Paulo - Jornal'
    __author__ = 'fluzao'
    description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
        u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
    INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
    language = 'pt'
    no_stylesheets = True
    max_articles_per_feed = 30
    remove_javascript = True
    needs_subscription = True
    remove_tags_before = dict(name='b')
    remove_tags_after = dict(name='!--/NOTICIA--')
    remove_attributes = ['height', 'width']
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'

    # fixes the problem with the section names: maps the anchor names used on
    # the index page to the titles shown to the reader
    section_dict = {
        'cotidian': 'cotidiano',
        'ilustrad': 'ilustrada',
        'quadrin': 'quadrinhos',
        'opiniao': u'opini\xE3o',
        'ciencia': u'ci\xeancia',
        'saude': u'sa\xfade',
        'ribeirao': u'ribeir\xE3o',
        'equilibrio': u'equil\xedbrio',
    }

    # this solves the problem with truncated content in Kindle
    conversion_options = {'linearize_tables': True}

    def get_browser(self):
        """Return a browser, logged in to UOL when credentials are set.

        The first form found on the login page is filled with
        ``self.username`` / ``self.password`` and submitted.
        """
        # NOTE(review): called without ``self`` exactly as before; confirm
        # this matches the BasicNewsRecipe.get_browser signature of the
        # calibre version this recipe targets.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://acesso.uol.com.br/login.html')
            # next(iter(...)) works whether forms() yields an iterator or a
            # list, unlike the Python-2-only ``.next()`` method.
            br.form = next(iter(br.forms()))
            br['user'] = self.username
            br['pass'] = self.password
            # The response is read but not inspected, so wrong credentials
            # are not detected at this point.
            br.submit().read()
        return br

    def parse_index(self):
        """Build the list of ``(section title, articles)`` feeds from INDEX.

        ``<a name=...>`` anchors start a new section; ``<a href=...>`` anchors
        whose href starts with ``/fsp`` become articles of the current
        section. Links seen before the first named anchor are collected under
        a temporary ``Preambulo`` section: its second link is used as the
        front page article and the section itself is dropped.

        Also sets ``self.cover_url`` from the first article of the first
        remaining section. Returns the feed list, which may be empty.
        """
        preamble_title = "Preambulo"
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        articles = []
        section_title = preamble_title
        for post in soup.findAll('a'):
            # The serialized tag tells us which attribute comes first:
            # name=... => new section, href=... => candidate article
            strpost = str(post)
            if strpost.startswith('<a name'):
                if articles:
                    feeds.append((section_title, articles))
                    self.log()
                    self.log('--> new section found, creating old section feed: ', section_title)
                section_title = post['name']
                if section_title in self.section_dict:
                    section_title = self.section_dict[section_title]
                articles = []
                self.log('--> new section title: ', section_title)
            if strpost.startswith('<a href'):
                url = post['href']
                if url.startswith('/fsp'):
                    url = 'http://www1.folha.uol.com.br'+url
                    title = self.tag_to_string(post)
                    self.log()
                    self.log('--> post: ', post)
                    self.log('--> url: ', url)
                    self.log('--> title: ', title)
                    articles.append({'title': title, 'url': url})
        if articles:
            feeds.append((section_title, articles))

        # keeping the front page url and removing the 'Preambulo' section.
        # Only done when the first feed really is the preamble, so a real
        # section is never deleted and a short preamble no longer raises
        # IndexError.
        minha_capa = None
        if feeds and feeds[0][0] == preamble_title:
            preamble_articles = feeds[0][1]
            if len(preamble_articles) > 1:
                minha_capa = preamble_articles[1]['url']
            else:
                self.log('--> front page link not found in the index')
            del feeds[0]

        # creating the url for the cover image from the first article of the
        # first remaining section (feeds only ever hold non-empty lists)
        if feeds:
            coverurl = feeds[0][1][0]['url']
            coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
            coverurl = coverurl.replace('01.htm', '.jpg')
            self.cover_url = coverurl

        # inserting the cover page as the first article (nicer for kindle users)
        if minha_capa is not None:
            feeds.insert(0, (u'primeira p\xe1gina', [{'title': u'Primeira p\xe1gina', 'url': minha_capa}]))
        return feeds
|
@ -285,6 +285,15 @@ function booklist(hide_sort) {
|
||||
first_page();
|
||||
}
|
||||
|
||||
// Entry point for the search results page: render the book list when the
// first result page has content, otherwise show a "nothing found" message
// inside the #booklist container.
function search_result() {
    var first_page_html = $("#booklist #page0").html();
    if (first_page_html) {
        booklist();
        return;
    }
    $("#booklist").html("No books found matching this query");
}
|
||||
|
||||
function show_details(a_dom) {
|
||||
var book = $(a_dom).closest('div.summary');
|
||||
var bd = $('#book_details_dialog');
|
||||
|
@ -2,7 +2,7 @@
|
||||
let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]
|
||||
|
||||
python << EOFPY
|
||||
import os
|
||||
import os, sys
|
||||
|
||||
import vipy
|
||||
|
||||
@ -11,8 +11,13 @@ project_dir = os.path.dirname(source_file)
|
||||
src_dir = os.path.abspath(os.path.join(project_dir, 'src'))
|
||||
base_dir = os.path.join(src_dir, 'calibre')
|
||||
|
||||
sys.path.insert(0, src_dir)
|
||||
sys.resources_location = os.path.join(project_dir, 'resources')
|
||||
sys.extensions_location = os.path.join(base_dir, 'plugins')
|
||||
sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin')
|
||||
|
||||
vipy.session.initialize(project_name='calibre', src_dir=src_dir,
|
||||
project_dir=project_dir, base_dir=base_dir)
|
||||
project_dir=project_dir, base_dir=project_dir)
|
||||
|
||||
def recipe_title_callback(raw):
    """Evaluate the raw title expression and join its words with underscores.

    ``raw`` is a UTF-8 encoded byte string containing a Python expression;
    it is decoded, evaluated, and every space in the result is replaced
    by ``_``.
    """
    # SECURITY NOTE(review): eval() runs whatever expression ``raw`` holds.
    # Only feed it text from trusted sources; consider ast.literal_eval if
    # the input is always a plain string literal -- confirm with callers.
    title = eval(raw.decode('utf-8'))
    return title.replace(' ', '_')
|
||||
|
@ -1236,7 +1236,7 @@ class StoreEbookNLStore(StoreBase):
|
||||
|
||||
headquarters = 'NL'
|
||||
formats = ['EPUB', 'PDF']
|
||||
affiliate = True
|
||||
affiliate = False
|
||||
|
||||
class StoreEbookscomStore(StoreBase):
|
||||
name = 'eBooks.com'
|
||||
|
@ -22,6 +22,8 @@ except:
|
||||
_author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
|
||||
|
||||
def string_to_authors(raw):
|
||||
if not raw:
|
||||
return []
|
||||
raw = raw.replace('&&', u'\uffff')
|
||||
raw = _author_pat.sub('&', raw)
|
||||
authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')]
|
||||
|
@ -149,6 +149,7 @@ def metadata_from_filename(name, pat=None):
|
||||
try:
|
||||
au = match.group('author')
|
||||
aus = string_to_authors(au)
|
||||
if aus:
|
||||
mi.authors = aus
|
||||
if prefs['swap_author_names'] and mi.authors:
|
||||
def swap(a):
|
||||
|
@ -25,8 +25,50 @@ class Extract(ODF2XHTML):
|
||||
with open(name, 'wb') as f:
|
||||
f.write(data)
|
||||
|
||||
def filter_css(self, html, log):
|
||||
def fix_markup(self, html, log):
    """Parse ``html``, apply the markup and CSS fixes, and serialize it again.

    The tree is passed first to ``epubify_markup`` and then to
    ``filter_css``. The result is returned as UTF-8 encoded markup with an
    XML declaration.
    """
    tree = etree.fromstring(html)
    for fixer in (self.epubify_markup, self.filter_css):
        fixer(tree, log)
    return etree.tostring(tree, encoding='utf-8', xml_declaration=True)
|
||||
|
||||
def epubify_markup(self, root, log):
    """Rewrite, in place, constructs that EPUB checkers/renderers dislike.

    ``root`` is the parsed XHTML tree and is modified directly; ``log`` is
    accepted but not used here. Nothing is returned.
    """
    from calibre.ebooks.oeb.base import XPath, XHTML

    # Fix <p><div> constructs as the epubchecker complains about them:
    # the enclosing <p> is turned into a <div>
    pdiv = XPath('//h:p/h:div')
    for div in pdiv(root):
        div.getparent().tag = XHTML('div')

    # Remove the position:relative as it causes problems with some epub
    # renderers. Remove display: block on an image inside a div as it is
    # redundant and prevents text-align:center from working in ADE
    # NOTE(review): the nesting of the img check under ``len(div) == 1``
    # was reconstructed from a view without indentation -- confirm.
    imgpath = XPath('//h:div/h:img[@style]')
    for img in imgpath(root):
        div = img.getparent()
        if len(div) == 1:
            # The XPath only guarantees a style on the <img>, not on the
            # <div>, so look it up without raising KeyError.
            style = div.attrib.get('style')
            if style is not None:
                style = style.replace('position:relative', '')
                if style.startswith(';'):
                    style = style[1:]
                div.attrib['style'] = style
            if img.attrib.get('style', '') == 'display: block;':
                del img.attrib['style']

    # A div/div/img construct causes text-align:center to not work in ADE
    # so set the display of the second div to inline. This should have no
    # effect (apart from minor vspace issues) in a compliant HTML renderer
    # but it fixes the centering of the image via a text-align:center on
    # the first div in ADE
    imgpath = XPath('descendant::h:div/h:div/h:img')
    for img in imgpath(root):
        div2 = img.getparent()
        div1 = div2.getparent()
        if len(div1) == len(div2) == 1:
            # The inner div is not required to carry a style attribute
            style = div2.attrib.get('style', '')
            div2.attrib['style'] = 'display:inline;'+style
|
||||
|
||||
|
||||
def filter_css(self, root, log):
|
||||
style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
|
||||
if style:
|
||||
style = style[0]
|
||||
@ -40,9 +82,6 @@ class Extract(ODF2XHTML):
|
||||
extra.extend(sel_map.get(cls, []))
|
||||
if extra:
|
||||
x.set('class', orig + ' ' + ' '.join(extra))
|
||||
html = etree.tostring(root, encoding='utf-8',
|
||||
xml_declaration=True)
|
||||
return html
|
||||
|
||||
def do_filter_css(self, css):
|
||||
from cssutils import parseString
|
||||
@ -86,7 +125,7 @@ class Extract(ODF2XHTML):
|
||||
# the available screen real estate
|
||||
html = html.replace('img { width: 100%; height: 100%; }', '')
|
||||
try:
|
||||
html = self.filter_css(html, log)
|
||||
html = self.fix_markup(html, log)
|
||||
except:
|
||||
log.exception('Failed to filter CSS, conversion may be slow')
|
||||
with open('index.xhtml', 'wb') as f:
|
||||
@ -119,23 +158,4 @@ class ODTInput(InputFormatPlugin):
|
||||
accelerators):
|
||||
return Extract()(stream, '.', log)
|
||||
|
||||
def postprocess_book(self, oeb, opts, log):
    """Clean up <p><div> and styled div/img constructs in every spine item.

    Spine items whose data has no ``xpath`` attribute are skipped. The
    trees are modified in place; ``opts`` and ``log`` are not used here.
    """
    # Fix <p><div> constructs as the epubchecker complains about them
    from calibre.ebooks.oeb.base import XPath, XHTML
    p_with_div = XPath('//h:p/h:div')
    styled_div_img = XPath('//h:div[@style]/h:img[@style]')
    for item in oeb.spine:
        tree = item.data
        if hasattr(tree, 'xpath'):
            for child_div in p_with_div(tree):
                child_div.getparent().tag = XHTML('div')

            # This construct doesn't render well in HTML
            for img in styled_div_img(tree):
                holder = img.getparent()
                if 'position:relative' not in holder.attrib['style']:
                    continue
                if len(holder) == 1 and 'img' in holder[0].tag:
                    del holder.attrib['style']
|
||||
|
||||
|
||||
|
@ -23,9 +23,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
||||
class EBookNLStore(BasicStoreConfig, StorePlugin):
|
||||
|
||||
def open(self, parent=None, detail_item=None, external=False):
|
||||
url = 'http://ad.zanox.com/ppc/?19015168C29310186T'
|
||||
url_details = ('http://ad.zanox.com/ppc/?19016028C1098154549T&ULP=[['
|
||||
'http://www.ebook.nl/store/{0}]]')
|
||||
url = 'http://www.ebook.nl/'
|
||||
url_details = ('http://www.ebook.nl/store/{0}')
|
||||
|
||||
if external or self.config.get('open_external', False):
|
||||
if detail_item:
|
||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import urllib2
|
||||
import urllib2, re
|
||||
from contextlib import closing
|
||||
|
||||
from lxml import html
|
||||
@ -67,7 +67,10 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
|
||||
title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
|
||||
author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
|
||||
price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))
|
||||
price = price[price.rfind(' '):]
|
||||
mo = re.search('£[\d\.]+', price)
|
||||
if mo is None:
|
||||
continue
|
||||
price = mo.group(0)
|
||||
|
||||
counter -= 1
|
||||
|
||||
|
@ -873,7 +873,7 @@ class BrowseServer(object):
|
||||
suffix=_('in search')+': '+xml(query))
|
||||
return self.browse_template(sort, category=False, initial_search=query).format(
|
||||
title=_('Matching books'),
|
||||
script='booklist();', main=html)
|
||||
script='search_result();', main=html)
|
||||
|
||||
# }}}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user