Merge from trunk

This commit is contained in:
Charles Haley 2011-09-28 12:05:20 +02:00
commit 269d9093c0
10 changed files with 172 additions and 46 deletions

View File

@ -0,0 +1,87 @@
from calibre.web.feeds.news import BasicNewsRecipe
class FSP(BasicNewsRecipe):
    """Recipe for the printed edition of Folha de S. Paulo (UOL subscribers).

    The edition index page is scanned link by link: every ``<a name=...>``
    anchor opens a new section and every ``<a href=...>`` link whose target
    starts with ``/fsp`` becomes an article of the section currently open.
    """

    title = u'Folha de S\xE3o Paulo - Jornal'
    __author__ = 'fluzao'
    description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
        u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
    INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
    language = 'pt'
    no_stylesheets = True
    max_articles_per_feed = 30
    remove_javascript = True
    needs_subscription = True
    remove_tags_before = dict(name='b')
    remove_tags_after = dict(name='!--/NOTICIA--')
    remove_attributes = ['height', 'width']
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'

    # Maps the anchor names found in the index to the section titles that
    # should be shown (fixes truncated names and missing accents).
    section_dict = {
        'cotidian': 'cotidiano',
        'ilustrad': 'ilustrada',
        'quadrin': 'quadrinhos',
        'opiniao': u'opini\xE3o',
        'ciencia': u'ci\xeancia',
        'saude': u'sa\xfade',
        'ribeirao': u'ribeir\xE3o',
        'equilibrio': u'equil\xedbrio',
    }

    # This solves the problem with truncated content in Kindle.
    conversion_options = {'linearize_tables': True}

    def get_browser(self):
        """Return a browser, logged in to UOL when credentials are set."""
        # NOTE(review): called on the class without an instance, exactly as
        # the recipe was written; confirm this matches the get_browser
        # signature of the calibre version in use.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://acesso.uol.com.br/login.html')
            # Use the first form on the login page.
            br.form = next(iter(br.forms()))
            br['user'] = self.username
            br['pass'] = self.password
            # The response body is read and discarded, so a rejected login
            # is not detected here.
            br.submit().read()
        return br

    def parse_index(self):
        """Build the list of ``(section title, articles)`` feeds.

        Each article is a dict with ``title`` and ``url`` keys. Links found
        before the first named anchor are collected under a placeholder
        section; that section is dropped from the result and its second link
        is re-inserted as a one-article "front page" feed at position 0.
        ``self.cover_url`` is derived from the first article of the first
        real section.

        When the index does not have the expected shape (no placeholder
        section, fewer than two links in it, or no real section) the
        corresponding step is skipped instead of raising IndexError.
        """
        preamble_title = "Preambulo"
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        articles = []
        section_title = preamble_title
        for post in soup.findAll('a'):
            strpost = str(post)
            if strpost.startswith('<a name'):
                # A named anchor starts a new section: flush the one that
                # was being collected, if it has any articles.
                if articles:
                    feeds.append((section_title, articles))
                    self.log()
                    self.log('--> new section found, creating old section feed: ', section_title)
                section_title = post['name']
                section_title = self.section_dict.get(section_title, section_title)
                articles = []
                self.log('--> new section title: ', section_title)
            elif strpost.startswith('<a href'):
                url = post['href']
                if url.startswith('/fsp'):
                    url = 'http://www1.folha.uol.com.br' + url
                    title = self.tag_to_string(post)
                    self.log()
                    self.log('--> post: ', post)
                    self.log('--> url: ', url)
                    self.log('--> title: ', title)
                    articles.append({'title': title, 'url': url})
        if articles:
            feeds.append((section_title, articles))

        # Remove the placeholder section, keeping the front page url (its
        # second link). Only done when the placeholder is really there, so a
        # genuine section is never discarded by mistake.
        front_page_url = None
        if feeds and feeds[0][0] == preamble_title:
            preamble_articles = feeds.pop(0)[1]
            if len(preamble_articles) > 1:
                front_page_url = preamble_articles[1]['url']
            else:
                self.log('--> front page link not found in the index')

        # Create the url for the cover image from the first article of the
        # first real section (every stored section has at least one article).
        if feeds:
            coverurl = feeds[0][1][0]['url']
            coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
            coverurl = coverurl.replace('01.htm', '.jpg')
            self.cover_url = coverurl
        else:
            self.log('--> no section found in the index, cover not set')

        # Insert the front page as the first article (nicer for kindle users).
        if front_page_url is not None:
            feeds.insert(0, (u'primeira p\xe1gina',
                             [{'title': u'Primeira p\xe1gina', 'url': front_page_url}]))
        return feeds

View File

@ -285,6 +285,15 @@ function booklist(hide_sort) {
first_page(); first_page();
} }
// Initialise the book list for a search results page. When the first
// result page (#page0 inside #booklist) is missing or empty, a "no books"
// message replaces the list instead.
function search_result() {
    var first_page_html = $("#booklist #page0").html();
    if (first_page_html) {
        booklist();
        return;
    }
    $("#booklist").html("No books found matching this query");
}
function show_details(a_dom) { function show_details(a_dom) {
var book = $(a_dom).closest('div.summary'); var book = $(a_dom).closest('div.summary');
var bd = $('#book_details_dialog'); var bd = $('#book_details_dialog');

View File

@ -2,7 +2,7 @@
let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"] let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]
python << EOFPY python << EOFPY
import os import os, sys
import vipy import vipy
@ -11,8 +11,13 @@ project_dir = os.path.dirname(source_file)
src_dir = os.path.abspath(os.path.join(project_dir, 'src')) src_dir = os.path.abspath(os.path.join(project_dir, 'src'))
base_dir = os.path.join(src_dir, 'calibre') base_dir = os.path.join(src_dir, 'calibre')
sys.path.insert(0, src_dir)
sys.resources_location = os.path.join(project_dir, 'resources')
sys.extensions_location = os.path.join(base_dir, 'plugins')
sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin')
vipy.session.initialize(project_name='calibre', src_dir=src_dir, vipy.session.initialize(project_name='calibre', src_dir=src_dir,
project_dir=project_dir, base_dir=base_dir) project_dir=project_dir, base_dir=project_dir)
def recipe_title_callback(raw): def recipe_title_callback(raw):
return eval(raw.decode('utf-8')).replace(' ', '_') return eval(raw.decode('utf-8')).replace(' ', '_')

View File

@ -1236,7 +1236,7 @@ class StoreEbookNLStore(StoreBase):
headquarters = 'NL' headquarters = 'NL'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
affiliate = True affiliate = False
class StoreEbookscomStore(StoreBase): class StoreEbookscomStore(StoreBase):
name = 'eBooks.com' name = 'eBooks.com'

View File

@ -22,6 +22,8 @@ except:
_author_pat = re.compile(r'(?i),?\s+(and|with)\s+') _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
def string_to_authors(raw): def string_to_authors(raw):
if not raw:
return []
raw = raw.replace('&&', u'\uffff') raw = raw.replace('&&', u'\uffff')
raw = _author_pat.sub('&', raw) raw = _author_pat.sub('&', raw)
authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')] authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')]

View File

@ -149,6 +149,7 @@ def metadata_from_filename(name, pat=None):
try: try:
au = match.group('author') au = match.group('author')
aus = string_to_authors(au) aus = string_to_authors(au)
if aus:
mi.authors = aus mi.authors = aus
if prefs['swap_author_names'] and mi.authors: if prefs['swap_author_names'] and mi.authors:
def swap(a): def swap(a):

View File

@ -25,8 +25,50 @@ class Extract(ODF2XHTML):
with open(name, 'wb') as f: with open(name, 'wb') as f:
f.write(data) f.write(data)
def filter_css(self, html, log): def fix_markup(self, html, log):
root = etree.fromstring(html) root = etree.fromstring(html)
self.epubify_markup(root, log)
self.filter_css(root, log)
html = etree.tostring(root, encoding='utf-8',
xml_declaration=True)
return html
def epubify_markup(self, root, log):
    """Rewrite, in place, constructs in *root* that cause trouble in EPUB.

    :param root: parsed XHTML tree; it is modified in place.
    :param log: conversion log (not used by this method).

    Elements that lack a ``style`` attribute are handled without raising,
    so one such element does not abort the remaining fixes.
    """
    from calibre.ebooks.oeb.base import XPath, XHTML

    # Fix <p><div> constructs, as epubcheck complains about them: the
    # enclosing <p> is turned into a <div>.
    pdiv = XPath('//h:p/h:div')
    for div in pdiv(root):
        div.getparent().tag = XHTML('div')

    # Remove the position:relative as it causes problems with some epub
    # renderers. Remove display: block on an image inside a div as it is
    # redundant and prevents text-align:center from working in ADE
    imgpath = XPath('//h:div/h:img[@style]')
    for img in imgpath(root):
        div = img.getparent()
        if len(div) == 1:
            # Only the <img> is guaranteed to carry a style attribute by
            # the XPath above; the <div> may have none.
            style = div.attrib.get('style')
            if style is not None:
                style = style.replace('position:relative', '')
                if style.startswith(';'):
                    style = style[1:]
                div.attrib['style'] = style
            if img.attrib.get('style', '') == 'display: block;':
                del img.attrib['style']

    # A div/div/img construct causes text-align:center to not work in ADE
    # so set the display of the second div to inline. This should have no
    # effect (apart from minor vspace issues) in a compliant HTML renderer
    # but it fixes the centering of the image via a text-align:center on
    # the first div in ADE
    imgpath = XPath('descendant::h:div/h:div/h:img')
    for img in imgpath(root):
        div2 = img.getparent()
        div1 = div2.getparent()
        if len(div1) == len(div2) == 1:
            # The inner div is not required to have a style of its own.
            style = div2.attrib.get('style', '')
            div2.attrib['style'] = 'display:inline;' + style
def filter_css(self, root, log):
style = root.xpath('//*[local-name() = "style" and @type="text/css"]') style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
if style: if style:
style = style[0] style = style[0]
@ -40,9 +82,6 @@ class Extract(ODF2XHTML):
extra.extend(sel_map.get(cls, [])) extra.extend(sel_map.get(cls, []))
if extra: if extra:
x.set('class', orig + ' ' + ' '.join(extra)) x.set('class', orig + ' ' + ' '.join(extra))
html = etree.tostring(root, encoding='utf-8',
xml_declaration=True)
return html
def do_filter_css(self, css): def do_filter_css(self, css):
from cssutils import parseString from cssutils import parseString
@ -86,7 +125,7 @@ class Extract(ODF2XHTML):
# the available screen real estate # the available screen real estate
html = html.replace('img { width: 100%; height: 100%; }', '') html = html.replace('img { width: 100%; height: 100%; }', '')
try: try:
html = self.filter_css(html, log) html = self.fix_markup(html, log)
except: except:
log.exception('Failed to filter CSS, conversion may be slow') log.exception('Failed to filter CSS, conversion may be slow')
with open('index.xhtml', 'wb') as f: with open('index.xhtml', 'wb') as f:
@ -119,23 +158,4 @@ class ODTInput(InputFormatPlugin):
accelerators): accelerators):
return Extract()(stream, '.', log) return Extract()(stream, '.', log)
def postprocess_book(self, oeb, opts, log):
# Fix <p><div> constructs as the asinine epubchecker complains
# about them
from calibre.ebooks.oeb.base import XPath, XHTML
path = XPath('//h:p/h:div')
path2 = XPath('//h:div[@style]/h:img[@style]')
for item in oeb.spine:
root = item.data
if not hasattr(root, 'xpath'): continue
for div in path(root):
div.getparent().tag = XHTML('div')
# This construct doesn't render well in HTML
for img in path2(root):
div = img.getparent()
if 'position:relative' in div.attrib['style'] and len(div) == 1 \
and 'img' in div[0].tag:
del div.attrib['style']

View File

@ -23,9 +23,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
class EBookNLStore(BasicStoreConfig, StorePlugin): class EBookNLStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
url = 'http://ad.zanox.com/ppc/?19015168C29310186T' url = 'http://www.ebook.nl/'
url_details = ('http://ad.zanox.com/ppc/?19016028C1098154549T&ULP=[[' url_details = ('http://www.ebook.nl/store/{0}')
'http://www.ebook.nl/store/{0}]]')
if external or self.config.get('open_external', False): if external or self.config.get('open_external', False):
if detail_item: if detail_item:

View File

@ -6,7 +6,7 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import urllib2 import urllib2, re
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
@ -67,7 +67,10 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
title = ''.join(data.xpath('.//a[@class="Title"]/text()')) title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
author = ', '.join(data.xpath('.//span[@class="Author"]/text()')) author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()')) price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))
price = price[price.rfind(' '):] mo = re.search('£[\d\.]+', price)
if mo is None:
continue
price = mo.group(0)
counter -= 1 counter -= 1

View File

@ -873,7 +873,7 @@ class BrowseServer(object):
suffix=_('in search')+': '+xml(query)) suffix=_('in search')+': '+xml(query))
return self.browse_template(sort, category=False, initial_search=query).format( return self.browse_template(sort, category=False, initial_search=query).format(
title=_('Matching books'), title=_('Matching books'),
script='booklist();', main=html) script='search_result();', main=html)
# }}} # }}}