diff --git a/recipes/folhadesaopaulo_sub.recipe b/recipes/folhadesaopaulo_sub.recipe new file mode 100644 index 0000000000..660275330a --- /dev/null +++ b/recipes/folhadesaopaulo_sub.recipe @@ -0,0 +1,87 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class FSP(BasicNewsRecipe): + + title = u'Folha de S\xE3o Paulo - Jornal' + __author__ = 'fluzao' + description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \ + u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]' + INDEX = 'http://www1.folha.uol.com.br/fsp/indices/' + language = 'pt' + no_stylesheets = True + max_articles_per_feed = 30 + remove_javascript = True + needs_subscription = True + remove_tags_before = dict(name='b') + remove_tags_after = dict(name='!--/NOTICIA--') + remove_attributes = ['height','width'] + masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif' + + # fixes the problem with the section names + section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \ + 'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \ + 'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \ + 'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio'} + + # this solves the problem with truncated content in Kindle + conversion_options = {'linearize_tables' : True} + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('https://acesso.uol.com.br/login.html') + br.form = br.forms().next() + br['user'] = self.username + br['pass'] = self.password + br.submit().read() +## if 'Please try again' in raw: +## raise Exception('Your username and password are incorrect') + return br + + + def parse_index(self): + soup = self.index_to_soup(self.INDEX) + feeds = [] + articles = [] + section_title = "Preambulo" + for post in soup.findAll('a'): + # if name=True => new section + strpost = str(post) + if strpost.startswith(' new section found, creating old section feed: ', section_title) + section_title = post['name'] + if section_title in self.section_dict: + section_title = self.section_dict[section_title] + articles = [] + self.log('--> new section title: ', section_title) + if strpost.startswith(' post: ', post) + self.log('--> url: ', url) + self.log('--> title: ', title) + articles.append({'title':title, 'url':url}) + if articles: + feeds.append((section_title, articles)) + + # keeping the front page url + minha_capa = feeds[0][1][1]['url'] + + # removing the 'Preambulo' section + del feeds[0] + + # creating the url for the cover image + coverurl = feeds[0][1][0]['url'] + coverurl = coverurl.replace('/opiniao/fz', '/images/cp') + coverurl = coverurl.replace('01.htm', '.jpg') + self.cover_url = coverurl + + # inserting the cover page as the first article (nicer for kindle users) + feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}])) + return feeds diff --git a/resources/content_server/browse/browse.js b/resources/content_server/browse/browse.js index 1fb4cf4f70..6783f83a1d 100644 --- a/resources/content_server/browse/browse.js +++ b/resources/content_server/browse/browse.js @@ -285,6 +285,15 @@ function booklist(hide_sort) { first_page(); } +function search_result() { + var test = $("#booklist #page0").html(); + if (!test) { + $("#booklist").html("No books found matching this query"); + return; + } + booklist(); +} + function show_details(a_dom) { var book = $(a_dom).closest('div.summary'); var bd = $('#book_details_dialog'); diff --git a/session.vim b/session.vim index 848dbef936..6252c038db 100644 --- a/session.vim +++ b/session.vim @@ -2,7 +2,7 @@ let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"] python << EOFPY -import os +import os, sys import vipy @@ -11,8 +11,13 @@ project_dir = os.path.dirname(source_file) src_dir = os.path.abspath(os.path.join(project_dir, 'src')) base_dir = os.path.join(src_dir, 'calibre') +sys.path.insert(0, src_dir) +sys.resources_location = os.path.join(project_dir, 'resources') +sys.extensions_location = os.path.join(base_dir, 'plugins') +sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin') + vipy.session.initialize(project_name='calibre', src_dir=src_dir, - project_dir=project_dir, base_dir=base_dir) + project_dir=project_dir, base_dir=project_dir) def recipe_title_callback(raw): return eval(raw.decode('utf-8')).replace(' ', '_') diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 1474b540ee..c28ec814ba 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1236,7 +1236,7 @@ class StoreEbookNLStore(StoreBase): headquarters = 'NL' formats = ['EPUB', 'PDF'] - affiliate = True + affiliate = False class StoreEbookscomStore(StoreBase): name = 'eBooks.com' diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index dd90b4c2ec..decba3b780 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -22,6 +22,8 @@ except: _author_pat = re.compile(r'(?i),?\s+(and|with)\s+') def string_to_authors(raw): + if not raw: + return [] raw = raw.replace('&&', u'\uffff') raw = _author_pat.sub('&', raw) authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')] diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index d82a2268fa..7d8855de14 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -149,19 +149,20 @@ def metadata_from_filename(name, pat=None): try: au = match.group('author') aus = string_to_authors(au) - mi.authors = aus - if prefs['swap_author_names'] and mi.authors: - def swap(a): - if ',' in a: - parts = a.split(',', 1) - else: - parts = a.split(None, 1) - if len(parts) > 1: - t = parts[-1] - parts = parts[:-1] - parts.insert(0, t) - return ' '.join(parts) - mi.authors = [swap(x) for x in mi.authors] + if aus: + mi.authors = aus + if prefs['swap_author_names'] and mi.authors: + def swap(a): + if ',' in a: + parts = a.split(',', 1) + else: + parts = a.split(None, 1) + if len(parts) > 1: + t = parts[-1] + parts = parts[:-1] + parts.insert(0, t) + return ' '.join(parts) + mi.authors = [swap(x) for x in mi.authors] except (IndexError, ValueError): pass try: diff --git a/src/calibre/ebooks/odt/input.py b/src/calibre/ebooks/odt/input.py index e724acb981..927c43f66d 100644 --- a/src/calibre/ebooks/odt/input.py +++ b/src/calibre/ebooks/odt/input.py @@ -25,8 +25,50 @@ class Extract(ODF2XHTML): with open(name, 'wb') as f: f.write(data) - def filter_css(self, html, log): + def fix_markup(self, html, log): root = etree.fromstring(html) + self.epubify_markup(root, log) + self.filter_css(root, log) + html = etree.tostring(root, encoding='utf-8', + xml_declaration=True) + return html + + def epubify_markup(self, root, log): + # Fix

constructs as the asinine epubchecker complains + # about them + from calibre.ebooks.oeb.base import XPath, XHTML + pdiv = XPath('//h:p/h:div') + for div in pdiv(root): + div.getparent().tag = XHTML('div') + + # Remove the position:relative as it causes problems with some epub + # renderers. Remove display: block on an image inside a div as it is + # redundant and prevents text-align:center from working in ADE + imgpath = XPath('//h:div/h:img[@style]') + for img in imgpath(root): + div = img.getparent() + if len(div) == 1: + style = div.attrib['style'].replace('position:relative', '') + if style.startswith(';'): style = style[1:] + div.attrib['style'] = style + if img.attrib.get('style', '') == 'display: block;': + del img.attrib['style'] + + # A div/div/img construct causes text-align:center to not work in ADE + # so set the display of the second div to inline. This should have no + # effect (apart from minor vspace issues) in a compliant HTML renderer + # but it fixes the centering of the image via a text-align:center on + # the first div in ADE + imgpath = XPath('descendant::h:div/h:div/h:img') + for img in imgpath(root): + div2 = img.getparent() + div1 = div2.getparent() + if len(div1) == len(div2) == 1: + style = div2.attrib['style'] + div2.attrib['style'] = 'display:inline;'+style + + + def filter_css(self, root, log): style = root.xpath('//*[local-name() = "style" and @type="text/css"]') if style: style = style[0] @@ -40,9 +82,6 @@ class Extract(ODF2XHTML): extra.extend(sel_map.get(cls, [])) if extra: x.set('class', orig + ' ' + ' '.join(extra)) - html = etree.tostring(root, encoding='utf-8', - xml_declaration=True) - return html def do_filter_css(self, css): from cssutils import parseString @@ -86,7 +125,7 @@ class Extract(ODF2XHTML): # the available screen real estate html = html.replace('img { width: 100%; height: 100%; }', '') try: - html = self.filter_css(html, log) + html = self.fix_markup(html, log) except: log.exception('Failed to filter CSS, conversion may be slow') with open('index.xhtml', 'wb') as f: @@ -119,23 +158,4 @@ class ODTInput(InputFormatPlugin): accelerators): return Extract()(stream, '.', log) - def postprocess_book(self, oeb, opts, log): - # Fix

constructs as the asinine epubchecker complains - # about them - from calibre.ebooks.oeb.base import XPath, XHTML - path = XPath('//h:p/h:div') - path2 = XPath('//h:div[@style]/h:img[@style]') - for item in oeb.spine: - root = item.data - if not hasattr(root, 'xpath'): continue - for div in path(root): - div.getparent().tag = XHTML('div') - - # This construct doesn't render well in HTML - for img in path2(root): - div = img.getparent() - if 'position:relative' in div.attrib['style'] and len(div) == 1 \ - and 'img' in div[0].tag: - del div.attrib['style'] - diff --git a/src/calibre/gui2/store/stores/ebook_nl_plugin.py b/src/calibre/gui2/store/stores/ebook_nl_plugin.py index e18ca7de72..0a79026dbb 100644 --- a/src/calibre/gui2/store/stores/ebook_nl_plugin.py +++ b/src/calibre/gui2/store/stores/ebook_nl_plugin.py @@ -23,9 +23,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog class EBookNLStore(BasicStoreConfig, StorePlugin): def open(self, parent=None, detail_item=None, external=False): - url = 'http://ad.zanox.com/ppc/?19015168C29310186T' - url_details = ('http://ad.zanox.com/ppc/?19016028C1098154549T&ULP=[[' - 'http://www.ebook.nl/store/{0}]]') + url = 'http://www.ebook.nl/' + url_details = ('http://www.ebook.nl/store/{0}') if external or self.config.get('open_external', False): if detail_item: diff --git a/src/calibre/gui2/store/stores/foyles_uk_plugin.py b/src/calibre/gui2/store/stores/foyles_uk_plugin.py index fd670d2d85..0e5ccfad01 100644 --- a/src/calibre/gui2/store/stores/foyles_uk_plugin.py +++ b/src/calibre/gui2/store/stores/foyles_uk_plugin.py @@ -6,7 +6,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' -import urllib2 +import urllib2, re from contextlib import closing from lxml import html @@ -67,7 +67,10 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin): title = ''.join(data.xpath('.//a[@class="Title"]/text()')) author = ', '.join(data.xpath('.//span[@class="Author"]/text()')) price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()')) - price = price[price.rfind(' '):] + mo = re.search('£[\d\.]+', price) + if mo is None: + continue + price = mo.group(0) counter -= 1 diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index fd9e568163..5b7d732820 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -873,7 +873,7 @@ class BrowseServer(object): suffix=_('in search')+': '+xml(query)) return self.browse_template(sort, category=False, initial_search=query).format( title=_('Matching books'), - script='booklist();', main=html) + script='search_result();', main=html) # }}}