Merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-09-28 12:05:20 +02:00 · 2011-09-28 12:05:20 +02:00 · 269d9093c0
commit 269d9093c0
parent d086acf076 2701005fb5
10 changed files with 172 additions and 46 deletions
--- a/recipes/folhadesaopaulo_sub.recipe
+++ b/recipes/folhadesaopaulo_sub.recipe
@ -0,0 +1,87 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class FSP(BasicNewsRecipe):
    """Recipe for the printed edition of Folha de Sao Paulo (UOL subscribers).

    The index page at INDEX is scanned for named anchors (sections) and
    '/fsp' links (articles). When credentials are supplied, get_browser
    submits them to the UOL login form before any page is fetched.
    """
    title      = u'Folha de S\xE3o Paulo - Jornal'
    __author__ = 'fluzao'
    description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
                  u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
    # Index page of the printed edition; parse_index starts from here
    INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
    language = 'pt'
    no_stylesheets = True
    max_articles_per_feed  = 30
    remove_javascript     = True
    needs_subscription = True
    remove_tags_before = dict(name='b')
    remove_tags_after  = dict(name='!--/NOTICIA--')
    remove_attributes = ['height','width']
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
    # fixes the problem with the section names: maps the anchor name found
    # in the index page to the section title that should be displayed
    section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \
                    'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \
                    'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \
                    'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio'}
    # this solves the problem with truncated content in Kindle
    conversion_options = {'linearize_tables' : True}
    def get_browser(self):
        """Return a browser, logged in to UOL when credentials are set.

        The login form is submitted only when both self.username and
        self.password are not None; otherwise the browser is returned
        without visiting the login page.
        """
        # NOTE(review): called on the class without an instance; presumably
        # get_browser is a classmethod in this calibre version - confirm.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://acesso.uol.com.br/login.html')
            # next(iter(...)) picks the first form whether forms() returns
            # an iterator or a list, and does not depend on the Python 2
            # only .next() method.
            br.form = next(iter(br.forms()))
            br['user']   = self.username
            br['pass'] = self.password
            # NOTE(review): the login response is read but never inspected,
            # so incorrect credentials are not reported here.
            br.submit().read()
        return br
    def parse_index(self):
        """Build the feeds of the edition from the index page.

        Every <a> tag of the INDEX page is visited in document order. A tag
        whose markup starts with '<a name' opens a new section (its name is
        translated through section_dict when listed there). A tag whose
        markup starts with '<a href' and whose href starts with '/fsp' is
        added as an article of the current section. Links seen before the
        first named anchor form the preamble and are not returned as a
        feed; the second preamble link is used as the front page article.

        Side effect: sets self.cover_url, derived from the URL of the first
        article of the first section, when at least one section was found.

        Returns a list of (section_title, articles) tuples where articles
        is a list of dicts with the keys 'title' and 'url'. The list is
        empty when nothing could be extracted from the index page.
        """
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        articles = []
        # links collected before the first named anchor; kept apart so that
        # a missing or short preamble can never drop a real section
        preamble = []
        in_preamble = True
        section_title = "Preambulo"
        for post in soup.findAll('a'):
            # if name=True => new section
            strpost = str(post)
            if strpost.startswith('<a name'):
                if articles:
                    if in_preamble:
                        preamble = articles
                    else:
                        feeds.append((section_title, articles))
                    self.log()
                    self.log('--> new section found, creating old section feed: ', section_title)
                in_preamble = False
                section_title = post['name']
                section_title = self.section_dict.get(section_title, section_title)
                articles = []
                self.log('--> new section title:   ', section_title)
            if strpost.startswith('<a href'):
                url = post['href']
                if url.startswith('/fsp'):
                    url = 'http://www1.folha.uol.com.br'+url
                    title = self.tag_to_string(post)
                    self.log()
                    self.log('--> post:  ', post)
                    self.log('--> url:   ', url)
                    self.log('--> title: ', title)
                    articles.append({'title':title, 'url':url})
        # flush the articles of the last section (or of the preamble when
        # the page has no named anchor at all)
        if articles:
            if in_preamble:
                preamble = articles
            else:
                feeds.append((section_title, articles))
        # creating the url for the cover image; every stored section has at
        # least one article, so only the presence of a section is checked
        if feeds:
            coverurl = feeds[0][1][0]['url']
            coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
            coverurl = coverurl.replace('01.htm', '.jpg')
            self.cover_url = coverurl
        # inserting the cover page as the first article (nicer for kindle
        # users); the front page url is the second link of the preamble
        if len(preamble) > 1:
            minha_capa = preamble[1]['url']
            feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}]))
        return feeds
--- a/resources/content_server/browse/browse.js
+++ b/resources/content_server/browse/browse.js
@ -285,6 +285,15 @@ function booklist(hide_sort) {
    first_page(); 
 }
 function search_result() {
    // Render the book list for a search, or a notice when the first
    // result page (#page0 inside #booklist) has no content.
    var first_page_html = $("#booklist #page0").html();
    if (first_page_html) {
        booklist();
    } else {
        $("#booklist").html("No books found matching this query");
    }
 }
 function show_details(a_dom) {
    var book = $(a_dom).closest('div.summary');
    var bd = $('#book_details_dialog');
--- a/session.vim
+++ b/session.vim
@ -2,7 +2,7 @@
 let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]
 python << EOFPY
-import os
+import os, sys
 import vipy
@ -11,8 +11,13 @@ project_dir = os.path.dirname(source_file)
 src_dir = os.path.abspath(os.path.join(project_dir, 'src'))
 base_dir = os.path.join(src_dir, 'calibre')
 sys.path.insert(0, src_dir)
 sys.resources_location = os.path.join(project_dir, 'resources')
 sys.extensions_location = os.path.join(base_dir, 'plugins')
 sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin')
 vipy.session.initialize(project_name='calibre', src_dir=src_dir,
-            project_dir=project_dir, base_dir=base_dir)
+            project_dir=project_dir, base_dir=project_dir)
 def recipe_title_callback(raw):
    return eval(raw.decode('utf-8')).replace(' ', '_')
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1236,7 +1236,7 @@ class StoreEbookNLStore(StoreBase):
    headquarters = 'NL'
    formats = ['EPUB', 'PDF']
-    affiliate = True
+    affiliate = False
 class StoreEbookscomStore(StoreBase):
    name = 'eBooks.com'
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -22,6 +22,8 @@ except:
    _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
 def string_to_authors(raw):
    if not raw:
        return []
    raw = raw.replace('&&', u'\uffff')
    raw = _author_pat.sub('&', raw)
    authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')]
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@ -149,6 +149,7 @@ def metadata_from_filename(name, pat=None):
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    def swap(a):
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@ -25,8 +25,50 @@ class Extract(ODF2XHTML):
                with open(name, 'wb') as f:
                    f.write(data)
-    def filter_css(self, html, log):
+    def fix_markup(self, html, log):
        root = etree.fromstring(html)
        self.epubify_markup(root, log)
        self.filter_css(root, log)
        html = etree.tostring(root, encoding='utf-8',
                xml_declaration=True)
        return html
    def epubify_markup(self, root, log):
        """Rewrite, in place, markup that epub checkers or renderers mishandle.

        Three fixes are applied to the tree rooted at root:
        a <p> that directly contains a <div> is turned into a <div>;
        for a <div> whose only child is a styled <img>, 'position:relative'
        is removed from the div style and an img style that is exactly
        'display: block;' is dropped; for a div/div/img chain where each
        div has a single child, 'display:inline;' is prepended to the
        style of the inner div.

        log is accepted for interface symmetry and is not used here.
        Returns None.
        """
        from calibre.ebooks.oeb.base import XPath, XHTML
        # Fix <p><div> constructs as the asinine epubchecker complains
        # about them
        pdiv = XPath('//h:p/h:div')
        for div in pdiv(root):
            div.getparent().tag = XHTML('div')
        # Remove the position:relative as it causes problems with some epub
        # renderers. Remove display: block on an image inside a div as it is
        # redundant and prevents text-align:center from working in ADE
        imgpath = XPath('//h:div/h:img[@style]')
        for img in imgpath(root):
            div = img.getparent()
            if len(div) == 1:
                # The XPath only guarantees a style attribute on the img;
                # a div without one is left untouched instead of raising
                # KeyError and aborting every remaining fix.
                style = div.get('style')
                if style is not None:
                    style = style.replace('position:relative', '')
                    if style.startswith(';'):
                        style = style[1:]
                    div.attrib['style'] = style
                if img.attrib.get('style', '') == 'display: block;':
                    del img.attrib['style']
        # A div/div/img construct causes text-align:center to not work in ADE
        # so set the display of the second div to inline. This should have no
        # effect (apart from minor vspace issues) in a compliant HTML renderer
        # but it fixes the centering of the image via a text-align:center on
        # the first div in ADE
        imgpath = XPath('descendant::h:div/h:div/h:img')
        for img in imgpath(root):
            div2 = img.getparent()
            div1 = div2.getparent()
            if len(div1) == len(div2) == 1:
                # The inner div is not required to carry a style attribute,
                # so fall back to an empty style rather than raising.
                div2.attrib['style'] = 'display:inline;' + div2.get('style', '')
    def filter_css(self, root, log):
        style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
        if style:
            style = style[0]
@ -40,9 +82,6 @@ class Extract(ODF2XHTML):
                        extra.extend(sel_map.get(cls, []))
                    if extra:
                        x.set('class', orig + ' ' + ' '.join(extra))
                html = etree.tostring(root, encoding='utf-8',
                        xml_declaration=True)
        return html
    def do_filter_css(self, css):
        from cssutils import parseString
@ -86,7 +125,7 @@ class Extract(ODF2XHTML):
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            try:
-                html = self.filter_css(html, log)
+                html = self.fix_markup(html, log)
            except:
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
@ -119,23 +158,4 @@ class ODTInput(InputFormatPlugin):
                accelerators):
        return Extract()(stream, '.', log)
    def postprocess_book(self, oeb, opts, log):
        # Fix <p><div> constructs as the asinine epubchecker complains
        # about them
        from calibre.ebooks.oeb.base import XPath, XHTML
        path = XPath('//h:p/h:div')
        path2 = XPath('//h:div[@style]/h:img[@style]')
        for item in oeb.spine:
            root = item.data
            if not hasattr(root, 'xpath'): continue
            for div in path(root):
                div.getparent().tag = XHTML('div')
            # This construct doesn't render well in HTML
            for img in path2(root):
                div = img.getparent()
                if 'position:relative' in div.attrib['style'] and len(div) == 1 \
                    and 'img' in div[0].tag:
                    del div.attrib['style']
--- a/src/calibre/gui2/store/stores/ebook_nl_plugin.py
+++ b/src/calibre/gui2/store/stores/ebook_nl_plugin.py
@ -23,9 +23,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class EBookNLStore(BasicStoreConfig, StorePlugin):
    def open(self, parent=None, detail_item=None, external=False):
-        url = 'http://ad.zanox.com/ppc/?19015168C29310186T'
+        url = 'http://www.ebook.nl/'
-        url_details = ('http://ad.zanox.com/ppc/?19016028C1098154549T&ULP=[['
+        url_details = ('http://www.ebook.nl/store/{0}')
                       'http://www.ebook.nl/store/{0}]]')
        if external or self.config.get('open_external', False):
            if detail_item:
--- a/src/calibre/gui2/store/stores/foyles_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/foyles_uk_plugin.py
@ -6,7 +6,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import urllib2
+import urllib2, re
 from contextlib import closing
 from lxml import html
@ -67,7 +67,10 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
                title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
                author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
                price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))
-                price = price[price.rfind(' '):]
+                mo = re.search('£[\d\.]+', price)
                if mo is None:
                    continue
                price = mo.group(0)
                counter -= 1
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@ -873,7 +873,7 @@ class BrowseServer(object):
                suffix=_('in search')+': '+xml(query))
        return self.browse_template(sort, category=False, initial_search=query).format(
                title=_('Matching books'),
-                script='booklist();', main=html)
+                script='search_result();', main=html)
    # }}}