Merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-09-28 12:05:20 +02:00 · 2011-09-28 12:05:20 +02:00 · 269d9093c0
commit 269d9093c0
parent d086acf076 2701005fb5
10 changed files with 172 additions and 46 deletions
--- a/recipes/folhadesaopaulo_sub.recipe
+++ b/recipes/folhadesaopaulo_sub.recipe
@ -0,0 +1,87 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class FSP(BasicNewsRecipe):
+    '''
+    Recipe for the printed edition of Folha de S. Paulo, built from the
+    index page at INDEX. A UOL login is performed in get_browser() when a
+    username and password are available (needs_subscription is set).
+    '''
+
+    title      = u'Folha de S\xE3o Paulo - Jornal'
+    __author__ = 'fluzao'
+    description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
+                  u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
+    # Index page that parse_index() scans for section anchors and article links
+    INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
+    language = 'pt'
+    no_stylesheets = True
+    max_articles_per_feed  = 30
+    remove_javascript     = True
+    needs_subscription = True
+    remove_tags_before = dict(name='b')
+    # NOTE(review): '!--/NOTICIA--' looks like an HTML comment marker used as a
+    # tag name -- confirm that this filter actually matches anything
+    remove_tags_after  = dict(name='!--/NOTICIA--')
+    remove_attributes = ['height','width']
+    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
+
+    # fixes the problem with the section names
+    # (maps the anchor names read from the index page in parse_index() to the
+    # section titles that should be displayed)
+    section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \
+                    'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \
+                    'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \
+                    'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio'}
+
+    # this solves the problem with truncated content in Kindle
+    conversion_options = {'linearize_tables' : True}
+
+    def get_browser(self):
+        '''
+        Return the browser obtained from BasicNewsRecipe.get_browser(). When
+        both self.username and self.password are set, the UOL login page is
+        opened first, the 'user' and 'pass' fields of the first form on that
+        page are filled in and the form is submitted.
+        '''
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('https://acesso.uol.com.br/login.html')
+            # Select the first form on the login page
+            br.form = br.forms().next()
+            br['user']   = self.username
+            br['pass'] = self.password
+            # The response is read and discarded
+            br.submit().read()
+            # NOTE(review): the login-failure check below is disabled and the
+            # response above is not bound to `raw`, so a failed login is not
+            # reported from here
+##            if 'Please try again' in raw:
+##                raise Exception('Your username and password are incorrect')
+        return br
+
+
+    def parse_index(self):
+        '''
+        Build the list of feeds from the page at INDEX.
+
+        Returns a list of (section title, articles) tuples, where articles is
+        a list of dicts with the keys 'title' and 'url'. As a side effect
+        self.cover_url is set to a URL derived from the first article of the
+        first section that is kept.
+        '''
+        soup = self.index_to_soup(self.INDEX)
+        feeds = []
+        articles = []
+        # Title used for any links that appear before the first section anchor
+        section_title = "Preambulo"
+        for post in soup.findAll('a'):
+            # if name=True => new section
+            # (only anchors whose serialized form begins with '<a name' count)
+            strpost = str(post)
+            if strpost.startswith('<a name'):
+                # Flush the articles collected so far under the previous title
+                if articles:
+                    feeds.append((section_title, articles))
+                    self.log()
+                    self.log('--> new section found, creating old section feed: ', section_title)
+                section_title = post['name']
+                if section_title in self.section_dict:
+                    section_title = self.section_dict[section_title]
+                articles = []
+                self.log('--> new section title:   ', section_title)
+            if strpost.startswith('<a href'):
+                url = post['href']
+                # Only site-relative links under /fsp are treated as articles
+                if url.startswith('/fsp'):
+                    url = 'http://www1.folha.uol.com.br'+url
+                    title = self.tag_to_string(post)
+                    self.log()
+                    self.log('--> post:  ', post)
+                    self.log('--> url:   ', url)
+                    self.log('--> title: ', title)
+                    articles.append({'title':title, 'url':url})
+        # Flush the last section
+        if articles:
+            feeds.append((section_title, articles))
+
+        # keeping the front page url
+        # (the URL of the second article of the first feed)
+        # NOTE(review): raises IndexError when no feed was collected or when
+        # the first feed holds fewer than two articles
+        minha_capa = feeds[0][1][1]['url']
+
+        # removing the 'Preambulo' section
+        # NOTE(review): feeds[0] is the 'Preambulo' feed only if links were
+        # found before the first section anchor -- confirm against the page
+        del feeds[0]
+
+        # creating the url for the cover image
+        # (derived from the first article of the first remaining feed by plain
+        # string replacement; raises IndexError when no feed remains)
+        coverurl = feeds[0][1][0]['url']
+        coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
+        coverurl = coverurl.replace('01.htm', '.jpg')
+        self.cover_url = coverurl
+
+        # inserting the cover page as the first article (nicer for kindle users)
+        feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}]))
+        return feeds
--- a/resources/content_server/browse/browse.js
+++ b/resources/content_server/browse/browse.js
@ -285,6 +285,15 @@ function booklist(hide_sort) {
    first_page(); 
 }

+// Page script for search results: initialise the book list when the first
+// results page (#page0) has content, otherwise show a "no results" message
+// in place of the list.
+function search_result() {
+    var first_page_html = $("#booklist #page0").html();
+    if (first_page_html) {
+        booklist();
+        return;
+    }
+    $("#booklist").html("No books found matching this query");
+}
+
 function show_details(a_dom) {
    var book = $(a_dom).closest('div.summary');
    var bd = $('#book_details_dialog');
--- a/session.vim
+++ b/session.vim
@ -2,7 +2,7 @@
 let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]

 python << EOFPY
-import os
+import os, sys

 import vipy

@ -11,8 +11,13 @@ project_dir = os.path.dirname(source_file)
 src_dir = os.path.abspath(os.path.join(project_dir, 'src'))
 base_dir = os.path.join(src_dir, 'calibre')

+sys.path.insert(0, src_dir)
+sys.resources_location = os.path.join(project_dir, 'resources')
+sys.extensions_location = os.path.join(base_dir, 'plugins')
+sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin')
+
 vipy.session.initialize(project_name='calibre', src_dir=src_dir,
-            project_dir=project_dir, base_dir=base_dir)
+            project_dir=project_dir, base_dir=project_dir)

 def recipe_title_callback(raw):
    return eval(raw.decode('utf-8')).replace(' ', '_')
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1236,7 +1236,7 @@ class StoreEbookNLStore(StoreBase):

    headquarters = 'NL'
    formats = ['EPUB', 'PDF']
-    affiliate = True
+    affiliate = False

 class StoreEbookscomStore(StoreBase):
    name = 'eBooks.com'
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -22,6 +22,8 @@ except:
    _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')

 def string_to_authors(raw):
+    if not raw:
+        return []
    raw = raw.replace('&&', u'\uffff')
    raw = _author_pat.sub('&', raw)
    authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')]
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@ -149,6 +149,7 @@ def metadata_from_filename(name, pat=None):
        try:
            au = match.group('author')
            aus = string_to_authors(au)
+            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    def swap(a):
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@ -25,8 +25,50 @@ class Extract(ODF2XHTML):
                with open(name, 'wb') as f:
                    f.write(data)

-    def filter_css(self, html, log):
+    def fix_markup(self, html, log):
+        '''
+        Parse ``html``, run :meth:`epubify_markup` and :meth:`filter_css` on
+        the resulting tree, and return the tree serialized as UTF-8 with an
+        XML declaration. ``log`` is passed through to both passes.
+        '''
        root = etree.fromstring(html)
+        self.epubify_markup(root, log)
+        self.filter_css(root, log)
+        html = etree.tostring(root, encoding='utf-8',
+                xml_declaration=True)
+        return html
+
+    def epubify_markup(self, root, log):
+        '''
+        Rewrite, in place, markup constructs in the parsed XHTML tree ``root``
+        that epubcheck complains about or that some EPUB renderers (notably
+        ADE) display badly. Returns nothing.
+
+        ``log`` is accepted so the signature matches :meth:`filter_css`; it is
+        not used here.
+
+        Elements without a ``style`` attribute are tolerated: a missing
+        attribute is treated as "nothing to strip" or as an empty style rather
+        than raising ``KeyError``.
+        '''
+        # Fix <p><div> constructs as the asinine epubchecker complains
+        # about them
+        from calibre.ebooks.oeb.base import XPath, XHTML
+        pdiv = XPath('//h:p/h:div')
+        for div in pdiv(root):
+            div.getparent().tag = XHTML('div')
+
+        # Remove the position:relative as it causes problems with some epub
+        # renderers. Remove display: block on an image inside a div as it is
+        # redundant and prevents text-align:center from working in ADE
+        imgpath = XPath('//h:div/h:img[@style]')
+        for img in imgpath(root):
+            div = img.getparent()
+            if len(div) == 1:
+                # The XPath only guarantees a style on the <img>; the parent
+                # <div> may have none, in which case there is nothing to strip
+                style = div.attrib.get('style')
+                if style is not None:
+                    style = style.replace('position:relative', '')
+                    if style.startswith(';'): style = style[1:]
+                    div.attrib['style'] = style
+                if img.attrib.get('style', '') == 'display: block;':
+                    del img.attrib['style']
+
+        # A div/div/img construct causes text-align:center to not work in ADE
+        # so set the display of the second div to inline. This should have no
+        # effect (apart from minor vspace issues) in a compliant HTML renderer
+        # but it fixes the centering of the image via a text-align:center on
+        # the first div in ADE
+        imgpath = XPath('descendant::h:div/h:div/h:img')
+        for img in imgpath(root):
+            div2 = img.getparent()
+            div1 = div2.getparent()
+            if len(div1) == len(div2) == 1:
+                # The inner <div> is not required to carry a style attribute
+                style = div2.attrib.get('style', '')
+                div2.attrib['style'] = 'display:inline;'+style
+
+
+    def filter_css(self, root, log):
        style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
        if style:
            style = style[0]
@ -40,9 +82,6 @@ class Extract(ODF2XHTML):
                        extra.extend(sel_map.get(cls, []))
                    if extra:
                        x.set('class', orig + ' ' + ' '.join(extra))
-                html = etree.tostring(root, encoding='utf-8',
-                        xml_declaration=True)
-        return html

    def do_filter_css(self, css):
        from cssutils import parseString
@ -86,7 +125,7 @@ class Extract(ODF2XHTML):
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            try:
-                html = self.filter_css(html, log)
+                html = self.fix_markup(html, log)
            except:
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
@ -119,23 +158,4 @@ class ODTInput(InputFormatPlugin):
                accelerators):
        return Extract()(stream, '.', log)

-    def postprocess_book(self, oeb, opts, log):
-        # Fix <p><div> constructs as the asinine epubchecker complains
-        # about them
-        from calibre.ebooks.oeb.base import XPath, XHTML
-        path = XPath('//h:p/h:div')
-        path2 = XPath('//h:div[@style]/h:img[@style]')
-        for item in oeb.spine:
-            root = item.data
-            if not hasattr(root, 'xpath'): continue
-            for div in path(root):
-                div.getparent().tag = XHTML('div')
-
-            # This construct doesn't render well in HTML
-            for img in path2(root):
-                div = img.getparent()
-                if 'position:relative' in div.attrib['style'] and len(div) == 1 \
-                    and 'img' in div[0].tag:
-                    del div.attrib['style']
-

--- a/src/calibre/gui2/store/stores/ebook_nl_plugin.py
+++ b/src/calibre/gui2/store/stores/ebook_nl_plugin.py
@ -23,9 +23,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class EBookNLStore(BasicStoreConfig, StorePlugin):

    def open(self, parent=None, detail_item=None, external=False):
-        url = 'http://ad.zanox.com/ppc/?19015168C29310186T'
-        url_details = ('http://ad.zanox.com/ppc/?19016028C1098154549T&ULP=[['
-                       'http://www.ebook.nl/store/{0}]]')
+        url = 'http://www.ebook.nl/'
+        url_details = ('http://www.ebook.nl/store/{0}')

        if external or self.config.get('open_external', False):
            if detail_item:
--- a/src/calibre/gui2/store/stores/foyles_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/foyles_uk_plugin.py
@ -6,7 +6,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import urllib2
+import urllib2, re
 from contextlib import closing

 from lxml import html
@ -67,7 +67,10 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
                title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
                author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
                price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))
-                price = price[price.rfind(' '):]
+                mo = re.search('£[\d\.]+', price)
+                if mo is None:
+                    continue
+                price = mo.group(0)

                counter -= 1

--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@ -873,7 +873,7 @@ class BrowseServer(object):
                suffix=_('in search')+': '+xml(query))
        return self.browse_template(sort, category=False, initial_search=query).format(
                title=_('Matching books'),
-                script='booklist();', main=html)
+                script='search_result();', main=html)

    # }}}