Merge from trunk

2025-07-09 03:04:10 -04:00 · 2010-06-24 07:41:55 +01:00 · 2010-06-24 07:41:55 +01:00 · bb6cacd205
commit bb6cacd205
parent 55b9a96fd8 195a3a9cd1
12 changed files with 207 additions and 38 deletions
--- a/resources/images/news/lrb.png
+++ b/resources/images/news/lrb.png
--- a/resources/images/news/lrb_payed.png
+++ b/resources/images/news/lrb_payed.png
--- a/resources/recipes/lrb.recipe
+++ b/resources/recipes/lrb.recipe
@ -1,6 +1,6 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lrb.co.uk
 '''
@ -8,17 +8,20 @@ lrb.co.uk
 from calibre.web.feeds.news import BasicNewsRecipe
 class LondonReviewOfBooks(BasicNewsRecipe):
-    title                 = u'London Review of Books'
+    title                 = 'London Review of Books (free)'
-    __author__            = u'Darko Miletic'
+    __author__            = 'Darko Miletic'
-    description           = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
+    description           = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
-    category              = 'news, literature, England'
+    category              = 'news, literature, UK'
-    publisher             = 'London Review of Books'
+    publisher             = 'LRB ltd.'
-    oldest_article        = 7
+    oldest_article        = 15
    max_articles_per_feed = 100
    language              = 'en_GB'
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    publication_type      = 'magazine'
    masthead_url          = 'http://www.lrb.co.uk/assets/images/lrb_logo_big.gif'
    extra_css             = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
    conversion_options = {
                             'comments'  : description
@ -27,13 +30,16 @@ class LondonReviewOfBooks(BasicNewsRecipe):
                            ,'publisher' : publisher
                         }
-    keep_only_tags = [dict(name='div' , attrs={'id'   :'main'})]
+    keep_only_tags = [dict(attrs={'class':['article-body indent','letters','article-list']})]
-    remove_tags = [
+    remove_attributes = ['width','height']
                    dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']})
                   ,dict(name='div' , attrs={'id'   :['mainmenu','precontent','otherarticles']     })
                   ,dict(name='span', attrs={'class':['inlineright','article-icons']})
                   ,dict(name='ul'  , attrs={'class':'article-controls'})
                   ,dict(name='p'   , attrs={'class':'meta-info'       })
                  ]
    feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
    def get_cover_url(self):
        '''
        Return the URL of the cover image found on the LRB front page.

        The front page is fetched with index_to_soup and searched for a
        ``<p class="cover">`` element; the ``src`` of the image nested in
        its link is appended to the site root. Returns None when no such
        paragraph is found.
        '''
        front_page = self.index_to_soup('http://www.lrb.co.uk/')
        cover_paragraph = front_page.find('p',attrs={'class':'cover'})
        if not cover_paragraph:
            return None
        return 'http://www.lrb.co.uk' + cover_paragraph.a.img['src']
--- a/resources/recipes/lrb_payed.recipe
+++ b/resources/recipes/lrb_payed.recipe
@ -0,0 +1,75 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lrb.co.uk
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class LondonReviewOfBooksPayed(BasicNewsRecipe):
    '''
    Recipe for the subscription content of the London Review of Books.

    The article list is built by parse_index from the issue linked on the
    site's front page. When a username and password are set, get_browser
    submits them to the LOGIN page before anything is downloaded.
    '''
    title                 = 'London Review of Books'
    __author__            = 'Darko Miletic'
    description           = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
    category              = 'news, literature, UK'
    publisher             = 'LRB Ltd.'
    max_articles_per_feed = 100
    language              = 'en_GB'
    no_stylesheets        = True
    delay                 = 1
    use_embedded_content  = False
    encoding              = 'utf-8'
    # Site root; every relative href/src scraped below is appended to it.
    INDEX                 = 'http://www.lrb.co.uk'
    LOGIN                 = INDEX + '/login'
    masthead_url          = INDEX + '/assets/images/lrb_logo_big.gif'
    needs_subscription    = True
    publication_type      = 'magazine'
    extra_css             = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '

    def get_browser(self):
        '''
        Return the browser used for downloads, logged in when possible.

        The login form is only submitted when both self.username and
        self.password are set; otherwise the plain browser is returned.
        '''
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            # NOTE(review): nr=1 presumably selects the second form on the
            # login page (zero-based index) -- confirm against the live page.
            br.select_form(nr=1)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        '''
        Build the article list for the issue linked from the front page.

        Side effect: sets self.cover_url when a ``<p class="cover">``
        element is found on the front page.

        :return: A one-element list ``[(feed_title, articles)]`` where each
                 article is a dict with the keys ``title``, ``date``,
                 ``url`` and ``description``. The article list is empty
                 when the front page has no cover paragraph or the issue
                 page has no ``article-list`` element.
        '''
        articles = []
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('p',attrs={'class':'cover'})
        lrbtitle = self.title
        if  cover_item:
            self.cover_url = self.INDEX + cover_item.a.img['src']
            content = self.INDEX + cover_item.a['href']
            soup2 = self.index_to_soup(content)
            sitem = soup2.find(attrs={'class':'article-list'})
            lrbtitle = soup2.head.title.string
            # find() yields None when the issue page has no article list;
            # treat that as "no articles" rather than failing with an
            # AttributeError on None.findAll.
            if sitem is None:
                title_links = []
            else:
                title_links = sitem.findAll('a',attrs={'class':'title'})
            for item in title_links:
                description = u''
                title_prefix = u''
                if item.has_key('href'):
                    url   = self.INDEX + item['href']
                    title = title_prefix + self.tag_to_string(item)
                    date  = strftime(self.timefmt)
                    articles.append({
                                      'title'      :title
                                     ,'date'       :date
                                     ,'url'        :url
                                     ,'description':description
                                    })
        return [(lrbtitle, articles)]

    # Metadata handed to the conversion step, taken from the attributes above.
    conversion_options = {
                             'comments'  : description
                            ,'tags'      : category
                            ,'language'  : language
                            ,'publisher' : publisher
                         }
    keep_only_tags = [dict(name='div' , attrs={'class':['article-body indent','letters']})]
    remove_attributes = ['width','height']
--- a/src/calibre/devices/usbms/books.py
+++ b/src/calibre/devices/usbms/books.py
@ -172,5 +172,10 @@ class CollectionsBookList(BookList):
        For each book in the booklist for the card oncard, remove it from all
        its current collections, then add it to the collections specified in
        device_collections.
        oncard is None for the main memory, carda for card A, cardb for card B,
        etc.
        booklist is the object created by the :method:`books` call above.
        '''
        pass
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -107,9 +107,21 @@ class CSSPreProcessor(object):
    PAGE_PAT   = re.compile(r'@page[^{]*?{[^}]*?}')
-    def __call__(self, data):
+    def __call__(self, data, add_namespace=False):
        from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
        data = self.PAGE_PAT.sub('', data)
        if not add_namespace:
            return data
        ans, namespaced = [], False
        for line in data.splitlines():
            ll = line.lstrip()
            if not (namespaced or ll.startswith('@import') or
                        ll.startswith('@charset')):
                ans.append(XHTML_CSS_NAMESPACE.strip())
                namespaced = True
            ans.append(line)
        return u'\n'.join(ans)
 class HTMLPreProcessor(object):
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -20,7 +20,7 @@ from itertools import izip
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux, isfreebsd
+from calibre.constants import islinux, isfreebsd, iswindows
 from calibre import unicode_path
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
@ -32,9 +32,14 @@ class Link(object):
    @classmethod
    def url_to_local_path(cls, url, base):
-        path = urlunparse(('', '', url.path, url.params, url.query, ''))
+        path = url.path
        isabs = False
        if iswindows and path.startswith('/'):
            path = path[1:]
            isabs = True
        path = urlunparse(('', '', path, url.params, url.query, ''))
        path = unquote(path)
-        if os.path.isabs(path):
+        if isabs or os.path.isabs(path):
            return path
        return os.path.abspath(os.path.join(base, path))
@ -307,6 +312,7 @@ class HTMLInput(InputFormatPlugin):
            xpath
        from calibre import guess_type
        import cssutils
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb
@ -371,7 +377,7 @@ class HTMLInput(InputFormatPlugin):
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))
        for item in oeb.manifest.values():
-            if item.media_type in OEB_STYLES:
+            if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
@ -409,12 +415,30 @@ class HTMLInput(InputFormatPlugin):
        oeb.container = DirContainer(os.getcwdu(), oeb.log)
        return oeb
    def link_to_local_path(self, link_, base=None):
        '''
        Turn a link into a local path and a fragment.

        :param link_: The link, either unicode or a UTF-8 encoded byte
            string.
        :param base: Passed to Link as its base; the current working
            directory is used when this is falsy.
        :return: ``(path, fragment)`` with ``/`` in the path replaced by
            os.sep, or ``(None, None)`` when the link cannot be decoded,
            cannot be processed by Link, has no local path, or the path is
            empty after stripping whitespace.
        '''
        if not isinstance(link_, unicode):
            try:
                # 'strict' is the valid handler name; the previous value
                # 'error' is not a registered codec error handler.
                link_ = link_.decode('utf-8', 'strict')
            except Exception:
                self.log.warn('Failed to decode link %r. Ignoring'%link_)
                return None, None
        try:
            l = Link(link_, base if base else os.getcwdu())
        except Exception:
            # Best effort: a link that cannot be processed is logged and
            # skipped, but KeyboardInterrupt/SystemExit still propagate.
            self.log.exception('Failed to process link: %r'%link_)
            return None, None
        if l.path is None:
            # Not a local resource
            return None, None
        link = l.path.replace('/', os.sep).strip()
        frag = l.fragment
        if not link:
            return None, None
        return link, frag
    def resource_adder(self, link_, base=None):
-        link = self.urlnormalize(link_)
+        link, frag = self.link_to_local_path(link_, base=base)
-        link, frag = self.urldefrag(link)
+        if link is None:
        link = unquote(link).replace('/', os.sep)
        if not link.strip():
            return link_
        try:
            if base and not os.path.isabs(link):
@ -442,6 +466,9 @@ class HTMLInput(InputFormatPlugin):
            item = self.oeb.manifest.add(id, href, media_type)
            item.html_input_href = bhref
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                        self.css_import_handler, os.path.dirname(link))
            item.data
            self.added_resources[link] = href
@ -450,7 +477,17 @@ class HTMLInput(InputFormatPlugin):
            nlink = '#'.join((nlink, frag))
        return nlink
-
+    def css_import_handler(self, base, href):
        link, frag = self.link_to_local_path(href, base=base)
        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
            return (None, None)
        try:
            raw = open(link, 'rb').read().decode('utf-8', 'replace')
            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
        except:
            self.log.exception('Failed to read CSS file: %r'%link)
            return (None, None)
        return (None, raw)
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -17,6 +17,7 @@ from urlparse import urljoin
 from lxml import etree, html
 from cssutils import CSSParser
 from cssutils.css import CSSRule
 import calibre
 from calibre.constants import filesystem_encoding
@ -762,6 +763,7 @@ class Manifest(object):
            self.href = self.path = urlnormalize(href)
            self.media_type = media_type
            self.fallback = fallback
            self.override_css_fetch = None
            self.spine_position = None
            self.linear = True
            if loader is None and data is None:
@ -982,15 +984,40 @@ class Manifest(object):
        def _parse_css(self, data):
            def get_style_rules_from_import(import_rule):
                # Collect the font-face and style rules of the imported
                # sheet, descending recursively into nested @import rules.
                # Rules of any other type are ignored.
                sheet = import_rule.styleSheet
                if not sheet:
                    return []
                collected = []
                for child in sheet.cssRules:
                    kind = child.type
                    if kind == CSSRule.IMPORT_RULE:
                        collected.extend(get_style_rules_from_import(child))
                    elif kind in (CSSRule.FONT_FACE_RULE,
                            CSSRule.STYLE_RULE):
                        collected.append(child)
                return collected
            self.oeb.log.debug('Parsing', self.href, '...')
            data = self.oeb.decode(data)
-            data = self.oeb.css_preprocessor(data)
+            data = self.oeb.css_preprocessor(data, add_namespace=True)
            data = XHTML_CSS_NAMESPACE + data
            parser = CSSParser(loglevel=logging.WARNING,
-                               fetcher=self._fetch_css,
+                               fetcher=self.override_css_fetch or self._fetch_css,
                               log=_css_logger)
            data = parser.parseString(data, href=self.href)
            data.namespaces['h'] = XHTML_NS
            import_rules = list(data.cssRules.rulesOfType(CSSRule.IMPORT_RULE))
            rules_to_append = []
            insert_index = None
            for r in data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                insert_index = data.cssRules.index(r)
                break
            for rule in import_rules:
                rules_to_append.extend(get_style_rules_from_import(rule))
            for r in reversed(rules_to_append):
                data.insertRule(r, index=insert_index)
            for rule in import_rules:
                data.deleteRule(rule)
            return data
        def _fetch_css(self, path):
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@ -139,11 +139,18 @@ class EbookIterator(object):
                    if id != -1:
                        families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)]
                        if family:
-                            family = family.group(1).strip().replace('"', '')
+                            family = family.group(1)
-                            bad_map[family] = families[0]
+                            specified_families = [x.strip().replace('"',
-                            if family not in families:
+                                '').replace("'", '') for x in family.split(',')]
                            aliasing_ok = False
                            for f in specified_families:
                                bad_map[f] = families[0]
                                if not aliasing_ok and f in families:
                                    aliasing_ok = True
                            if not aliasing_ok:
                                prints('WARNING: Family aliasing not fully supported.')
-                                prints('\tDeclared family: %s not in actual families: %s'
+                                prints('\tDeclared family: %r not in actual families: %r'
                                        % (family, families))
                            else:
                                prints('Loaded embedded font:', repr(family))
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -1240,6 +1240,8 @@ class DeviceMixin(object): # {{{
        self.card_b_view.reset()
    def _upload_collections(self, job, view):
        if job.failed:
            self.device_job_exception(job)
        view.reset()
    def upload_collections(self, booklist, view):
--- a/src/calibre/gui2/dialogs/tag_list_editor.py
+++ b/src/calibre/gui2/dialogs/tag_list_editor.py
@ -74,5 +74,3 @@ class TagListEditor(QDialog, Ui_TagListEditor):
            self.to_delete.append(id)
            self.available_tags.takeItem(self.available_tags.row(item))
    def accept(self):
        # Delegates directly to QDialog.accept; no extra work is done here.
        QDialog.accept(self)
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -302,7 +302,7 @@ Take your pick:
 Why does |app| show only some of my fonts on OS X?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-|app| embeds fonts in ebook files it creates. E-book files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts founf on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory.
+|app| embeds fonts in ebook files it creates. E-book files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts found on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory.
 |app| is not starting on Windows?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~