Merge from trunk

2025-07-09 03:04:10 -04:00 · 2012-07-29 10:05:33 +02:00 · 2012-07-29 10:05:33 +02:00 · 6c61b49125
commit 6c61b49125
parent 64a3f0e0c7 c1f497d1c4
2 changed files with 78 additions and 2 deletions
--- a/recipes/phillosophy_now.recipe
+++ b/recipes/phillosophy_now.recipe
@ -0,0 +1,75 @@
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from collections import OrderedDict
 class PhilosophyNow(BasicNewsRecipe):
    '''Recipe that fetches the current issue of Philosophy Now.

    The site requires a subscription: get_browser() logs in with the
    recipe's username and password, and parse_index() builds the list of
    articles grouped by section from the current issue's page.
    '''
    title       = 'Philosophy Now'
    __author__  = 'Rick Shang'
    description = '''Philosophy Now is a lively magazine for everyone
    interested in ideas. It isn't afraid to tackle all the major questions of
    life, the universe and everything. Published every two months, it tries to
    corrupt innocent citizens by convincing them that philosophy can be
    exciting, worthwhile and comprehensible, and also to provide some enjoyable
    reading matter for those already ensnared by the muse, such as philosophy
    students and academics.'''
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'
    # Keep only the main article column and drop the article toolbar.
    keep_only_tags = [dict(attrs={'id':'fullMainColumn'})]
    remove_tags = [dict(attrs={'class':'articleTools'})]
    no_javascript = True
    no_stylesheets = True
    needs_subscription = True
    def get_browser(self):
        '''Return a browser that has submitted the site's login form.

        Opens the login page, selects the form at index 1 on that page,
        fills in self.username and self.password and submits it.
        '''
        # NOTE(review): get_browser is called on the class without passing
        # self; presumably it is a classmethod in this calibre version --
        # confirm before changing the call.
        br = BasicNewsRecipe.get_browser()
        br.open('https://philosophynow.org/auth/login')
        br.select_form(nr = 1)
        br['username'] = self.username
        br['password'] = self.password
        br.submit()
        return br
    def parse_index(self):
        '''Build the article index for the current issue.

        Sets self.timefmt (from the cover heading) and self.cover_url as side
        effects. Returns a list of (section title, articles) tuples in the
        order the sections are first seen, where each article is a dict with
        the keys 'title', 'url', 'description' and 'date'.
        '''
        # Go to the issue
        soup0 = self.index_to_soup('http://philosophynow.org/')
        issue = soup0.find('div',attrs={'id':'navColumn'})
        # Find date & cover
        cover = issue.find('div', attrs={'id':'cover'})
        date = self.tag_to_string(cover.find('h3')).strip()
        self.timefmt = u' [%s]'%date
        img = cover.find('img',src=True)['src']
        # The large cover lives at the same path with 'medium' -> 'large'.
        self.cover_url = 'http://philosophynow.org' + re.sub('medium','large',img)
        # The issue number is what remains of the cover path once the fixed
        # prefix and the file extension are removed.
        issuenum = re.sub('/media/images/covers/medium/issue','',img)
        # Escape the dot so only a literal '.jpg' is stripped, not any
        # character followed by 'jpg'.
        issuenum = re.sub(r'\.jpg','',issuenum)
        # Go to the main body
        current_issue_url = 'http://philosophynow.org/issues/' + issuenum
        soup = self.index_to_soup(current_issue_url)
        div = soup.find ('div', attrs={'class':'articlesColumn'})
        feeds = OrderedDict()
        for post in div.findAll('h3'):
            a = post.find('a',href=True)
            if a is None:
                # A heading without a link is not an article entry.
                continue
            url = "http://philosophynow.org" + a['href']
            title = self.tag_to_string(a).strip()
            # The closest preceding <h4> is used as the section name and the
            # next <p> as the article description.
            section_title = self.tag_to_string(post.findPrevious('h4')).strip()
            desc = self.tag_to_string(post.findNext('p')).strip()
            feeds.setdefault(section_title, []).append(
                {'title':title, 'url':url, 'description':desc, 'date':''})
        return list(feeds.items())
--- a/src/calibre/ebooks/mobi/writer2/serializer.py
+++ b/src/calibre/ebooks/mobi/writer2/serializer.py
@ -11,8 +11,9 @@ import re
 from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
        namespace, prefixname, urlnormalize)
 from calibre.ebooks import normalize
 from calibre.ebooks.mobi.mobiml import MBP_NS
-from calibre.ebooks.mobi.utils import is_guide_ref_start, utf8_text
+from calibre.ebooks.mobi.utils import is_guide_ref_start
 from collections import defaultdict
 from urlparse import urldefrag
@ -355,7 +356,7 @@ class Serializer(object):
        text = text.replace(u'\u00AD', '') # Soft-hyphen
        if quot:
            text = text.replace('"', '&quot;')
-        self.buf.write(utf8_text(text))
+        self.buf.write(normalize(text).encode('utf-8'))
    def fixup_links(self):
        '''