Merge with trunk.

2025-11-15 19:13:02 -05:00 · 2012-07-29 08:25:14 +02:00 · 2012-07-29 08:25:14 +02:00 · c10c8d03ad
commit c10c8d03ad
parent 6ce45b8dc0 cca78313c1
4 changed files with 91 additions and 66 deletions
--- a/recipes/phillosophy_now.recipe
+++ b/recipes/phillosophy_now.recipe
@ -0,0 +1,75 @@
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from collections import OrderedDict
 class PhilosophyNow(BasicNewsRecipe):
    '''Recipe that downloads the current issue of Philosophy Now.
    The site login form is submitted in get_browser() using the recipe's
    username and password, and parse_index() groups the issue's articles
    by the section heading that precedes each of them.
    '''
    title       = 'Philosophy Now'
    __author__  = 'Rick Shang'
    description = '''Philosophy Now is a lively magazine for everyone
    interested in ideas. It isn't afraid to tackle all the major questions of
    life, the universe and everything. Published every two months, it tries to
    corrupt innocent citizens by convincing them that philosophy can be
    exciting, worthwhile and comprehensible, and also to provide some enjoyable
    reading matter for those already ensnared by the muse, such as philosophy
    students and academics.'''
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'
    # Keep only the element with id 'fullMainColumn' and drop the elements
    # with class 'articleTools' from it.
    keep_only_tags = [dict(attrs={'id':'fullMainColumn'})]
    remove_tags = [dict(attrs={'class':'articleTools'})]
    no_javascript = True
    no_stylesheets = True
    needs_subscription = True
    def get_browser(self):
        '''Return a browser on which the site's login form has been submitted.'''
        # get_browser is looked up on the base class, so the instance must be
        # passed explicitly; calling it without self raises TypeError.
        br = BasicNewsRecipe.get_browser(self)
        br.open('https://philosophynow.org/auth/login')
        # nr = 1 picks the form at index 1 on the page (presumably the login
        # form; index 0 is some other form - verify against the live page).
        br.select_form(nr = 1)
        br['username'] = self.username
        br['password'] = self.password
        br.submit()
        return br
    def parse_index(self):
        '''Build the feed list for the issue advertised on the home page.
        Sets self.timefmt and self.cover_url as a side effect.
        Returns a list of (section title, articles) tuples in the order the
        sections are first seen on the issue page. Each article is a dict
        with the keys 'title', 'url', 'description' and 'date'.
        '''
        #Go to the issue
        soup0 = self.index_to_soup('http://philosophynow.org/')
        issue = soup0.find('div',attrs={'id':'navColumn'})
        #Find date & cover
        cover = issue.find('div', attrs={'id':'cover'})
        date = self.tag_to_string(cover.find('h3')).strip()
        self.timefmt = u' [%s]'%date
        img=cover.find('img',src=True)['src']
        # The home page links the medium cover; the large one lives at the
        # same path with 'medium' swapped for 'large'.
        self.cover_url = 'http://philosophynow.org' + re.sub('medium','large',img)
        # The issue number is what is left of the cover path once the
        # directory prefix and the extension are removed. The dot is escaped
        # so that only a literal '.jpg' is stripped.
        issuenum = re.sub('/media/images/covers/medium/issue','',img)
        issuenum = re.sub(r'\.jpg','',issuenum)
        #Go to the main body
        current_issue_url = 'http://philosophynow.org/issues/' + issuenum
        soup = self.index_to_soup(current_issue_url)
        div = soup.find ('div', attrs={'class':'articlesColumn'})
        # OrderedDict keeps the sections in the order they appear on the page.
        feeds = OrderedDict()
        for post in div.findAll('h3'):
            a=post.find('a',href=True)
            if a is None:
                # Heading without a link: not an article entry.
                continue
            url="http://philosophynow.org" + a['href']
            title=self.tag_to_string(a).strip()
            # The nearest preceding h4 names the section of this article.
            s=post.findPrevious('h4')
            section_title = self.tag_to_string(s).strip()
            # The first paragraph after the heading is used as description.
            d=post.findNext('p')
            desc = self.tag_to_string(d).strip()
            feeds.setdefault(section_title, []).append(
                {'title':title, 'url':url, 'description':desc, 'date':''})
        return list(feeds.items())
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -506,16 +506,6 @@ compile_gpm_templates = True
 #   default_tweak_format = 'remember'
 default_tweak_format = None
 #: Enable multi-character first-letters in the tag browser
 # Some languages have letters that can be represented by multiple characters.
 # For example, Czech has a 'character' "ch" that sorts between "h" and "i".
 # If this tweak is True, then the tag browser will take these characters into
 # consideration when partitioning by first letter.
 # Examples:
 #    enable_multicharacters_in_tag_browser = True
 #    enable_multicharacters_in_tag_browser = False
 enable_multicharacters_in_tag_browser = True
 #: Do not preselect a completion when editing authors/tags/series/etc.
 # This means that you can make changes and press Enter and your changes will
 # not be overwritten by a matching completion. However, if you wish to use the
--- a/src/calibre/ebooks/mobi/writer2/serializer.py
+++ b/src/calibre/ebooks/mobi/writer2/serializer.py
@ -11,8 +11,9 @@ import re
 from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
        namespace, prefixname, urlnormalize)
 from calibre.ebooks import normalize
 from calibre.ebooks.mobi.mobiml import MBP_NS
-from calibre.ebooks.mobi.utils import is_guide_ref_start, utf8_text
+from calibre.ebooks.mobi.utils import is_guide_ref_start
 from collections import defaultdict
 from urlparse import urldefrag
@ -355,7 +356,7 @@ class Serializer(object):
        text = text.replace(u'\u00AD', '') # Soft-hyphen
        if quot:
            text = text.replace('"', '&quot;')
-        self.buf.write(utf8_text(text, empty=True))
+        self.buf.write(normalize(text).encode('utf-8'))
    def fixup_links(self):
        '''
--- a/src/calibre/gui2/tag_browser/model.py
+++ b/src/calibre/gui2/tag_browser/model.py
@ -9,7 +9,6 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import traceback, cPickle, copy
 from itertools import repeat
 from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
        QMimeData, QModelIndex, pyqtSignal, QObject)
@ -17,7 +16,7 @@ from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
 from calibre.gui2 import NONE, gprefs, config, error_dialog
 from calibre.library.database2 import Tag
 from calibre.utils.config import tweaks
-from calibre.utils.icu import sort_key, lower, strcmp, contractions
+from calibre.utils.icu import sort_key, lower, strcmp, collation_order
 from calibre.library.field_metadata import TagsIcons, category_icon_map
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.utils.formatter import EvalFormatter
@ -258,16 +257,6 @@ class TagsModel(QAbstractItemModel): # {{{
                self.hidden_categories.add(cat)
        db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
        conts = contractions()
        if len(conts) == 0 or not tweaks['enable_multicharacters_in_tag_browser']:
            self.do_contraction = False
        else:
            self.do_contraction = True
            nconts = set()
            for s in conts:
                nconts.add(icu_upper(s))
            self.contraction_set = frozenset(nconts)
        self.db = db
        self._run_rebuild()
        self.endResetModel()
@ -416,53 +405,23 @@ class TagsModel(QAbstractItemModel): # {{{
            tt = key if in_uc else None
            if collapse_model == 'first letter':
-                # Build a list of 'equal' first letters by looking for
+                # Build a list of 'equal' first letters by noticing changes
-                # overlapping ranges. If a range overlaps another, then the
+                # in ICU's 'ordinal' for the first letter. In this case, the
-                # letters are assumed to be equivalent. ICU collating is complex
+                # first letter can actually be more than one letter long.
-                # beyond belief. This mechanism lets us determine the logical
+                cl_list = [None] * len(data[key])
-                # first character from ICU's standpoint.
+                last_ordnum = 0
                chardict = {}
                for idx,tag in enumerate(data[key]):
                    if not tag.sort:
                        c = ' '
                    else:
-                        if not self.do_contraction:
+                        c = tag.sort
-                            c = icu_upper(tag.sort)[0]
+                    ordnum, ordlen = collation_order(c)
-                        else:
+                    if last_ordnum != ordnum:
-                            v = icu_upper(tag.sort)
+                        last_c = icu_upper(c[0:ordlen])
-                            c = v[0]
+                        last_ordnum = ordnum
-                            for s in self.contraction_set:
+                    cl_list[idx] = last_c
-                                if len(s) > len(c) and v.startswith(s):
+            top_level_component = 'z' + data[key][0].original_name
                                    c = s
                    if c not in chardict:
                        chardict[c] = [idx, idx]
                    else:
                        chardict[c][1] = idx
                # sort the ranges to facilitate detecting overlap
                if len(chardict) == 1 and ' ' in chardict:
                    # The category could not be partitioned.
                    collapse_model = 'disable'
                else:
                    ranges = sorted([(v[0], v[1], c) for c,v in chardict.items()])
                    # Create a list of 'first letters' to use for each item in
                    # the category. The list is generated using the ranges. Overlaps
                    # are filled with the character that first occurs.
                    cl_list = list(repeat(None, len(data[key])))
                    for t in ranges:
                        start = t[0]
                        c = t[2]
                        if cl_list[start] is None:
                            nc = c
                        else:
                            nc = cl_list[start]
                        for i in range(start, t[1]+1):
                            cl_list[i] = nc
            if len(data[key]) > 0:
                top_level_component = 'z' + data[key][0].original_name
            else:
                top_level_component = ''
            last_idx = -collapse
            category_is_hierarchical = not (
                key in ['authors', 'publisher', 'news', 'formats', 'rating'] or