Merge with trunk.

2025-06-23 15:30:45 -04:00 · 2012-07-29 08:25:14 +02:00 · 2012-07-29 08:25:14 +02:00 · c10c8d03ad
commit c10c8d03ad
parent 6ce45b8dc0 cca78313c1
4 changed files with 91 additions and 66 deletions
--- a/recipes/phillosophy_now.recipe
+++ b/recipes/phillosophy_now.recipe
@ -0,0 +1,75 @@
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from collections import OrderedDict
+
+class PhilosophyNow(BasicNewsRecipe):
+    """Recipe that downloads the current issue of Philosophy Now.
+
+    Logs in with the subscriber's credentials, reads the cover image and the
+    issue number from the site's front page, then builds one feed per section
+    heading found on the issue page.
+    """
+
+    title       = 'Philosophy Now'
+    __author__  = 'Rick Shang'
+
+    description = '''Philosophy Now is a lively magazine for everyone
+    interested in ideas. It isn't afraid to tackle all the major questions of
+    life, the universe and everything. Published every two months, it tries to
+    corrupt innocent citizens by convincing them that philosophy can be
+    exciting, worthwhile and comprehensible, and also to provide some enjoyable
+    reading matter for those already ensnared by the muse, such as philosophy
+    students and academics.'''
+    language = 'en'
+    category = 'news'
+    encoding = 'UTF-8'
+
+    keep_only_tags = [dict(attrs={'id':'fullMainColumn'})]
+    remove_tags = [dict(attrs={'class':'articleTools'})]
+    no_javascript = True
+    no_stylesheets = True
+    needs_subscription = True
+
+    def get_browser(self):
+        """Return a browser that has submitted the site's login form.
+
+        Opens the login page, selects the form at index 1, fills in
+        ``self.username`` / ``self.password`` and submits it.
+        """
+        # NOTE(review): called without an instance; presumably get_browser is
+        # callable on the class in this calibre version -- confirm before
+        # changing it to pass ``self``.
+        br = BasicNewsRecipe.get_browser()
+        br.open('https://philosophynow.org/auth/login')
+        br.select_form(nr = 1)
+        br['username'] = self.username
+        br['password'] = self.password
+        br.submit()
+        return br
+
+    def parse_index(self):
+        """Build the feed list for the current issue.
+
+        Returns a list of ``(section_title, articles)`` tuples in the order
+        the sections first appear on the issue page. Each article is a dict
+        with ``title``, ``url``, ``description`` and ``date`` keys.
+
+        Side effects: sets ``self.timefmt`` and ``self.cover_url``.
+        """
+        base_url = 'http://philosophynow.org'
+
+        # Go to the issue
+        soup0 = self.index_to_soup(base_url + '/')
+        issue = soup0.find('div', attrs={'id':'navColumn'})
+
+        # Find date & cover
+        cover = issue.find('div', attrs={'id':'cover'})
+        date = self.tag_to_string(cover.find('h3')).strip()
+        self.timefmt = u' [%s]'%date
+        img = cover.find('img', src=True)['src']
+        self.cover_url = base_url + img.replace('medium', 'large')
+
+        # The issue number is what remains of the cover path once the fixed
+        # prefix and the extension are removed. Literal replacement is used
+        # on purpose: as a regex, '.jpg' would match any character followed
+        # by 'jpg', not just the extension.
+        issuenum = img.replace('/media/images/covers/medium/issue', '')
+        issuenum = issuenum.replace('.jpg', '')
+
+        # Go to the main body
+        current_issue_url = base_url + '/issues/' + issuenum
+        soup = self.index_to_soup(current_issue_url)
+        div = soup.find('div', attrs={'class':'articlesColumn'})
+
+        feeds = OrderedDict()
+
+        for post in div.findAll('h3'):
+            a = post.find('a', href=True)
+            if a is None:
+                # A heading without a link is not an article entry
+                continue
+            url = base_url + a['href']
+            title = self.tag_to_string(a).strip()
+            # The nearest preceding <h4> is used as the section name
+            section_title = self.tag_to_string(post.findPrevious('h4')).strip()
+            # The next <p> after the heading is used as the description
+            desc = self.tag_to_string(post.findNext('p')).strip()
+            feeds.setdefault(section_title, []).append(
+                {'title':title, 'url':url, 'description':desc, 'date':''})
+
+        # items() instead of iteritems(): same ordered pairs, and it also
+        # exists on Python 3.
+        return list(feeds.items())
+
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -506,16 +506,6 @@ compile_gpm_templates = True
 #   default_tweak_format = 'remember'
 default_tweak_format = None

-#: Enable multi-character first-letters in the tag browser
-# Some languages have letters that can be represented by multiple characters.
-# For example, Czech has a 'character' "ch" that sorts between "h" and "i".
-# If this tweak is True, then the tag browser will take these characters into
-# consideration when partitioning by first letter.
-# Examples:
-#    enable_multicharacters_in_tag_browser = True
-#    enable_multicharacters_in_tag_browser = False
-enable_multicharacters_in_tag_browser = True
-
 #: Do not preselect a completion when editing authors/tags/series/etc.
 # This means that you can make changes and press Enter and your changes will
 # not be overwritten by a matching completion. However, if you wish to use the
--- a/src/calibre/ebooks/mobi/writer2/serializer.py
+++ b/src/calibre/ebooks/mobi/writer2/serializer.py
@ -11,8 +11,9 @@ import re

 from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
        namespace, prefixname, urlnormalize)
+from calibre.ebooks import normalize
 from calibre.ebooks.mobi.mobiml import MBP_NS
-from calibre.ebooks.mobi.utils import is_guide_ref_start, utf8_text
+from calibre.ebooks.mobi.utils import is_guide_ref_start

 from collections import defaultdict
 from urlparse import urldefrag
@ -355,7 +356,7 @@ class Serializer(object):
        text = text.replace(u'\u00AD', '') # Soft-hyphen
        if quot:
            text = text.replace('"', '&quot;')
-        self.buf.write(utf8_text(text, empty=True))
+        self.buf.write(normalize(text).encode('utf-8'))

    def fixup_links(self):
        '''
--- a/src/calibre/gui2/tag_browser/model.py
+++ b/src/calibre/gui2/tag_browser/model.py
@ -9,7 +9,6 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import traceback, cPickle, copy
-from itertools import repeat

 from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
        QMimeData, QModelIndex, pyqtSignal, QObject)
@ -17,7 +16,7 @@ from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
 from calibre.gui2 import NONE, gprefs, config, error_dialog
 from calibre.library.database2 import Tag
 from calibre.utils.config import tweaks
-from calibre.utils.icu import sort_key, lower, strcmp, contractions
+from calibre.utils.icu import sort_key, lower, strcmp, collation_order
 from calibre.library.field_metadata import TagsIcons, category_icon_map
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.utils.formatter import EvalFormatter
@ -258,16 +257,6 @@ class TagsModel(QAbstractItemModel): # {{{
                self.hidden_categories.add(cat)
        db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))

-        conts = contractions()
-        if len(conts) == 0 or not tweaks['enable_multicharacters_in_tag_browser']:
-            self.do_contraction = False
-        else:
-            self.do_contraction = True
-            nconts = set()
-            for s in conts:
-                nconts.add(icu_upper(s))
-            self.contraction_set = frozenset(nconts)
-
        self.db = db
        self._run_rebuild()
        self.endResetModel()
@ -416,53 +405,23 @@ class TagsModel(QAbstractItemModel): # {{{
            tt = key if in_uc else None

            if collapse_model == 'first letter':
-                # Build a list of 'equal' first letters by looking for
-                # overlapping ranges. If a range overlaps another, then the
-                # letters are assumed to be equivalent. ICU collating is complex
-                # beyond belief. This mechanism lets us determine the logical
-                # first character from ICU's standpoint.
-                chardict = {}
+                # Build a list of 'equal' first letters by noticing changes
+                # in ICU's 'ordinal' for the first letter. In this case, the
+                # first letter can actually be more than one letter long.
+                cl_list = [None] * len(data[key])
+                last_ordnum = 0
                for idx,tag in enumerate(data[key]):
                    if not tag.sort:
                        c = ' '
                    else:
-                        if not self.do_contraction:
-                            c = icu_upper(tag.sort)[0]
-                        else:
-                            v = icu_upper(tag.sort)
-                            c = v[0]
-                            for s in self.contraction_set:
-                                if len(s) > len(c) and v.startswith(s):
-                                    c = s
-                    if c not in chardict:
-                        chardict[c] = [idx, idx]
-                    else:
-                        chardict[c][1] = idx
-
-                # sort the ranges to facilitate detecting overlap
-                if len(chardict) == 1 and ' ' in chardict:
-                    # The category could not be partitioned.
-                    collapse_model = 'disable'
-                else:
-                    ranges = sorted([(v[0], v[1], c) for c,v in chardict.items()])
-                    # Create a list of 'first letters' to use for each item in
-                    # the category. The list is generated using the ranges. Overlaps
-                    # are filled with the character that first occurs.
-                    cl_list = list(repeat(None, len(data[key])))
-                    for t in ranges:
-                        start = t[0]
-                        c = t[2]
-                        if cl_list[start] is None:
-                            nc = c
-                        else:
-                            nc = cl_list[start]
-                        for i in range(start, t[1]+1):
-                            cl_list[i] = nc
-
-            if len(data[key]) > 0:
+                        c = tag.sort
+                    ordnum, ordlen = collation_order(c)
+                    if last_ordnum != ordnum:
+                        last_c = icu_upper(c[0:ordlen])
+                        last_ordnum = ordnum
+                    cl_list[idx] = last_c
            top_level_component = 'z' + data[key][0].original_name
-            else:
-                top_level_component = ''
+
            last_idx = -collapse
            category_is_hierarchical = not (
                key in ['authors', 'publisher', 'news', 'formats', 'rating'] or