Merge with trunk.

This commit is contained in:
Oliver Graf 2012-07-29 08:25:14 +02:00
commit c10c8d03ad
4 changed files with 91 additions and 66 deletions

View File

@ -0,0 +1,75 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from collections import OrderedDict
class PhilosophyNow(BasicNewsRecipe):
    """Recipe that downloads the current issue of Philosophy Now.

    The site requires a subscription: ``get_browser`` logs in with the
    configured username/password before anything is fetched, and
    ``parse_index`` locates the current issue from the cover shown on the
    home page and groups that issue's articles by section.
    """

    title = 'Philosophy Now'
    __author__ = 'Rick Shang'

    description = '''Philosophy Now is a lively magazine for everyone
interested in ideas. It isn't afraid to tackle all the major questions of
life, the universe and everything. Published every two months, it tries to
corrupt innocent citizens by convincing them that philosophy can be
exciting, worthwhile and comprehensible, and also to provide some enjoyable
reading matter for those already ensnared by the muse, such as philosophy
students and academics.'''
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'

    # Keep only the main article column and drop the article tool bar.
    keep_only_tags = [dict(attrs={'id':'fullMainColumn'})]
    remove_tags = [dict(attrs={'class':'articleTools'})]
    no_javascript = True
    no_stylesheets = True
    needs_subscription = True

    def get_browser(self):
        """Return a browser that has submitted the site's login form.

        The credentials come from ``self.username`` / ``self.password``.
        """
        # NOTE(review): called without ``self``; whether the base method
        # expects an instance argument depends on the calibre version this
        # recipe targets -- confirm before changing.
        br = BasicNewsRecipe.get_browser()
        br.open('https://philosophynow.org/auth/login')
        # The login form is selected by position (second form on the page).
        br.select_form(nr = 1)
        br['username'] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def parse_index(self):
        """Build the list of ``(section title, articles)`` for the issue.

        Side effects: sets ``self.timefmt`` to the issue date shown next to
        the cover and ``self.cover_url`` to the large version of the cover.

        Returns a list of ``(section_title, [article_dict, ...])`` tuples in
        the order the sections first appear on the issue page. Each article
        dict has the keys ``title``, ``url``, ``description`` and ``date``.
        """
        base = 'http://philosophynow.org'

        # Go to the issue
        home = self.index_to_soup(base + '/')
        issue = home.find('div', attrs={'id':'navColumn'})

        # Find date & cover
        cover = issue.find('div', attrs={'id':'cover'})
        date = self.tag_to_string(cover.find('h3')).strip()
        self.timefmt = u' [%s]'%date
        img = cover.find('img', src=True)['src']
        self.cover_url = base + re.sub('medium', 'large', img)

        # The issue number is embedded in the cover file name
        # (.../medium/issue<NUM>.jpg). The dot is escaped so that only the
        # literal ".jpg" extension is stripped, not "<any char>jpg".
        issuenum = re.sub('/media/images/covers/medium/issue', '', img)
        issuenum = re.sub(r'\.jpg', '', issuenum)

        # Go to the main body
        current_issue_url = base + '/issues/' + issuenum
        soup = self.index_to_soup(current_issue_url)
        div = soup.find('div', attrs={'class':'articlesColumn'})

        # OrderedDict keeps sections in the order they are first seen.
        feeds = OrderedDict()
        for post in div.findAll('h3'):
            a = post.find('a', href=True)
            if a is None:
                # Heading without a link: not an article entry.
                continue
            url = base + a['href']
            title = self.tag_to_string(a).strip()
            # The section is the closest preceding <h4>; the blurb is the
            # first <p> following the article heading.
            section_title = self.tag_to_string(post.findPrevious('h4')).strip()
            desc = self.tag_to_string(post.findNext('p')).strip()
            feeds.setdefault(section_title, []).append(
                {'title':title, 'url':url, 'description':desc, 'date':''})

        # items() (rather than iteritems()) gives the same result on
        # Python 2 and keeps the recipe working on Python 3.
        return list(feeds.items())

View File

@ -506,16 +506,6 @@ compile_gpm_templates = True
# default_tweak_format = 'remember'
default_tweak_format = None
#: Enable multi-character first-letters in the tag browser
# Some languages have letters that can be represented by multiple characters.
# For example, Czech has a 'character' "ch" that sorts between "h" and "i".
# If this tweak is True, then the tag browser will take these characters into
# consideration when partitioning by first letter.
# Examples:
# enable_multicharacters_in_tag_browser = True
# enable_multicharacters_in_tag_browser = False
enable_multicharacters_in_tag_browser = True
#: Do not preselect a completion when editing authors/tags/series/etc.
# This means that you can make changes and press Enter and your changes will
# not be overwritten by a matching completion. However, if you wish to use the

View File

@ -11,8 +11,9 @@ import re
from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
namespace, prefixname, urlnormalize)
from calibre.ebooks import normalize
from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.mobi.utils import is_guide_ref_start, utf8_text
from calibre.ebooks.mobi.utils import is_guide_ref_start
from collections import defaultdict
from urlparse import urldefrag
@ -355,7 +356,7 @@ class Serializer(object):
text = text.replace(u'\u00AD', '') # Soft-hyphen
if quot:
text = text.replace('"', '"')
self.buf.write(utf8_text(text, empty=True))
self.buf.write(normalize(text).encode('utf-8'))
def fixup_links(self):
'''

View File

@ -9,7 +9,6 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback, cPickle, copy
from itertools import repeat
from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
QMimeData, QModelIndex, pyqtSignal, QObject)
@ -17,7 +16,7 @@ from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
from calibre.gui2 import NONE, gprefs, config, error_dialog
from calibre.library.database2 import Tag
from calibre.utils.config import tweaks
from calibre.utils.icu import sort_key, lower, strcmp, contractions
from calibre.utils.icu import sort_key, lower, strcmp, collation_order
from calibre.library.field_metadata import TagsIcons, category_icon_map
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.utils.formatter import EvalFormatter
@ -258,16 +257,6 @@ class TagsModel(QAbstractItemModel): # {{{
self.hidden_categories.add(cat)
db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
conts = contractions()
if len(conts) == 0 or not tweaks['enable_multicharacters_in_tag_browser']:
self.do_contraction = False
else:
self.do_contraction = True
nconts = set()
for s in conts:
nconts.add(icu_upper(s))
self.contraction_set = frozenset(nconts)
self.db = db
self._run_rebuild()
self.endResetModel()
@ -416,53 +405,23 @@ class TagsModel(QAbstractItemModel): # {{{
tt = key if in_uc else None
if collapse_model == 'first letter':
# Build a list of 'equal' first letters by looking for
# overlapping ranges. If a range overlaps another, then the
# letters are assumed to be equivalent. ICU collating is complex
# beyond belief. This mechanism lets us determine the logical
# first character from ICU's standpoint.
chardict = {}
# Build a list of 'equal' first letters by noticing changes
# in ICU's 'ordinal' for the first letter. In this case, the
# first letter can actually be more than one letter long.
cl_list = [None] * len(data[key])
last_ordnum = 0
for idx,tag in enumerate(data[key]):
if not tag.sort:
c = ' '
else:
if not self.do_contraction:
c = icu_upper(tag.sort)[0]
else:
v = icu_upper(tag.sort)
c = v[0]
for s in self.contraction_set:
if len(s) > len(c) and v.startswith(s):
c = s
if c not in chardict:
chardict[c] = [idx, idx]
else:
chardict[c][1] = idx
# sort the ranges to facilitate detecting overlap
if len(chardict) == 1 and ' ' in chardict:
# The category could not be partitioned.
collapse_model = 'disable'
else:
ranges = sorted([(v[0], v[1], c) for c,v in chardict.items()])
# Create a list of 'first letters' to use for each item in
# the category. The list is generated using the ranges. Overlaps
# are filled with the character that first occurs.
cl_list = list(repeat(None, len(data[key])))
for t in ranges:
start = t[0]
c = t[2]
if cl_list[start] is None:
nc = c
else:
nc = cl_list[start]
for i in range(start, t[1]+1):
cl_list[i] = nc
if len(data[key]) > 0:
c = tag.sort
ordnum, ordlen = collation_order(c)
if last_ordnum != ordnum:
last_c = icu_upper(c[0:ordlen])
last_ordnum = ordnum
cl_list[idx] = last_c
top_level_component = 'z' + data[key][0].original_name
else:
top_level_component = ''
last_idx = -collapse
category_is_hierarchical = not (
key in ['authors', 'publisher', 'news', 'formats', 'rating'] or