KG updates pre-6.36

This commit is contained in:
GRiker 2010-01-25 09:13:56 -07:00
commit 345e752714
4 changed files with 77 additions and 15 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 788 B

View File

@ -58,7 +58,7 @@ class JASN(BasicNewsRecipe):
# To parse article toc # To parse article toc
def parse_index(self): def parse_index(self):
parse_soup = self.jasn_get_index() parse_soup = self.jasn_get_index()
div = parse_soup.find(id='tocBody') div = parse_soup.find(id='tocBody')
current_section = None current_section = None
@ -91,7 +91,7 @@ class JASN(BasicNewsRecipe):
feeds.append((current_section, current_articles)) feeds.append((current_section, current_articles))
return feeds return feeds
def preprocess_html(self, soup): def preprocess_html(self, soup):
@ -112,4 +112,5 @@ class JASN(BasicNewsRecipe):
return soup return soup

View File

@ -0,0 +1,67 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nursingtimes.net
'''
import urllib
from calibre.web.feeds.recipes import BasicNewsRecipe
class NursingTimes(BasicNewsRecipe):
    """Recipe that downloads news from www.nursingtimes.net.

    The site is marked as requiring a subscription, so ``get_browser``
    posts the user's credentials to the sign-in page before any feed
    is fetched.
    """

    title                 = 'Nursing Times'
    __author__            = 'Darko Miletic'
    description           = 'Nursing practice, NHS and health care news'
    oldest_article        = 8
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    publisher             = 'emap'
    category              = 'news, health, nursing, UK'
    # Was 'en-UK'; the country code for the United Kingdom is GB, and
    # the locale form is lang_COUNTRY.
    language              = 'en_GB'
    needs_subscription    = True
    # URL used both to fetch the sign-in page and to post the login form.
    LOGIN                 = 'http://www.nursingtimes.net/sign-in'

    # Metadata forwarded to the conversion pipeline.
    conversion_options = {
        'comments'  : description,
        'tags'      : category,
        'language'  : language,
        'publisher' : publisher,
    }

    def get_browser(self):
        """Return a browser that has been logged in to the site.

        The sign-in page is opened first; if both ``self.username`` and
        ``self.password`` are set, the login form fields are then posted
        to the same URL.

        Returns:
            The browser object obtained from ``BasicNewsRecipe``.
        """
        # get_browser is an instance method: calling it on the class
        # without passing ``self`` raises TypeError under Python 2.
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOGIN)
        if self.username is not None and self.password is not None:
            # Field names mirror the site's sign-in form.
            # NOTE(review): LoginButton.x/.y look like the click
            # coordinates of an image submit button -- confirm.
            data = urllib.urlencode({
                'campaigncode'  : '0',
                'referrer'      : '',
                'security_text' : '',
                'SIemail'       : self.username,
                'passWord'      : self.password,
                'LoginButton.x' : '27',
                'LoginButton.y' : '13',
            })
            br.open(self.LOGIN, data)
        return br

    # Keep only the article body container.
    keep_only_tags = [dict(name='div', attrs={'class': 'storytext'})]

    # Strip embedded objects/scripts and the comment form itself.
    remove_tags = [
        dict(name=['object', 'link', 'script', 'iframe']),
        dict(name='div', attrs={'id': 'comments_form'}),
    ]

    # Drop everything that follows the comment form.
    remove_tags_after = dict(name='div', attrs={'id': 'comments_form'})

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Breaking News', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=1'),
        (u'Practice', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=512'),
        (u'Behind the headlines', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=468'),
        (u'Analysis', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=62'),
        (u'Acute care news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=5'),
        (u'Primary care news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=231'),
        (u'Mental Health news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=27'),
        (u'Management news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=32'),
        (u"Older people's nursing news", u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=181'),
        (u'Respiratory news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=177'),
        (u'Wound care news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=182'),
    ]

View File

@ -3,15 +3,11 @@ import os, re, shutil, htmlentitydefs
from collections import namedtuple from collections import namedtuple
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from PyQt4.Qt import *
from calibre import filesystem_encoding from calibre import filesystem_encoding
from calibre.customize import CatalogPlugin from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre import filesystem_encoding, prints
from calibre.utils.logging import Log from calibre.utils.logging import Log
FIELDS = ['all', 'author_sort', 'authors', 'comments', FIELDS = ['all', 'author_sort', 'authors', 'comments',
@ -512,7 +508,7 @@ class EPUB_MOBI(CatalogPlugin):
self.__verbose = opts.verbose self.__verbose = opts.verbose
self.opts.log.info("CatalogBuilder(): Generating %s %s"% \ self.opts.log.info("CatalogBuilder(): Generating %s %s"% \
(self.opts.fmt, (self.opts.fmt,
"for %s" % self.opts.output_profile if self.opts.output_profile \ "for %s" % self.opts.output_profile if self.opts.output_profile \
else '')) else ''))
# Accessors # Accessors
@ -912,7 +908,7 @@ class EPUB_MOBI(CatalogPlugin):
if author[0] == current_author[0]: if author[0] == current_author[0]:
self.opts.log.warn("Warning: multiple entries for Author '%s' with differing Author Sort metadata:" % author[0]) self.opts.log.warn("Warning: multiple entries for Author '%s' with differing Author Sort metadata:" % author[0])
self.opts.log.warn(" '%s' != '%s'" % (author[1], current_author[1])) self.opts.log.warn(" '%s' != '%s'" % (author[1], current_author[1]))
# New author, save the previous author/sort/count # New author, save the previous author/sort/count
unique_authors.append((current_author[0], current_author[1].title(), unique_authors.append((current_author[0], current_author[1].title(),
books_by_current_author)) books_by_current_author))
@ -933,8 +929,8 @@ class EPUB_MOBI(CatalogPlugin):
if False and self.verbose: if False and self.verbose:
self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors)) self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
for author in unique_authors: for author in unique_authors:
self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20], self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
author[2])).encode('utf-8')) author[2])).encode('utf-8'))
self.authors = unique_authors self.authors = unique_authors
def generateHTMLDescriptions(self): def generateHTMLDescriptions(self):
@ -977,7 +973,7 @@ class EPUB_MOBI(CatalogPlugin):
authorTag.insert(1, aTag) authorTag.insert(1, aTag)
''' '''
# Insert the unlinked genres. # Insert the unlinked genres.
if 'tags' in title: if 'tags' in title:
tagsTag = body.find(attrs={'class':'tags'}) tagsTag = body.find(attrs={'class':'tags'})
emTag = Tag(soup,"em") emTag = Tag(soup,"em")
@ -1404,7 +1400,7 @@ class EPUB_MOBI(CatalogPlugin):
self.generateThumbnail(title, image_dir, thumb_file) self.generateThumbnail(title, image_dir, thumb_file)
else: else:
# Use default cover # Use default cover
if self.verbose: if self.verbose:
self.opts.log.warn(" using default cover for '%s'" % \ self.opts.log.warn(" using default cover for '%s'" % \
(title['title'])) (title['title']))
# Check to make sure default is current # Check to make sure default is current
@ -2282,8 +2278,6 @@ class EPUB_MOBI(CatalogPlugin):
# The 21-Day Consciousness Cleanse # The 21-Day Consciousness Cleanse
title_words = title.split(' ') title_words = title.split(' ')
if title_words[0].lower() in ['the','a','an']:
stop_word = title_words.pop(0)
# Scan for numbers in each word clump # Scan for numbers in each word clump
translated = [] translated = []