Pull from trunk

This commit is contained in:
Kovid Goyal 2010-05-02 10:28:50 -06:00
commit 8153b33522
4 changed files with 47 additions and 12 deletions

View File

@ -1,3 +1,4 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class JerusalemPost(BasicNewsRecipe):
@ -10,8 +11,6 @@ class JerusalemPost(BasicNewsRecipe):
__author__ = 'Kovid Goyal'
max_articles_per_feed = 10
no_stylesheets = True
remove_tags_before = {'class':'jp-grid-content'}
remove_tags_after = {'id':'body_val'}
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
@ -20,9 +19,24 @@ class JerusalemPost(BasicNewsRecipe):
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
]
remove_tags = [
dict(id=lambda x: x and 'ads.' in x),
dict(attrs={'class':['printinfo', 'tt1']}),
dict(onclick='DoPrint()'),
dict(name='input'),
]
conversion_options = {'linearize_tables':True}
# Tidy the parsed article markup and hand the same soup object back.
# NOTE(review): this listing looks like it interleaves lines from both sides
# of a diff hunk (two separate loops rename <form> tags) -- confirm which
# loops belong to the current version before relying on the comments below.
def preprocess_html(self, soup):
# Rename every <form> and <input> element to <div>.
for x in soup.findAll(name=['form', 'input']):
x.name = 'div'
# Drop the inline style attribute from any <body> tag that carries one.
for x in soup.findAll('body', style=True):
del x['style']
# Rename <form> elements to <div>; presumably a no-op if the first loop
# above has already renamed them -- TODO confirm.
for tag in soup.findAll('form'):
tag.name = 'div'
return soup
def print_version(self, url):
    '''
    Return the printer-friendly URL for the article at `url`.

    The numeric value of the first ``ID=``/``id=`` parameter found in
    `url` is substituted into the PrintArticle page address. The search
    is not anchored, so it also matches inside longer names ending in
    ``id``. If no such parameter is present, `url` is returned unchanged.
    '''
    found = re.search(r'(ID|id)=(\d+)', url)
    if found is None:
        return url
    article_id = found.group(2)
    return 'http://www.jpost.com/LandedPages/PrintArticle.aspx?id=%s'%article_id

View File

@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
Input plugin for HTML or OPF ebooks.
'''
import os, re, sys, uuid
import os, re, sys, uuid, tempfile
from urlparse import urlparse, urlunparse
from urllib import unquote
from functools import partial
@ -272,6 +272,7 @@ class HTMLInput(InputFormatPlugin):
def convert(self, stream, opts, file_ext, log,
accelerators):
self._is_case_sensitive = None
basedir = os.getcwd()
self.opts = opts
@ -290,6 +291,15 @@ class HTMLInput(InputFormatPlugin):
return create_oebbook(log, stream.name, opts, self,
encoding=opts.input_encoding)
def is_case_sensitive(self, path):
    '''
    Return True if the filesystem holding `path` is case sensitive.

    The answer is probed once from an existing `path` and then cached on
    the instance in ``_is_case_sensitive``. When `path` is empty, does
    not exist, or contains no cased characters, nothing can be probed:
    the platform default (``islinux or isfreebsd``) is returned and
    nothing is cached.
    '''
    # getattr: the cache attribute is only initialised by convert(), so
    # tolerate being called before it exists.
    cached = getattr(self, '_is_case_sensitive', None)
    if cached is not None:
        return cached
    if not path or not os.path.exists(path):
        return islinux or isfreebsd
    lowered, uppered = path.lower(), path.upper()
    if lowered == uppered:
        # No letters in the path, so both spellings are the path itself
        # and the probe below would always claim "case insensitive".
        return islinux or isfreebsd
    # `path` exists. If both the all-lower and all-upper spellings also
    # resolve, the filesystem ignores case; otherwise it is case sensitive.
    self._is_case_sensitive = not (os.path.exists(lowered) and
                                   os.path.exists(uppered))
    return self._is_case_sensitive
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer, \
@ -343,14 +353,16 @@ class HTMLInput(InputFormatPlugin):
self.added_resources = {}
self.log = log
self.log('Normalizing filename cases')
for path, href in htmlfile_map.items():
if not (islinux or isfreebsd):
if not self.is_case_sensitive(path):
path = path.lower()
self.added_resources[path] = href
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
self.urldefrag = urldefrag
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
self.log('Rewriting HTML links')
for f in filelist:
path = f.path
dpath = os.path.dirname(path)
@ -415,7 +427,7 @@ class HTMLInput(InputFormatPlugin):
if os.path.isdir(link):
self.log.warn(link_, 'is a link to a directory. Ignoring.')
return link_
if not (islinux or isfreebsd):
if not self.is_case_sensitive(tempfile.gettempdir()):
link = link.lower()
if link not in self.added_resources:
bhref = os.path.basename(link)

View File

@ -220,6 +220,10 @@ class Scheduler(QObject):
self.cac = QAction(QIcon(I('user_profile.svg')), _('Add a custom news source'), self)
self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds)
self.news_menu.addAction(self.cac)
self.news_menu.addSeparator()
# Menu entry that starts a download of every scheduled news source.
self.all_action = self.news_menu.addAction(
_('Download all scheduled news sources'),
self.download_all_scheduled)
self.timer = QTimer(self)
self.timer.start(int(self.INTERVAL * 60000))
@ -304,7 +308,11 @@ class Scheduler(QObject):
if urn is not None:
return self.download(urn)
for urn in self.recipe_model.scheduled_urns():
self.download(urn)
if not self.download(urn):
break
def download_all_scheduled(self):
    '''
    Menu handler: delegate to download_clicked() with no specific
    recipe (``None``) selected.
    '''
    self.download_clicked(None)
def download(self, urn):
self.lock.lock()
@ -316,12 +324,13 @@ class Scheduler(QObject):
'is active'))
d.setModal(False)
d.show()
return
return False
self.internet_connection_failed = False
doit = urn not in self.download_queue
self.lock.unlock()
if doit:
self.do_download(urn)
return True
def check(self):
recipes = self.recipe_model.get_to_be_downloaded_recipes()

View File

@ -113,7 +113,7 @@ Metadata download plugins
When :meth:`fetch` is called, the `self` object will have the following
useful attributes (each of which may be None)::
title, author, publisher, isbn, log, verbose and extra
title, book_author, publisher, isbn, log, verbose and extra
Use these attributes to construct the search query. extra is reserved for
future use.