Pull from trunk

This commit is contained in:
Kovid Goyal 2010-05-02 10:28:50 -06:00
commit 8153b33522
4 changed files with 47 additions and 12 deletions

View File

@ -1,3 +1,4 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class JerusalemPost(BasicNewsRecipe): class JerusalemPost(BasicNewsRecipe):
@ -10,8 +11,6 @@ class JerusalemPost(BasicNewsRecipe):
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
max_articles_per_feed = 10 max_articles_per_feed = 10
no_stylesheets = True no_stylesheets = True
remove_tags_before = {'class':'jp-grid-content'}
remove_tags_after = {'id':'body_val'}
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'), feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'), ('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
@ -20,9 +19,24 @@ class JerusalemPost(BasicNewsRecipe):
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'), ('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
] ]
remove_tags = [
dict(id=lambda x: x and 'ads.' in x),
dict(attrs={'class':['printinfo', 'tt1']}),
dict(onclick='DoPrint()'),
dict(name='input'),
]
conversion_options = {'linearize_tables':True}
def preprocess_html(self, soup): def preprocess_html(self, soup):
for x in soup.findAll(name=['form', 'input']): for tag in soup.findAll('form'):
x.name = 'div' tag.name = 'div'
for x in soup.findAll('body', style=True):
del x['style']
return soup return soup
def print_version(self, url):
    '''
    Return the URL of the printer-friendly page for an article.

    The numeric article id is taken from the first ``ID=<digits>`` or
    ``id=<digits>`` occurrence in `url` and substituted into the
    PrintArticle address. If `url` carries no such id it is returned
    unchanged.
    '''
    match = re.search(r'(ID|id)=(\d+)', url)
    if match is None:
        # No article id to work with: keep the original page.
        return url
    # Group 1 is the ID/id spelling, group 2 holds the digits.
    article_id = match.group(2)
    return 'http://www.jpost.com/LandedPages/PrintArticle.aspx?id=%s' % article_id

View File

@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
Input plugin for HTML or OPF ebooks. Input plugin for HTML or OPF ebooks.
''' '''
import os, re, sys, uuid import os, re, sys, uuid, tempfile
from urlparse import urlparse, urlunparse from urlparse import urlparse, urlunparse
from urllib import unquote from urllib import unquote
from functools import partial from functools import partial
@ -272,6 +272,7 @@ class HTMLInput(InputFormatPlugin):
def convert(self, stream, opts, file_ext, log, def convert(self, stream, opts, file_ext, log,
accelerators): accelerators):
self._is_case_sensitive = None
basedir = os.getcwd() basedir = os.getcwd()
self.opts = opts self.opts = opts
@ -290,6 +291,15 @@ class HTMLInput(InputFormatPlugin):
return create_oebbook(log, stream.name, opts, self, return create_oebbook(log, stream.name, opts, self,
encoding=opts.input_encoding) encoding=opts.input_encoding)
def is_case_sensitive(self, path):
    '''
    Return True if the filesystem containing `path` treats names that
    differ only in case as different files.

    The first successful probe is stored in ``self._is_case_sensitive``
    and reused by later calls, whatever `path` they pass.

    :param path: An existing path used to probe the filesystem. If it is
                 empty or does not exist, the platform default
                 (``islinux or isfreebsd``) is returned and nothing is
                 cached.
    '''
    # getattr so the method also works when the attribute was never
    # initialised on this instance.
    cached = getattr(self, '_is_case_sensitive', None)
    if cached is not None:
        return cached
    if not path or not os.path.exists(path):
        return islinux or isfreebsd
    # `path` exists. If both its lower-cased and upper-cased spellings
    # also resolve, the filesystem is ignoring case, so sensitivity is the
    # negation of that probe.
    # NOTE(review): a path without any letters is identical in both
    # spellings and will therefore be reported as case insensitive.
    both_spellings_exist = os.path.exists(path.lower()) \
            and os.path.exists(path.upper())
    self._is_case_sensitive = not both_spellings_exist
    return self._is_case_sensitive
def create_oebbook(self, htmlpath, basedir, opts, log, mi): def create_oebbook(self, htmlpath, basedir, opts, log, mi):
from calibre.ebooks.conversion.plumber import create_oebbook from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer, \ from calibre.ebooks.oeb.base import DirContainer, \
@ -343,14 +353,16 @@ class HTMLInput(InputFormatPlugin):
self.added_resources = {} self.added_resources = {}
self.log = log self.log = log
self.log('Normalizing filename cases')
for path, href in htmlfile_map.items(): for path, href in htmlfile_map.items():
if not (islinux or isfreebsd): if not self.is_case_sensitive(path):
path = path.lower() path = path.lower()
self.added_resources[path] = href self.added_resources[path] = href
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
self.urldefrag = urldefrag self.urldefrag = urldefrag
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
self.log('Rewriting HTML links')
for f in filelist: for f in filelist:
path = f.path path = f.path
dpath = os.path.dirname(path) dpath = os.path.dirname(path)
@ -415,7 +427,7 @@ class HTMLInput(InputFormatPlugin):
if os.path.isdir(link): if os.path.isdir(link):
self.log.warn(link_, 'is a link to a directory. Ignoring.') self.log.warn(link_, 'is a link to a directory. Ignoring.')
return link_ return link_
if not (islinux or isfreebsd): if not self.is_case_sensitive(tempfile.gettempdir()):
link = link.lower() link = link.lower()
if link not in self.added_resources: if link not in self.added_resources:
bhref = os.path.basename(link) bhref = os.path.basename(link)

View File

@ -220,6 +220,10 @@ class Scheduler(QObject):
self.cac = QAction(QIcon(I('user_profile.svg')), _('Add a custom news source'), self) self.cac = QAction(QIcon(I('user_profile.svg')), _('Add a custom news source'), self)
self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds) self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds)
self.news_menu.addAction(self.cac) self.news_menu.addAction(self.cac)
self.news_menu.addSeparator()
self.all_action = self.news_menu.addAction(
_('Download all scheduled new sources'),
self.download_all_scheduled)
self.timer = QTimer(self) self.timer = QTimer(self)
self.timer.start(int(self.INTERVAL * 60000)) self.timer.start(int(self.INTERVAL * 60000))
@ -304,7 +308,11 @@ class Scheduler(QObject):
if urn is not None: if urn is not None:
return self.download(urn) return self.download(urn)
for urn in self.recipe_model.scheduled_urns(): for urn in self.recipe_model.scheduled_urns():
self.download(urn) if not self.download(urn):
break
def download_all_scheduled(self):
    '''
    Handler for the "download all scheduled" entry of the news menu.

    Delegates to :meth:`download_clicked` with ``None`` in place of a
    specific recipe urn.
    '''
    self.download_clicked(None)
def download(self, urn): def download(self, urn):
self.lock.lock() self.lock.lock()
@ -316,12 +324,13 @@ class Scheduler(QObject):
'is active')) 'is active'))
d.setModal(False) d.setModal(False)
d.show() d.show()
return return False
self.internet_connection_failed = False self.internet_connection_failed = False
doit = urn not in self.download_queue doit = urn not in self.download_queue
self.lock.unlock() self.lock.unlock()
if doit: if doit:
self.do_download(urn) self.do_download(urn)
return True
def check(self): def check(self):
recipes = self.recipe_model.get_to_be_downloaded_recipes() recipes = self.recipe_model.get_to_be_downloaded_recipes()

View File

@ -113,7 +113,7 @@ Metadata download plugins
When :meth:`fetch` is called, the `self` object will have the following When :meth:`fetch` is called, the `self` object will have the following
useful attributes (each of which may be None):: useful attributes (each of which may be None)::
title, author, publisher, isbn, log, verbose and extra title, book_author, publisher, isbn, log, verbose and extra
Use these attributes to construct the search query. extra is reserved for Use these attributes to construct the search query. extra is reserved for
future use. future use.