mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
8153b33522
@ -1,3 +1,4 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class JerusalemPost(BasicNewsRecipe):
|
||||
@ -10,8 +11,6 @@ class JerusalemPost(BasicNewsRecipe):
|
||||
__author__ = 'Kovid Goyal'
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
remove_tags_before = {'class':'jp-grid-content'}
|
||||
remove_tags_after = {'id':'body_val'}
|
||||
|
||||
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
|
||||
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
|
||||
@ -20,9 +19,24 @@ class JerusalemPost(BasicNewsRecipe):
|
||||
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(id=lambda x: x and 'ads.' in x),
|
||||
dict(attrs={'class':['printinfo', 'tt1']}),
|
||||
dict(onclick='DoPrint()'),
|
||||
dict(name='input'),
|
||||
]
|
||||
|
||||
conversion_options = {'linearize_tables':True}
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Tidy the parsed article before conversion.

    Every ``form`` and ``input`` tag is renamed to ``div``, and the inline
    ``style`` attribute is deleted from any ``body`` tag that carries one.

    :param soup: The parsed document; it is modified in place.
    :return: The same ``soup`` object.
    '''
    # A single pass covers both tag names. A separate second pass over
    # 'form' would find nothing, since those tags are already 'div' here
    # (assumes findAll matches on the current tag name).
    for tag in soup.findAll(name=['form', 'input']):
        tag.name = 'div'
    for body in soup.findAll('body', style=True):
        del body['style']
    return soup
|
||||
|
||||
def print_version(self, url):
    '''
    Map an article URL to the URL of its print page.

    :param url: The article URL.
    :return: The ``PrintArticle.aspx`` URL built from the first
             ``id=<digits>`` (or ``ID=<digits>``) found in ``url``, or
             ``url`` unchanged when no such id is present.
    '''
    match = re.search(r'(ID|id)=(\d+)', url)
    if match is None:
        # No numeric id to build a print URL from
        return url
    return 'http://www.jpost.com/LandedPages/PrintArticle.aspx?id=%s'%match.group(2)
|
||||
|
||||
|
@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
|
||||
Input plugin for HTML or OPF ebooks.
|
||||
'''
|
||||
|
||||
import os, re, sys, uuid
|
||||
import os, re, sys, uuid, tempfile
|
||||
from urlparse import urlparse, urlunparse
|
||||
from urllib import unquote
|
||||
from functools import partial
|
||||
@ -272,6 +272,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, opts, file_ext, log,
|
||||
accelerators):
|
||||
self._is_case_sensitive = None
|
||||
basedir = os.getcwd()
|
||||
self.opts = opts
|
||||
|
||||
@ -290,6 +291,15 @@ class HTMLInput(InputFormatPlugin):
|
||||
return create_oebbook(log, stream.name, opts, self,
|
||||
encoding=opts.input_encoding)
|
||||
|
||||
def is_case_sensitive(self, path):
    '''
    Report whether the filesystem holding ``path`` distinguishes
    filenames by case.

    The first result obtained by probing is stored on
    ``self._is_case_sensitive`` and returned by every later call,
    whatever path that call passes.

    :param path: An existing path to probe with.
    :return: True if the filesystem is case sensitive, False otherwise.
    '''
    cached = getattr(self, '_is_case_sensitive', None)
    if cached is not None:
        return cached
    if not path or not os.path.exists(path) or path.lower() == path.upper():
        # Nothing usable to probe with (missing path, or a path with no
        # cased characters, for which both probes below would trivially
        # succeed). Use the platform default and do not cache it.
        return islinux or isfreebsd
    # When both case variants of an existing path resolve, the filesystem
    # ignores case, so the answer is the negation of that test.
    self._is_case_sensitive = not (os.path.exists(path.lower()) and
                                   os.path.exists(path.upper()))
    return self._is_case_sensitive
|
||||
|
||||
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
|
||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||
from calibre.ebooks.oeb.base import DirContainer, \
|
||||
@ -343,14 +353,16 @@ class HTMLInput(InputFormatPlugin):
|
||||
|
||||
self.added_resources = {}
|
||||
self.log = log
|
||||
self.log('Normalizing filename cases')
|
||||
for path, href in htmlfile_map.items():
|
||||
if not (islinux or isfreebsd):
|
||||
if not self.is_case_sensitive(path):
|
||||
path = path.lower()
|
||||
self.added_resources[path] = href
|
||||
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
|
||||
self.urldefrag = urldefrag
|
||||
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
|
||||
|
||||
self.log('Rewriting HTML links')
|
||||
for f in filelist:
|
||||
path = f.path
|
||||
dpath = os.path.dirname(path)
|
||||
@ -415,7 +427,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
if os.path.isdir(link):
|
||||
self.log.warn(link_, 'is a link to a directory. Ignoring.')
|
||||
return link_
|
||||
if not (islinux or isfreebsd):
|
||||
if not self.is_case_sensitive(tempfile.gettempdir()):
|
||||
link = link.lower()
|
||||
if link not in self.added_resources:
|
||||
bhref = os.path.basename(link)
|
||||
|
@ -220,6 +220,10 @@ class Scheduler(QObject):
|
||||
self.cac = QAction(QIcon(I('user_profile.svg')), _('Add a custom news source'), self)
|
||||
self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds)
|
||||
self.news_menu.addAction(self.cac)
|
||||
self.news_menu.addSeparator()
|
||||
self.all_action = self.news_menu.addAction(
|
||||
_('Download all scheduled new sources'),
|
||||
self.download_all_scheduled)
|
||||
|
||||
self.timer = QTimer(self)
|
||||
self.timer.start(int(self.INTERVAL * 60000))
|
||||
@ -304,7 +308,11 @@ class Scheduler(QObject):
|
||||
if urn is not None:
|
||||
return self.download(urn)
|
||||
for urn in self.recipe_model.scheduled_urns():
|
||||
self.download(urn)
|
||||
if not self.download(urn):
|
||||
break
|
||||
|
||||
def download_all_scheduled(self):
    '''
    Slot for the "download all scheduled" menu action.

    Delegates to :meth:`download_clicked`, passing ``None`` instead of a
    specific recipe URN (presumably this makes it walk all scheduled
    URNs; confirm against download_clicked).
    '''
    no_specific_urn = None
    self.download_clicked(no_specific_urn)
|
||||
|
||||
def download(self, urn):
|
||||
self.lock.lock()
|
||||
@ -316,12 +324,13 @@ class Scheduler(QObject):
|
||||
'is active'))
|
||||
d.setModal(False)
|
||||
d.show()
|
||||
return
|
||||
return False
|
||||
self.internet_connection_failed = False
|
||||
doit = urn not in self.download_queue
|
||||
self.lock.unlock()
|
||||
if doit:
|
||||
self.do_download(urn)
|
||||
return True
|
||||
|
||||
def check(self):
|
||||
recipes = self.recipe_model.get_to_be_downloaded_recipes()
|
||||
|
@ -113,7 +113,7 @@ Metadata download plugins
|
||||
When :meth:`fetch` is called, the `self` object will have the following
|
||||
useful attributes (each of which may be None)::
|
||||
|
||||
title, author, publisher, isbn, log, verbose and extra
|
||||
title, book_author, publisher, isbn, log, verbose and extra
|
||||
|
||||
Use these attributes to construct the search query. extra is reserved for
|
||||
future use.
|
||||
|
Loading…
x
Reference in New Issue
Block a user