mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
8153b33522
@ -1,3 +1,4 @@
|
|||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class JerusalemPost(BasicNewsRecipe):
|
class JerusalemPost(BasicNewsRecipe):
|
||||||
@ -10,8 +11,6 @@ class JerusalemPost(BasicNewsRecipe):
|
|||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
max_articles_per_feed = 10
|
max_articles_per_feed = 10
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_tags_before = {'class':'jp-grid-content'}
|
|
||||||
remove_tags_after = {'id':'body_val'}
|
|
||||||
|
|
||||||
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
|
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
|
||||||
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
|
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
|
||||||
@ -20,9 +19,24 @@ class JerusalemPost(BasicNewsRecipe):
|
|||||||
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
|
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(id=lambda x: x and 'ads.' in x),
|
||||||
|
dict(attrs={'class':['printinfo', 'tt1']}),
|
||||||
|
dict(onclick='DoPrint()'),
|
||||||
|
dict(name='input'),
|
||||||
|
]
|
||||||
|
|
||||||
|
conversion_options = {'linearize_tables':True}
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for x in soup.findAll(name=['form', 'input']):
|
for tag in soup.findAll('form'):
|
||||||
x.name = 'div'
|
tag.name = 'div'
|
||||||
for x in soup.findAll('body', style=True):
|
|
||||||
del x['style']
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def print_version(self, url):
    '''
    Return the URL of the printer friendly page for the article at `url`.

    The numeric article id is taken from the first ``id=<digits>`` (or
    ``ID=<digits>``) occurrence in `url`. If no such id is present, `url`
    is returned unchanged.
    '''
    match = re.search(r'(ID|id)=(\d+)', url)
    if match is None:
        # No article id to work with, fall back to the regular page
        return url
    article_id = match.group(2)
    return 'http://www.jpost.com/LandedPages/PrintArticle.aspx?id=%s'%article_id
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Input plugin for HTML or OPF ebooks.
|
Input plugin for HTML or OPF ebooks.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os, re, sys, uuid
|
import os, re, sys, uuid, tempfile
|
||||||
from urlparse import urlparse, urlunparse
|
from urlparse import urlparse, urlunparse
|
||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
from functools import partial
|
from functools import partial
|
||||||
@ -272,6 +272,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, stream, opts, file_ext, log,
|
def convert(self, stream, opts, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
|
self._is_case_sensitive = None
|
||||||
basedir = os.getcwd()
|
basedir = os.getcwd()
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
|
|
||||||
@ -290,6 +291,15 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
return create_oebbook(log, stream.name, opts, self,
|
return create_oebbook(log, stream.name, opts, self,
|
||||||
encoding=opts.input_encoding)
|
encoding=opts.input_encoding)
|
||||||
|
|
||||||
|
def is_case_sensitive(self, path):
    '''
    Return True if the filesystem containing `path` is case sensitive.

    The filesystem is probed once and the answer is cached in
    ``self._is_case_sensitive``; subsequent calls return the cached value
    regardless of the `path` passed in.

    :param path: Path to an existing file or directory used as the probe.
        If it is empty or does not exist, no probe is possible and the
        platform default (``islinux or isfreebsd``) is returned without
        being cached.
    :return: True if names differing only in case refer to different
        filesystem entries, False otherwise.
    '''
    # getattr() so that the method also works when convert() has not yet
    # initialised the cache attribute.
    cached = getattr(self, '_is_case_sensitive', None)
    if cached is not None:
        return cached
    if not path or not os.path.exists(path):
        return islinux or isfreebsd
    # On a case insensitive filesystem both the lower cased and the upper
    # cased spelling resolve to the existing entry. If at least one of them
    # is missing, case matters. (A path without any letters cannot be told
    # apart this way and is reported as case insensitive.)
    both_spellings_exist = os.path.exists(path.lower()) and \
            os.path.exists(path.upper())
    self._is_case_sensitive = not both_spellings_exist
    return self._is_case_sensitive
|
||||||
|
|
||||||
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
|
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
from calibre.ebooks.oeb.base import DirContainer, \
|
from calibre.ebooks.oeb.base import DirContainer, \
|
||||||
@ -343,14 +353,16 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
|
|
||||||
self.added_resources = {}
|
self.added_resources = {}
|
||||||
self.log = log
|
self.log = log
|
||||||
|
self.log('Normalizing filename cases')
|
||||||
for path, href in htmlfile_map.items():
|
for path, href in htmlfile_map.items():
|
||||||
if not (islinux or isfreebsd):
|
if not self.is_case_sensitive(path):
|
||||||
path = path.lower()
|
path = path.lower()
|
||||||
self.added_resources[path] = href
|
self.added_resources[path] = href
|
||||||
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
|
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
|
||||||
self.urldefrag = urldefrag
|
self.urldefrag = urldefrag
|
||||||
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
|
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
|
||||||
|
|
||||||
|
self.log('Rewriting HTML links')
|
||||||
for f in filelist:
|
for f in filelist:
|
||||||
path = f.path
|
path = f.path
|
||||||
dpath = os.path.dirname(path)
|
dpath = os.path.dirname(path)
|
||||||
@ -415,7 +427,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
if os.path.isdir(link):
|
if os.path.isdir(link):
|
||||||
self.log.warn(link_, 'is a link to a directory. Ignoring.')
|
self.log.warn(link_, 'is a link to a directory. Ignoring.')
|
||||||
return link_
|
return link_
|
||||||
if not (islinux or isfreebsd):
|
if not self.is_case_sensitive(tempfile.gettempdir()):
|
||||||
link = link.lower()
|
link = link.lower()
|
||||||
if link not in self.added_resources:
|
if link not in self.added_resources:
|
||||||
bhref = os.path.basename(link)
|
bhref = os.path.basename(link)
|
||||||
|
@ -220,6 +220,10 @@ class Scheduler(QObject):
|
|||||||
self.cac = QAction(QIcon(I('user_profile.svg')), _('Add a custom news source'), self)
|
self.cac = QAction(QIcon(I('user_profile.svg')), _('Add a custom news source'), self)
|
||||||
self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds)
|
self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds)
|
||||||
self.news_menu.addAction(self.cac)
|
self.news_menu.addAction(self.cac)
|
||||||
|
self.news_menu.addSeparator()
|
||||||
|
self.all_action = self.news_menu.addAction(
|
||||||
|
_('Download all scheduled new sources'),
|
||||||
|
self.download_all_scheduled)
|
||||||
|
|
||||||
self.timer = QTimer(self)
|
self.timer = QTimer(self)
|
||||||
self.timer.start(int(self.INTERVAL * 60000))
|
self.timer.start(int(self.INTERVAL * 60000))
|
||||||
@ -304,7 +308,11 @@ class Scheduler(QObject):
|
|||||||
if urn is not None:
|
if urn is not None:
|
||||||
return self.download(urn)
|
return self.download(urn)
|
||||||
for urn in self.recipe_model.scheduled_urns():
|
for urn in self.recipe_model.scheduled_urns():
|
||||||
self.download(urn)
|
if not self.download(urn):
|
||||||
|
break
|
||||||
|
|
||||||
|
def download_all_scheduled(self):
    '''
    Start a download without naming a specific news source, by calling
    :meth:`download_clicked` with None.

    NOTE(review): presumably download_clicked(None) walks all scheduled
    sources, confirm against its definition.
    '''
    self.download_clicked(None)
|
||||||
|
|
||||||
def download(self, urn):
|
def download(self, urn):
|
||||||
self.lock.lock()
|
self.lock.lock()
|
||||||
@ -316,12 +324,13 @@ class Scheduler(QObject):
|
|||||||
'is active'))
|
'is active'))
|
||||||
d.setModal(False)
|
d.setModal(False)
|
||||||
d.show()
|
d.show()
|
||||||
return
|
return False
|
||||||
self.internet_connection_failed = False
|
self.internet_connection_failed = False
|
||||||
doit = urn not in self.download_queue
|
doit = urn not in self.download_queue
|
||||||
self.lock.unlock()
|
self.lock.unlock()
|
||||||
if doit:
|
if doit:
|
||||||
self.do_download(urn)
|
self.do_download(urn)
|
||||||
|
return True
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
recipes = self.recipe_model.get_to_be_downloaded_recipes()
|
recipes = self.recipe_model.get_to_be_downloaded_recipes()
|
||||||
|
@ -113,7 +113,7 @@ Metadata download plugins
|
|||||||
When :meth:`fetch` is called, the `self` object will have the following
|
When :meth:`fetch` is called, the `self` object will have the following
|
||||||
useful attributes (each of which may be None)::
|
useful attributes (each of which may be None)::
|
||||||
|
|
||||||
title, author, publisher, isbn, log, verbose and extra
|
title, book_author, publisher, isbn, log, verbose and extra
|
||||||
|
|
||||||
Use these attributes to construct the search query. extra is reserved for
|
Use these attributes to construct the search query. extra is reserved for
|
||||||
future use.
|
future use.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user