mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
bb6ef9ab4d
@ -247,7 +247,7 @@ function setup_sorting() {
|
|||||||
});
|
});
|
||||||
|
|
||||||
$('table#book_list thead tr td').mouseout(function() {
|
$('table#book_list thead tr td').mouseout(function() {
|
||||||
this.style.backgroundColor = "inherit";
|
this.style.backgroundColor = "transparent";
|
||||||
});
|
});
|
||||||
|
|
||||||
for (i = 0; i < cmap.length; i++) {
|
for (i = 0; i < cmap.length; i++) {
|
||||||
|
BIN
resources/images/news/toronto_sun.png
Normal file
BIN
resources/images/news/toronto_sun.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 630 B |
@ -102,6 +102,7 @@ extensions = [
|
|||||||
libraries=['podofo'],
|
libraries=['podofo'],
|
||||||
lib_dirs=[podofo_lib],
|
lib_dirs=[podofo_lib],
|
||||||
inc_dirs=[podofo_inc],
|
inc_dirs=[podofo_inc],
|
||||||
|
optional=True,
|
||||||
error=podofo_error),
|
error=podofo_error),
|
||||||
|
|
||||||
Extension('pictureflow',
|
Extension('pictureflow',
|
||||||
|
@ -6,13 +6,15 @@ __docformat__ = 'restructuredtext en'
|
|||||||
'''
|
'''
|
||||||
Freeze app into executable using py2exe.
|
Freeze app into executable using py2exe.
|
||||||
'''
|
'''
|
||||||
|
import sys, os
|
||||||
|
|
||||||
QT_DIR = 'C:\\Qt\\4.5.2'
|
QT_DIR = 'C:\\Qt\\4.5.2'
|
||||||
LIBUSB_DIR = 'C:\\libusb'
|
LIBUSB_DIR = 'C:\\libusb'
|
||||||
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
||||||
IMAGEMAGICK_DIR = 'C:\\ImageMagick'
|
|
||||||
SW = r'C:\cygwin\home\kovid\sw'
|
SW = r'C:\cygwin\home\kovid\sw'
|
||||||
|
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.5.6',
|
||||||
|
'VisualMagick', 'bin')
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
def fix_module_finder():
|
def fix_module_finder():
|
||||||
# ModuleFinder can't handle runtime changes to __path__, but win32com uses them
|
# ModuleFinder can't handle runtime changes to __path__, but win32com uses them
|
||||||
@ -186,6 +188,15 @@ class BuildEXE(bc):
|
|||||||
for pat in ('*.dll', '*.sys', '*.cat', '*.inf'):
|
for pat in ('*.dll', '*.sys', '*.cat', '*.inf'):
|
||||||
for f in glob.glob(os.path.join(LIBUSB_DIR, pat)):
|
for f in glob.glob(os.path.join(LIBUSB_DIR, pat)):
|
||||||
shutil.copyfile(f, os.path.join(tdir, os.path.basename(f)))
|
shutil.copyfile(f, os.path.join(tdir, os.path.basename(f)))
|
||||||
|
# Copy ImageMagick
|
||||||
|
for pat in ('*.dll', '*.xml'):
|
||||||
|
for f in glob.glob(self.j(IMAGEMAGICK, pat)):
|
||||||
|
ok = True
|
||||||
|
for ex in ('magick++', 'x11.dll', 'xext.dll'):
|
||||||
|
if ex in f.lower(): ok = False
|
||||||
|
if not ok: continue
|
||||||
|
shutil.copy2(f, self.dll_dir)
|
||||||
|
|
||||||
print '\tAdding unrar'
|
print '\tAdding unrar'
|
||||||
shutil.copyfile(LIBUNRAR, os.path.join(PY2EXE_DIR, os.path.basename(LIBUNRAR)))
|
shutil.copyfile(LIBUNRAR, os.path.join(PY2EXE_DIR, os.path.basename(LIBUNRAR)))
|
||||||
|
|
||||||
|
@ -190,6 +190,9 @@ def main(args=sys.argv):
|
|||||||
elif opts.develop_from is not None:
|
elif opts.develop_from is not None:
|
||||||
develop_from(opts.develop_from)
|
develop_from(opts.develop_from)
|
||||||
else:
|
else:
|
||||||
|
from calibre.utils.config import config_dir
|
||||||
|
ipydir = os.path.join(config_dir, ('_' if iswindows else '.')+'ipython')
|
||||||
|
os.environ['IPYTHONDIR'] = ipydir
|
||||||
from IPython.Shell import IPShellEmbed
|
from IPython.Shell import IPShellEmbed
|
||||||
ipshell = IPShellEmbed()
|
ipshell = IPShellEmbed()
|
||||||
ipshell()
|
ipshell()
|
||||||
|
@ -162,6 +162,7 @@ class USBMS(CLI, Device):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def book_from_path(cls, path):
|
def book_from_path(cls, path):
|
||||||
from calibre.ebooks.metadata.meta import path_to_ext
|
from calibre.ebooks.metadata.meta import path_to_ext
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
mime = mime_type_ext(path_to_ext(path))
|
mime = mime_type_ext(path_to_ext(path))
|
||||||
|
|
||||||
if cls.settings().read_metadata or cls.MUST_READ_METADATA:
|
if cls.settings().read_metadata or cls.MUST_READ_METADATA:
|
||||||
@ -171,6 +172,10 @@ class USBMS(CLI, Device):
|
|||||||
mi = metadata_from_filename(os.path.basename(path),
|
mi = metadata_from_filename(os.path.basename(path),
|
||||||
re.compile(r'^(?P<title>[ \S]+?)[ _]-[ _](?P<author>[ \S]+?)_+\d+'))
|
re.compile(r'^(?P<title>[ \S]+?)[ _]-[ _](?P<author>[ \S]+?)_+\d+'))
|
||||||
|
|
||||||
|
if mi is None:
|
||||||
|
mi = MetaInformation(os.path.splitext(os.path.basename(path))[0],
|
||||||
|
[_('Unknown')])
|
||||||
|
|
||||||
authors = authors_to_string(mi.authors)
|
authors = authors_to_string(mi.authors)
|
||||||
|
|
||||||
book = Book(path, mi.title, authors, mime)
|
book = Book(path, mi.title, authors, mime)
|
||||||
|
@ -4,9 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#pragma once
|
||||||
#ifndef CALIBRE_REFLOW_FONTS
|
|
||||||
#define CALIBRE_REFLOW_FONTS
|
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
@ -102,4 +100,3 @@ class Fonts : public vector<XMLFont*> {
|
|||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
#ifndef _CALIBRE_REFLOW_IMAGES
|
#pragma once
|
||||||
#define _CALIBRE_REFLOW_IMAGES
|
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <GfxState.h>
|
#include <GfxState.h>
|
||||||
@ -127,4 +126,3 @@ namespace calibre_reflow {
|
|||||||
};
|
};
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
@ -5,9 +5,7 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef _CALIBRE_XML_LINKS
|
#pragma once
|
||||||
#define _CALIBRE_XML_LINKS
|
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
@ -65,5 +63,4 @@ class XMLLinks : public vector<XMLLink*> {
|
|||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
|
@ -4,8 +4,7 @@
|
|||||||
* Based on pdftohtml from the poppler project.
|
* Based on pdftohtml from the poppler project.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef CALIBRE_REFLOW
|
#pragma once
|
||||||
#define CALIBRE_REFLOW
|
|
||||||
#define UNICODE
|
#define UNICODE
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
@ -248,4 +247,3 @@ class XMLOutputDev : public OutputDev {
|
|||||||
void process_link(Link* link);
|
void process_link(Link* link);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
@ -4,9 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#ifndef _CALIBRE_REFLOW_UTILS
|
#pragma once
|
||||||
#define _CALIBRE_REFLOW_UTILS
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
@ -45,4 +43,3 @@ inline string encode_for_xml(const string &sSrc )
|
|||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
@ -55,8 +55,7 @@
|
|||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef PICTUREFLOW_H
|
#pragma once
|
||||||
#define PICTUREFLOW_H
|
|
||||||
|
|
||||||
#include <QWidget>
|
#include <QWidget>
|
||||||
|
|
||||||
@ -202,4 +201,3 @@ private:
|
|||||||
PictureFlowPrivate* d;
|
PictureFlowPrivate* d;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // PICTUREFLOW_H
|
|
||||||
|
@ -112,10 +112,13 @@ sudo python setup.py install
|
|||||||
</pre>
|
</pre>
|
||||||
Note that if your distribution does not have a
|
Note that if your distribution does not have a
|
||||||
correctly compiled libunrar.so, ${app} will not
|
correctly compiled libunrar.so, ${app} will not
|
||||||
support rar files. The calibre_postinstall step
|
support rar files. In order to compile ${app} successfully
|
||||||
is required for device detection and integration
|
poppler headers must include XPdf headers. That is, poppler
|
||||||
with your desktop environment.
|
must have been configured with --enable-xpdf-headers. Also,
|
||||||
</p>
|
some distributions have buggy libpng headers. See
|
||||||
|
<a href="https://bugs.launchpad.net/ubuntu/+source/libpng/+bug/218409">here</a>
|
||||||
|
for example.
|
||||||
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
@ -411,7 +411,11 @@ class Config(ConfigInterface):
|
|||||||
if os.path.exists(self.config_file_path):
|
if os.path.exists(self.config_file_path):
|
||||||
try:
|
try:
|
||||||
with ExclusiveFile(self.config_file_path) as f:
|
with ExclusiveFile(self.config_file_path) as f:
|
||||||
src = f.read().decode('utf-8')
|
try:
|
||||||
|
src = f.read().decode('utf-8')
|
||||||
|
except ValueError:
|
||||||
|
print "Failed to parse", self.config_file_path
|
||||||
|
traceback.print_exc()
|
||||||
except LockError:
|
except LockError:
|
||||||
raise IOError('Could not lock config file: %s'%self.config_file_path)
|
raise IOError('Could not lock config file: %s'%self.config_file_path)
|
||||||
return self.option_set.parse_string(src)
|
return self.option_set.parse_string(src)
|
||||||
|
@ -131,7 +131,7 @@ class BaseJob(object):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def details(self):
|
def details(self):
|
||||||
return self.log_file.read().decode('utf-8')
|
return self.log_file.read().decode('utf-8', 'replace')
|
||||||
|
|
||||||
|
|
||||||
class ParallelJob(BaseJob):
|
class ParallelJob(BaseJob):
|
||||||
|
@ -15,6 +15,9 @@
|
|||||||
along with this program; if not, write to the Free Software
|
along with this program; if not, write to the Free Software
|
||||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
typedef struct lz_info lz_info;
|
typedef struct lz_info lz_info;
|
||||||
typedef int (*get_chars_t)(lz_info *lzi, int n, unsigned char *buf);
|
typedef int (*get_chars_t)(lz_info *lzi, int n, unsigned char *buf);
|
||||||
typedef int (*output_match_t)(lz_info *lzi, int match_pos, int match_len);
|
typedef int (*output_match_t)(lz_info *lzi, int match_pos, int match_len);
|
||||||
|
@ -16,6 +16,8 @@
|
|||||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#include "msstdint.h"
|
#include "msstdint.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -10,10 +10,10 @@
|
|||||||
* For further details, see the file COPYING.LIB distributed with libmspack
|
* For further details, see the file COPYING.LIB distributed with libmspack
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
#ifndef MSPACK_LZX_H
|
|
||||||
#define MSPACK_LZX_H 1
|
|
||||||
|
|
||||||
/* LZX compression / decompression definitions */
|
/* LZX compression / decompression definitions */
|
||||||
|
|
||||||
@ -166,4 +166,3 @@ extern int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes);
|
|||||||
*/
|
*/
|
||||||
void lzxd_free(struct lzxd_stream *lzx);
|
void lzxd_free(struct lzxd_stream *lzx);
|
||||||
|
|
||||||
#endif
|
|
||||||
|
@ -3,13 +3,10 @@
|
|||||||
*
|
*
|
||||||
* Common declarations for Python module C glue code.
|
* Common declarations for Python module C glue code.
|
||||||
*/
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
|
|
||||||
#ifndef LZXMODULE_H
|
|
||||||
#define LZXMODULE_H
|
|
||||||
|
|
||||||
extern PyObject *LZXError;
|
extern PyObject *LZXError;
|
||||||
extern PyTypeObject CompressorType;
|
extern PyTypeObject CompressorType;
|
||||||
|
|
||||||
#endif /* LZXMODULE_H */
|
|
||||||
|
@ -115,8 +115,7 @@
|
|||||||
* - #MSPACK_ERR_DECRUNCH indicates an error occured during decompression.
|
* - #MSPACK_ERR_DECRUNCH indicates an error occured during decompression.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LIB_MSPACK_H
|
#pragma once
|
||||||
#define LIB_MSPACK_H 1
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -1479,4 +1478,3 @@ struct mskwaj_decompressor {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
|
@ -29,17 +29,12 @@
|
|||||||
//
|
//
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
#ifndef _MSC_VER // [
|
#ifndef _MSC_VER // [
|
||||||
#error "Use this header only with Microsoft Visual C++ compilers!"
|
#error "Use this header only with Microsoft Visual C++ compilers!"
|
||||||
#endif // _MSC_VER ]
|
#endif // _MSC_VER ]
|
||||||
|
|
||||||
#ifndef _MSC_STDINT_H_ // [
|
|
||||||
#define _MSC_STDINT_H_
|
|
||||||
|
|
||||||
#if _MSC_VER > 1000
|
|
||||||
#pragma once
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
|
||||||
// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
|
// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
|
||||||
@ -228,5 +223,3 @@ typedef uint64_t uintmax_t;
|
|||||||
|
|
||||||
#endif // __STDC_CONSTANT_MACROS ]
|
#endif // __STDC_CONSTANT_MACROS ]
|
||||||
|
|
||||||
|
|
||||||
#endif // _MSC_STDINT_H_ ]
|
|
||||||
|
@ -7,8 +7,7 @@
|
|||||||
* For further details, see the file COPYING.LIB distributed with libmspack
|
* For further details, see the file COPYING.LIB distributed with libmspack
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef MSPACK_SYSTEM_H
|
#pragma once
|
||||||
#define MSPACK_SYSTEM_H 1
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#define inline
|
#define inline
|
||||||
@ -62,5 +61,3 @@ static inline size_t strlen(const char *s) {
|
|||||||
return e - s;
|
return e - s;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
|
@ -10,6 +10,8 @@
|
|||||||
* (GEnie : OUTER; CIS : [71755,204])
|
* (GEnie : OUTER; CIS : [71755,204])
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
#undef D2_DES
|
#undef D2_DES
|
||||||
#undef D3_DES
|
#undef D3_DES
|
||||||
|
|
||||||
|
@ -21,6 +21,8 @@
|
|||||||
| URL: http://www.gnu.org/licenses/gpl.html
|
| URL: http://www.gnu.org/licenses/gpl.html
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
static unsigned long SP1[64] = {
|
static unsigned long SP1[64] = {
|
||||||
0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,
|
0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,
|
||||||
0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,
|
0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,
|
||||||
|
@ -6,7 +6,7 @@ Builtin recipes.
|
|||||||
'''
|
'''
|
||||||
recipe_modules = ['recipe_' + r for r in (
|
recipe_modules = ['recipe_' + r for r in (
|
||||||
'newsweek', 'atlantic', 'economist', 'portfolio', 'the_register',
|
'newsweek', 'atlantic', 'economist', 'portfolio', 'the_register',
|
||||||
'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj',
|
'usatoday', 'bbc', 'greader', 'wsj', #'outlook_india',
|
||||||
'wired', 'globe_and_mail', 'smh', 'espn', 'business_week', 'miami_herald',
|
'wired', 'globe_and_mail', 'smh', 'espn', 'business_week', 'miami_herald',
|
||||||
'ars_technica', 'upi', 'new_yorker', 'irish_times', 'lanacion',
|
'ars_technica', 'upi', 'new_yorker', 'irish_times', 'lanacion',
|
||||||
'discover_magazine', 'scientific_american', 'new_york_review_of_books',
|
'discover_magazine', 'scientific_american', 'new_york_review_of_books',
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: cp1252 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
@ -10,28 +11,62 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class Honoluluadvertiser(BasicNewsRecipe):
|
class Honoluluadvertiser(BasicNewsRecipe):
|
||||||
title = 'Honolulu Advertiser'
|
title = 'Honolulu Advertiser'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic and Sujata Raman'
|
||||||
description = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
|
description = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
|
||||||
publisher = 'Honolulu Advertiser'
|
publisher = 'Honolulu Advertiser'
|
||||||
category = 'news, Honolulu, Hawaii'
|
category = 'news, Honolulu, Hawaii'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
|
remove_javascript = True
|
||||||
|
cover_url = 'http://www.honoluluadvertiser.com/graphics/frontpage/frontpage.jpg'
|
||||||
|
|
||||||
conversion_options = {
|
html2lrf_options = [
|
||||||
'comments' : description
|
'--comment' , description
|
||||||
,'tags' : category
|
, '--category' , category
|
||||||
,'language' : language
|
, '--publisher' , publisher
|
||||||
,'publisher' : publisher
|
]
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='td')]
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link'])]
|
keep_only_tags = [dict(name='div', attrs={'class':["hon_article_top","article-bodytext","hon_article_photo","storyphoto","article"]}),
|
||||||
remove_attributes = ['style']
|
dict(name='div', attrs={'id':["storycontentleft","article"]})
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags = [dict(name=['object','link','embed']),
|
||||||
|
dict(name='div', attrs={'class':["article-tools","titleBar","invisiblespacer","articleflex-container","hon_newslist","categoryheader","columnframe","subHeadline","poster-container"]}),
|
||||||
|
dict(name='div', attrs={'align':["right"]}),
|
||||||
|
dict(name='div', attrs={'id':["pluckcomments"]}),
|
||||||
|
dict(name='td', attrs={'class':["prepsfacts"]}),
|
||||||
|
dict(name='img', attrs={'height':["1"]}),
|
||||||
|
dict(name='img', attrs={'alt':["Advertisement"]}),
|
||||||
|
dict(name='img', attrs={'src':["/gcicommonfiles/sr/graphics/common/adlabel_horz.gif","/gcicommonfiles/sr/graphics/common/icon_whatsthis.gif",]}),
|
||||||
|
]
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; }
|
||||||
|
.hon_article_timestamp{font-family:Arial,Helvetica,sans-serif; font-size:70%; }
|
||||||
|
.postedStoryDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
|
||||||
|
.postedDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
|
||||||
|
.credit{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
|
||||||
|
.hon_article_top{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%; font-weight:bold;}
|
||||||
|
.grayBackground{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%;}
|
||||||
|
.hon_photocaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
|
||||||
|
.photoCaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
|
||||||
|
.hon_photocredit{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;}
|
||||||
|
.storyphoto{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;}
|
||||||
|
.article-bodytext{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
|
||||||
|
.storycontentleft{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
|
||||||
|
#article{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
|
||||||
|
.contentarea{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
|
||||||
|
.storytext{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:xx-small;}
|
||||||
|
.storyHeadline{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; font-weight:bold;}
|
||||||
|
.source{font-family:Arial,Helvetica,sans-serif; color:#333333; font-style: italic; font-weight:bold; }
|
||||||
|
'''
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' )
|
(u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' )
|
||||||
@ -43,13 +78,19 @@ class Honoluluadvertiser(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
st = soup.find('td')
|
for item in soup.findAll(style=True):
|
||||||
if st:
|
del item['style']
|
||||||
st.name = 'div'
|
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
|
||||||
|
for tag in soup.findAll(name=['span','table','font']):
|
||||||
|
tag.name = 'div'
|
||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
ubody, sep, rest = url.rpartition('?source')
|
# def print_version(self, url):
|
||||||
root, sep2, article_id = ubody.partition('/article/')
|
# ubody, sep, rest = url.rpartition('/-1/')
|
||||||
return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'
|
# root, sep2, article_id = ubody.partition('/article/')
|
||||||
|
# return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'
|
||||||
|
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
@ -10,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class TheMiamiHerald(BasicNewsRecipe):
|
class TheMiamiHerald(BasicNewsRecipe):
|
||||||
title = 'The Miami Herald'
|
title = 'The Miami Herald'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic and Sujata Raman'
|
||||||
description = "Miami-Dade and Broward's source for the latest breaking local news on sports, weather, business, jobs, real estate, shopping, health, travel, entertainment, & more."
|
description = "Miami-Dade and Broward's source for the latest breaking local news on sports, weather, business, jobs, real estate, shopping, health, travel, entertainment, & more."
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
@ -22,19 +21,28 @@ class TheMiamiHerald(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
|
||||||
|
|
||||||
html2lrf_options = [
|
extra_css = '''
|
||||||
'--comment' , description
|
h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#1A272F; }
|
||||||
, '--category' , category
|
.subheadline{font-family:Arial,Helvetica,sans-serif; font-size:30%; color: #666666;}
|
||||||
, '--publisher' , publisher
|
#storyBodyContent{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
|
||||||
]
|
.byline{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#58595B; }
|
||||||
|
.credit_line{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#58595B; }
|
||||||
|
.storyPublishDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666; }
|
||||||
|
.shirttail{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;font-style:italic }
|
||||||
|
.imageCaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666; }
|
||||||
|
'''
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id':['storyBody','storyPhotoContentArea']}),
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags = [dict(name=['object','link','embed']),
|
||||||
|
dict(name='div', attrs={'class':["imageBuyButton","shareLinksArea","storyTools","spill_navigation pagination","circPromoArea","storyTools_footer","storyYahooContentMatch"]}) ,
|
||||||
|
dict(name='div', attrs={'id':["pluck","mlt","storyAssets"]}) ]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'pageContainer'})]
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Breaking News' , u'http://www.miamiherald.com/416/index.xml' )
|
(u'Breaking News' , u'http://www.miamiherald.com/416/index.xml' )
|
||||||
,(u'Miami-Dade' , u'http://www.miamiherald.com/460/index.xml' )
|
,(u'Miami-Dade' , u'http://www.miamiherald.com/460/index.xml' )
|
||||||
,(u'Broward' , u'http://www.miamiherald.com/467/index.xml' )
|
,(u'Broward' , u'http://www.miamiherald.com/467/index.xml' )
|
||||||
,(u'Florida Keys' , u'http://www.miamiherald.com/505/index.xml' )
|
,(u'Florida Keys' , u'http://www.miamiherald.com/505/index.xml' )
|
||||||
@ -49,6 +57,26 @@ class TheMiamiHerald(BasicNewsRecipe):
|
|||||||
,(u'Environment' , u'http://www.miamiherald.com/573/index.xml' )
|
,(u'Environment' , u'http://www.miamiherald.com/573/index.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
return url.replace('/story/','/v-print/story/')
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
ans = article.get('guid', None)
|
||||||
|
print ans
|
||||||
|
try:
|
||||||
|
self.log('Looking for full story link in', ans)
|
||||||
|
soup = self.index_to_soup(ans)
|
||||||
|
x = soup.find(text="Full Story")
|
||||||
|
|
||||||
|
if x is not None:
|
||||||
|
a = x.parent
|
||||||
|
if a and a.has_key('href'):
|
||||||
|
ans = 'http://www.miamiherald.com'+a['href']
|
||||||
|
self.log('Found full story link', ans)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -10,18 +10,64 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class Moscowtimes(BasicNewsRecipe):
|
class Moscowtimes(BasicNewsRecipe):
|
||||||
title = u'The Moscow Times'
|
title = u'The Moscow Times'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic and Sujata Raman'
|
||||||
description = 'News from Russia'
|
description = 'News from Russia'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
lang = 'en'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
#encoding = 'utf-8'
|
||||||
|
encoding = 'cp1252'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'language' : lang
|
||||||
|
}
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large}
|
||||||
|
.article_date{ font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; color:#000000; font-size: x-small;}
|
||||||
|
.autors{color:#999999 ; font-weight: bold ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
|
||||||
|
.photoautors{ color:#999999 ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
|
||||||
|
.text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
|
||||||
|
'''
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'The Moscow Times' , u'http://www.themoscowtimes.com/rss.xml' )
|
(u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'),
|
||||||
|
(u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'),
|
||||||
|
(u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'),
|
||||||
|
(u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'),
|
||||||
|
(u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'),
|
||||||
|
(u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion')
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
keep_only_tags = [
|
||||||
return url + '&print=Y'
|
dict(name='div', attrs={'class':['newstextblock']})
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'class':['photo_nav']})
|
||||||
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
soup.html['xml:lang'] = self.lang
|
||||||
|
soup.html['lang'] = self.lang
|
||||||
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
|
||||||
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
|
||||||
|
href = 'http://www.themoscowtimes.com/pdf/'
|
||||||
|
|
||||||
|
soup = self.index_to_soup(href)
|
||||||
|
div = soup.find('div',attrs={'class':'left'})
|
||||||
|
a = div.find('a')
|
||||||
|
print a
|
||||||
|
if a :
|
||||||
|
cover_url = a.img['src']
|
||||||
|
return cover_url
|
||||||
|
@ -1,62 +1,43 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
msdn.microsoft.com/en-us/magazine
|
msdn.microsoft.com/en-us/magazine
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class MSDNMagazine_en(BasicNewsRecipe):
|
class MSDNMagazine_en(BasicNewsRecipe):
|
||||||
title = 'MSDN Magazine'
|
title = 'MSDN Magazine'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'The Microsoft Journal for Developers'
|
description = 'The Microsoft Journal for Developers'
|
||||||
publisher = 'Microsoft Press'
|
publisher = 'Microsoft Press'
|
||||||
category = 'news, IT, Microsoft, programming, windows'
|
category = 'news, IT, Microsoft, programming, windows'
|
||||||
oldest_article = 31
|
oldest_article = 31
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
language = 'en'
|
||||||
current_issue = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
|
feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
|
||||||
html2lrf_options = [
|
|
||||||
'--comment', description
|
keep_only_tags = [dict(name='div', attrs={'class':'navpage'})]
|
||||||
, '--category', category
|
|
||||||
, '--publisher', publisher
|
remove_tags = [
|
||||||
]
|
dict(name=['object','link','base','table'])
|
||||||
|
,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
]
|
||||||
|
remove_tags_after = dict(name='div', attrs={'class':'navpage'})
|
||||||
feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'topic'})]
|
for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
|
||||||
|
item.name="h2"
|
||||||
remove_tags = [
|
for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
|
||||||
dict(name=['object','link','base','table'])
|
item.name="h1"
|
||||||
,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
|
for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
|
||||||
]
|
item.name="h3"
|
||||||
|
return soup
|
||||||
def get_cover_url(self):
|
|
||||||
cover_url = None
|
|
||||||
soup = self.index_to_soup(self.current_issue)
|
|
||||||
link_item = soup.find('span',attrs={'class':'ContentsImageSpacer'})
|
|
||||||
if link_item:
|
|
||||||
imgt = link_item.find('img')
|
|
||||||
if imgt:
|
|
||||||
cover_url = imgt['src']
|
|
||||||
return cover_url
|
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
|
|
||||||
item.name="h2"
|
|
||||||
for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
|
|
||||||
item.name="h1"
|
|
||||||
for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
|
|
||||||
item.name="h3"
|
|
||||||
return soup
|
|
||||||
|
|
||||||
|
@ -10,10 +10,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class OurDailyBread(BasicNewsRecipe):
|
class OurDailyBread(BasicNewsRecipe):
|
||||||
title = 'Our Daily Bread'
|
title = 'Our Daily Bread'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic and Sujata Raman'
|
||||||
description = 'Religion'
|
description = 'Religion'
|
||||||
oldest_article = 15
|
oldest_article = 15
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
lang = 'en'
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -21,16 +22,43 @@ class OurDailyBread(BasicNewsRecipe):
|
|||||||
category = 'religion'
|
category = 'religion'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
extra_css = ' #devoTitle{font-size: x-large; font-weight: bold} '
|
extra_css = ' #devoTitle{font-size: x-large; font-weight: bold} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
,'tags' : category
|
,'tags' : category
|
||||||
,'language' : 'en'
|
,'language' : 'en'
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['altbg','text']})]
|
keep_only_tags = [dict(name='div', attrs={'class':['altbg','text']})]
|
||||||
|
|
||||||
|
remove_tags = [dict(name='div', attrs={'id':['ctl00_cphPrimary_pnlBookCover']}),
|
||||||
|
dict(name='div', attrs={'class':['devotionalLinks']})
|
||||||
|
]
|
||||||
|
extra_css = '''
|
||||||
|
.text{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
||||||
|
.devotionalTitle{font-family:Arial,Helvetica,sans-serif; font-size:large; font-weight: bold;}
|
||||||
|
.devotionalDate{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
|
||||||
|
.devotionalVerse{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
|
||||||
|
'''
|
||||||
|
|
||||||
feeds = [(u'Our Daily Bread', u'http://www.rbc.org/rss.ashx?id=50398')]
|
feeds = [(u'Our Daily Bread', u'http://www.rbc.org/rss.ashx?id=50398')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
soup.html['xml:lang'] = self.lang
|
||||||
|
soup.html['lang'] = self.lang
|
||||||
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
def get_cover_url(self):
    """Return the URL of today's devotional image, or None if it is absent.

    Fetches the rbc.org front page through ``self.index_to_soup`` and looks
    for the anchor whose id is ``ctl00_hlTodaysDevotionalImage``; the cover
    is the ``src`` attribute of the image nested inside that anchor.
    """
    # Start from None so a page without the anchor (or without an image
    # inside it) yields "no cover" instead of raising UnboundLocalError.
    cover_url = None

    href = 'http://www.rbc.org/index.aspx'
    soup = self.index_to_soup(href)
    a = soup.find('a', attrs={'id': 'ctl00_hlTodaysDevotionalImage'})

    if a is not None and a.img is not None:
        cover_url = a.img['src']

    return cover_url
|
||||||
|
@ -1,57 +1,50 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.straitstimes.com
|
www.straitstimes.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
class StraitsTimes(BasicNewsRecipe):
|
class StraitsTimes(BasicNewsRecipe):
|
||||||
title = 'The Straits Times'
|
title = 'The Straits Times'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Singapore newspaper'
|
description = 'Singapore newspaper'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
publisher = 'Singapore Press Holdings Ltd.'
|
publisher = 'Singapore Press Holdings Ltd.'
|
||||||
category = 'news, politics, singapore, asia'
|
category = 'news, politics, singapore, asia'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
extra_css = ' .top_headline{font-size: x-large; font-weight: bold} '
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comments' : description
|
||||||
, '--category', category
|
,'tags' : category
|
||||||
, '--publisher', publisher
|
,'language' : language
|
||||||
, '--ignore-tables'
|
,'publisher' : publisher
|
||||||
]
|
}
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
remove_tags = [dict(name=['object','link','map'])]
|
||||||
|
|
||||||
remove_tags = [
|
keep_only_tags = [dict(name='div', attrs={'class':['top_headline','story_text']})]
|
||||||
dict(name=['object','link'])
|
|
||||||
,dict(name='table', attrs={'width':'980'})
|
feeds = [
|
||||||
,dict(name='td' , attrs={'class':'padlrt10'})
|
(u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' )
|
||||||
]
|
,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' )
|
||||||
|
,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' )
|
||||||
feeds = [
|
,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' )
|
||||||
(u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' )
|
,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' )
|
||||||
,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' )
|
,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' )
|
||||||
,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' )
|
,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' )
|
||||||
,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' )
|
]
|
||||||
,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' )
|
|
||||||
,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' )
|
def preprocess_html(self, soup):
|
||||||
,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' )
|
for item in soup.findAll(style=True):
|
||||||
]
|
del item['style']
|
||||||
|
return soup
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
return url.replace('http://www.straitstimes.com','http://www.straitstimes.com/print')
|
|
||||||
|
|
||||||
|
50
src/calibre/web/feeds/recipes/recipe_toronto_sun.py
Normal file
50
src/calibre/web/feeds/recipes/recipe_toronto_sun.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.torontosun.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TorontoSun(BasicNewsRecipe):
    """Recipe that builds an e-book from the Toronto Sun RSS feeds
    (www.torontosun.com)."""

    # Descriptive metadata for the generated publication.
    title                 = 'Toronto SUN'
    __author__            = 'Darko Miletic'
    description           = 'News from Canada'
    publisher             = 'Toronto Sun'
    category              = 'news, politics, Canada'

    # Download settings.
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    language              = 'en_CA'

    # Metadata handed to the conversion step. The description key is
    # 'comments' (plural), as in the sibling recipes; the singular
    # 'comment' is not an option name they use.
    conversion_options = {
        'comments'  : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language,
    }

    # Article markup to keep: the headline block and the main content block.
    keep_only_tags = [
        dict(name='div', attrs={'class':'articleHead'}),
        dict(name='div', attrs={'id':'channelContent'}),
    ]

    # Markup to strip from what was kept (side boxes, breadcrumbs, embeds,
    # tab switchers).
    remove_tags = [
        dict(name='div', attrs={'class':['leftBox','bottomBox clear','bottomBox','breadCrumb']}),
        dict(name=['link','iframe','object']),
        dict(name='a', attrs={'rel':'swap'}),
        dict(name='ul', attrs={'class':'tabs dl contentSwap'}),
    ]

    remove_tags_after = dict(name='div', attrs={'class':'bottomBox clear'})

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'News'      , u'http://www.torontosun.com/news/rss.xml'           ),
        (u'Canada'    , u'http://www.torontosun.com/news/canada/rss.xml'    ),
        (u'Columnists', u'http://www.torontosun.com/news/columnists/rss.xml'),
        (u'World'     , u'http://www.torontosun.com/news/world/rss.xml'     ),
        (u'Money'     , u'http://www.torontosun.com/money/rss.xml'          ),
    ]
|
@ -8,7 +8,7 @@ Usage:
|
|||||||
First use 'doAuth' to request the client authentication for a
|
First use 'doAuth' to request the client authentication for a
|
||||||
certain resource. You should send an httplib.UNAUTHORIZED response to the
|
certain resource. You should send an httplib.UNAUTHORIZED response to the
|
||||||
client so it knows it has to authenticate itself.
|
client so it knows it has to authenticate itself.
|
||||||
|
|
||||||
Then use 'parseAuthorization' to retrieve the 'auth_map' used in
|
Then use 'parseAuthorization' to retrieve the 'auth_map' used in
|
||||||
'checkResponse'.
|
'checkResponse'.
|
||||||
|
|
||||||
@ -30,27 +30,27 @@ __license__ = """
|
|||||||
Copyright (c) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net>
|
Copyright (c) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net>
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without modification,
|
Redistribution and use in source and binary forms, with or without modification,
|
||||||
are permitted provided that the following conditions are met:
|
are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
this list of conditions and the following disclaimer.
|
this list of conditions and the following disclaimer.
|
||||||
* Redistributions in binary form must reproduce the above copyright notice,
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
this list of conditions and the following disclaimer in the documentation
|
this list of conditions and the following disclaimer in the documentation
|
||||||
and/or other materials provided with the distribution.
|
and/or other materials provided with the distribution.
|
||||||
* Neither the name of Sylvain Hellegouarch nor the names of his contributors
|
* Neither the name of Sylvain Hellegouarch nor the names of his contributors
|
||||||
may be used to endorse or promote products derived from this software
|
may be used to endorse or promote products derived from this software
|
||||||
without specific prior written permission.
|
without specific prior written permission.
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -83,6 +83,7 @@ SUPPORTED_QOP = (AUTH, AUTH_INT)
|
|||||||
#
|
#
|
||||||
DIGEST_AUTH_ENCODERS = {
|
DIGEST_AUTH_ENCODERS = {
|
||||||
MD5: lambda val: md5(val).hexdigest(),
|
MD5: lambda val: md5(val).hexdigest(),
|
||||||
|
'md5': lambda val:md5(val).hexdigest(),
|
||||||
MD5_SESS: lambda val: md5(val).hexdigest(),
|
MD5_SESS: lambda val: md5(val).hexdigest(),
|
||||||
# SHA: lambda val: sha(val).hexdigest(),
|
# SHA: lambda val: sha(val).hexdigest(),
|
||||||
}
|
}
|
||||||
@ -125,7 +126,7 @@ def doAuth (realm):
|
|||||||
"""'doAuth' function returns the challenge string by giving priority over
|
"""'doAuth' function returns the challenge string by giving priority over
|
||||||
Digest and fallback to Basic authentication when the browser doesn't
|
Digest and fallback to Basic authentication when the browser doesn't
|
||||||
support the first one.
|
support the first one.
|
||||||
|
|
||||||
This should be set in the HTTP header under the key 'WWW-Authenticate'."""
|
This should be set in the HTTP header under the key 'WWW-Authenticate'."""
|
||||||
|
|
||||||
return digestAuth (realm) + " " + basicAuth (realm)
|
return digestAuth (realm) + " " + basicAuth (realm)
|
||||||
@ -195,7 +196,7 @@ def parseAuthorization (credentials):
|
|||||||
#
|
#
|
||||||
def md5SessionKey (params, password):
|
def md5SessionKey (params, password):
|
||||||
"""
|
"""
|
||||||
If the "algorithm" directive's value is "MD5-sess", then A1
|
If the "algorithm" directive's value is "MD5-sess", then A1
|
||||||
[the session key] is calculated only once - on the first request by the
|
[the session key] is calculated only once - on the first request by the
|
||||||
client following receipt of a WWW-Authenticate challenge from the server.
|
client following receipt of a WWW-Authenticate challenge from the server.
|
||||||
|
|
||||||
@ -340,24 +341,24 @@ AUTH_RESPONSES = {
|
|||||||
def checkResponse (auth_map, password, method = "GET", encrypt=None, **kwargs):
|
def checkResponse (auth_map, password, method = "GET", encrypt=None, **kwargs):
|
||||||
"""'checkResponse' compares the auth_map with the password and optionally
|
"""'checkResponse' compares the auth_map with the password and optionally
|
||||||
other arguments that each implementation might need.
|
other arguments that each implementation might need.
|
||||||
|
|
||||||
If the response is of type 'Basic' then the function has the following
|
If the response is of type 'Basic' then the function has the following
|
||||||
signature:
|
signature:
|
||||||
|
|
||||||
checkBasicResponse (auth_map, password) -> bool
|
checkBasicResponse (auth_map, password) -> bool
|
||||||
|
|
||||||
If the response is of type 'Digest' then the function has the following
|
If the response is of type 'Digest' then the function has the following
|
||||||
signature:
|
signature:
|
||||||
|
|
||||||
checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool
|
checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool
|
||||||
|
|
||||||
The 'A1' argument is only used in MD5_SESS algorithm based responses.
|
The 'A1' argument is only used in MD5_SESS algorithm based responses.
|
||||||
Check md5SessionKey() for more info.
|
Check md5SessionKey() for more info.
|
||||||
"""
|
"""
|
||||||
global AUTH_RESPONSES
|
global AUTH_RESPONSES
|
||||||
checker = AUTH_RESPONSES[auth_map["auth_scheme"]]
|
checker = AUTH_RESPONSES[auth_map["auth_scheme"]]
|
||||||
return checker (auth_map, password, method=method, encrypt=encrypt, **kwargs)
|
return checker (auth_map, password, method=method, encrypt=encrypt, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user