Sync to trunk.

This commit is contained in:
John Schember 2009-10-08 07:41:42 -04:00
commit bb6ef9ab4d
33 changed files with 409 additions and 236 deletions

View File

@ -247,7 +247,7 @@ function setup_sorting() {
}); });
$('table#book_list thead tr td').mouseout(function() { $('table#book_list thead tr td').mouseout(function() {
this.style.backgroundColor = "inherit"; this.style.backgroundColor = "transparent";
}); });
for (i = 0; i < cmap.length; i++) { for (i = 0; i < cmap.length; i++) {

Binary file not shown.

After

Width:  |  Height:  |  Size: 630 B

View File

@ -102,6 +102,7 @@ extensions = [
libraries=['podofo'], libraries=['podofo'],
lib_dirs=[podofo_lib], lib_dirs=[podofo_lib],
inc_dirs=[podofo_inc], inc_dirs=[podofo_inc],
optional=True,
error=podofo_error), error=podofo_error),
Extension('pictureflow', Extension('pictureflow',

View File

@ -6,13 +6,15 @@ __docformat__ = 'restructuredtext en'
''' '''
Freeze app into executable using py2exe. Freeze app into executable using py2exe.
''' '''
import sys, os
QT_DIR = 'C:\\Qt\\4.5.2' QT_DIR = 'C:\\Qt\\4.5.2'
LIBUSB_DIR = 'C:\\libusb' LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll' LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
IMAGEMAGICK_DIR = 'C:\\ImageMagick'
SW = r'C:\cygwin\home\kovid\sw' SW = r'C:\cygwin\home\kovid\sw'
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.5.6',
'VisualMagick', 'bin')
import sys
def fix_module_finder(): def fix_module_finder():
# ModuleFinder can't handle runtime changes to __path__, but win32com uses them # ModuleFinder can't handle runtime changes to __path__, but win32com uses them
@ -186,6 +188,15 @@ class BuildEXE(bc):
for pat in ('*.dll', '*.sys', '*.cat', '*.inf'): for pat in ('*.dll', '*.sys', '*.cat', '*.inf'):
for f in glob.glob(os.path.join(LIBUSB_DIR, pat)): for f in glob.glob(os.path.join(LIBUSB_DIR, pat)):
shutil.copyfile(f, os.path.join(tdir, os.path.basename(f))) shutil.copyfile(f, os.path.join(tdir, os.path.basename(f)))
# Copy ImageMagick
for pat in ('*.dll', '*.xml'):
for f in glob.glob(self.j(IMAGEMAGICK, pat)):
ok = True
for ex in ('magick++', 'x11.dll', 'xext.dll'):
if ex in f.lower(): ok = False
if not ok: continue
shutil.copy2(f, self.dll_dir)
print '\tAdding unrar' print '\tAdding unrar'
shutil.copyfile(LIBUNRAR, os.path.join(PY2EXE_DIR, os.path.basename(LIBUNRAR))) shutil.copyfile(LIBUNRAR, os.path.join(PY2EXE_DIR, os.path.basename(LIBUNRAR)))

View File

@ -190,6 +190,9 @@ def main(args=sys.argv):
elif opts.develop_from is not None: elif opts.develop_from is not None:
develop_from(opts.develop_from) develop_from(opts.develop_from)
else: else:
from calibre.utils.config import config_dir
ipydir = os.path.join(config_dir, ('_' if iswindows else '.')+'ipython')
os.environ['IPYTHONDIR'] = ipydir
from IPython.Shell import IPShellEmbed from IPython.Shell import IPShellEmbed
ipshell = IPShellEmbed() ipshell = IPShellEmbed()
ipshell() ipshell()

View File

@ -162,6 +162,7 @@ class USBMS(CLI, Device):
@classmethod @classmethod
def book_from_path(cls, path): def book_from_path(cls, path):
from calibre.ebooks.metadata.meta import path_to_ext from calibre.ebooks.metadata.meta import path_to_ext
from calibre.ebooks.metadata import MetaInformation
mime = mime_type_ext(path_to_ext(path)) mime = mime_type_ext(path_to_ext(path))
if cls.settings().read_metadata or cls.MUST_READ_METADATA: if cls.settings().read_metadata or cls.MUST_READ_METADATA:
@ -171,6 +172,10 @@ class USBMS(CLI, Device):
mi = metadata_from_filename(os.path.basename(path), mi = metadata_from_filename(os.path.basename(path),
re.compile(r'^(?P<title>[ \S]+?)[ _]-[ _](?P<author>[ \S]+?)_+\d+')) re.compile(r'^(?P<title>[ \S]+?)[ _]-[ _](?P<author>[ \S]+?)_+\d+'))
if mi is None:
mi = MetaInformation(os.path.splitext(os.path.basename(path))[0],
[_('Unknown')])
authors = authors_to_string(mi.authors) authors = authors_to_string(mi.authors)
book = Book(path, mi.title, authors, mime) book = Book(path, mi.title, authors, mime)

View File

@ -4,9 +4,7 @@
*/ */
#pragma once
#ifndef CALIBRE_REFLOW_FONTS
#define CALIBRE_REFLOW_FONTS
#include <vector> #include <vector>
#include <sstream> #include <sstream>
@ -102,4 +100,3 @@ class Fonts : public vector<XMLFont*> {
} }
#endif

View File

@ -1,5 +1,4 @@
#ifndef _CALIBRE_REFLOW_IMAGES #pragma once
#define _CALIBRE_REFLOW_IMAGES
#include <vector> #include <vector>
#include <GfxState.h> #include <GfxState.h>
@ -127,4 +126,3 @@ namespace calibre_reflow {
}; };
*/ */
} }
#endif

View File

@ -5,9 +5,7 @@
#ifndef _CALIBRE_XML_LINKS #pragma once
#define _CALIBRE_XML_LINKS
#include <vector> #include <vector>
#include <sstream> #include <sstream>
@ -65,5 +63,4 @@ class XMLLinks : public vector<XMLLink*> {
} }
#endif

View File

@ -4,8 +4,7 @@
* Based on pdftohtml from the poppler project. * Based on pdftohtml from the poppler project.
*/ */
#ifndef CALIBRE_REFLOW #pragma once
#define CALIBRE_REFLOW
#define UNICODE #define UNICODE
#ifdef _WIN32 #ifdef _WIN32
@ -248,4 +247,3 @@ class XMLOutputDev : public OutputDev {
void process_link(Link* link); void process_link(Link* link);
}; };
} }
#endif

View File

@ -4,9 +4,7 @@
*/ */
#ifndef _CALIBRE_REFLOW_UTILS #pragma once
#define _CALIBRE_REFLOW_UTILS
#include <string> #include <string>
#include <sstream> #include <sstream>
@ -45,4 +43,3 @@ inline string encode_for_xml(const string &sSrc )
} }
#endif

View File

@ -55,8 +55,7 @@
THE SOFTWARE. THE SOFTWARE.
*/ */
#ifndef PICTUREFLOW_H #pragma once
#define PICTUREFLOW_H
#include <QWidget> #include <QWidget>
@ -202,4 +201,3 @@ private:
PictureFlowPrivate* d; PictureFlowPrivate* d;
}; };
#endif // PICTUREFLOW_H

View File

@ -112,10 +112,13 @@ sudo python setup.py install
</pre> </pre>
Note that if your distribution does not have a Note that if your distribution does not have a
correctly compiled libunrar.so, ${app} will not correctly compiled libunrar.so, ${app} will not
support rar files. The calibre_postinstall step support rar files. In order to compile ${app} successfully
is required for device detection and integration poppler headers must include XPdf headers. That is, poppler
with your desktop environment. must have been configured with --enable-xpdf-headers. Also,
</p> some distributions have buggy libpng headers. See
<a href="https://bugs.launchpad.net/ubuntu/+source/libpng/+bug/218409">here</a>
for example.
</p>
</div> </div>
</td> </td>
</tr> </tr>

View File

@ -411,7 +411,11 @@ class Config(ConfigInterface):
if os.path.exists(self.config_file_path): if os.path.exists(self.config_file_path):
try: try:
with ExclusiveFile(self.config_file_path) as f: with ExclusiveFile(self.config_file_path) as f:
src = f.read().decode('utf-8') try:
src = f.read().decode('utf-8')
except ValueError:
print "Failed to parse", self.config_file_path
traceback.print_exc()
except LockError: except LockError:
raise IOError('Could not lock config file: %s'%self.config_file_path) raise IOError('Could not lock config file: %s'%self.config_file_path)
return self.option_set.parse_string(src) return self.option_set.parse_string(src)

View File

@ -131,7 +131,7 @@ class BaseJob(object):
@property @property
def details(self): def details(self):
return self.log_file.read().decode('utf-8') return self.log_file.read().decode('utf-8', 'replace')
class ParallelJob(BaseJob): class ParallelJob(BaseJob):

View File

@ -15,6 +15,9 @@
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/ */
#pragma once
typedef struct lz_info lz_info; typedef struct lz_info lz_info;
typedef int (*get_chars_t)(lz_info *lzi, int n, unsigned char *buf); typedef int (*get_chars_t)(lz_info *lzi, int n, unsigned char *buf);
typedef int (*output_match_t)(lz_info *lzi, int match_pos, int match_len); typedef int (*output_match_t)(lz_info *lzi, int match_pos, int match_len);

View File

@ -16,6 +16,8 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/ */
#pragma once
#ifdef _MSC_VER #ifdef _MSC_VER
#include "msstdint.h" #include "msstdint.h"
#endif #endif

View File

@ -10,10 +10,10 @@
* For further details, see the file COPYING.LIB distributed with libmspack * For further details, see the file COPYING.LIB distributed with libmspack
*/ */
#pragma once
#include <sys/types.h> #include <sys/types.h>
#ifndef MSPACK_LZX_H
#define MSPACK_LZX_H 1
/* LZX compression / decompression definitions */ /* LZX compression / decompression definitions */
@ -166,4 +166,3 @@ extern int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes);
*/ */
void lzxd_free(struct lzxd_stream *lzx); void lzxd_free(struct lzxd_stream *lzx);
#endif

View File

@ -3,13 +3,10 @@
* *
* Common declarations for Python module C glue code. * Common declarations for Python module C glue code.
*/ */
#pragma once
#include <Python.h> #include <Python.h>
#ifndef LZXMODULE_H
#define LZXMODULE_H
extern PyObject *LZXError; extern PyObject *LZXError;
extern PyTypeObject CompressorType; extern PyTypeObject CompressorType;
#endif /* LZXMODULE_H */

View File

@ -115,8 +115,7 @@
* - #MSPACK_ERR_DECRUNCH indicates an error occured during decompression. * - #MSPACK_ERR_DECRUNCH indicates an error occured during decompression.
*/ */
#ifndef LIB_MSPACK_H #pragma once
#define LIB_MSPACK_H 1
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -1479,4 +1478,3 @@ struct mskwaj_decompressor {
}; };
#endif #endif
#endif

View File

@ -29,17 +29,12 @@
// //
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
#pragma once
#ifndef _MSC_VER // [ #ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!" #error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ] #endif // _MSC_VER ]
#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <limits.h> #include <limits.h>
// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}' // For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
@ -228,5 +223,3 @@ typedef uint64_t uintmax_t;
#endif // __STDC_CONSTANT_MACROS ] #endif // __STDC_CONSTANT_MACROS ]
#endif // _MSC_STDINT_H_ ]

View File

@ -7,8 +7,7 @@
* For further details, see the file COPYING.LIB distributed with libmspack * For further details, see the file COPYING.LIB distributed with libmspack
*/ */
#ifndef MSPACK_SYSTEM_H #pragma once
#define MSPACK_SYSTEM_H 1
#ifdef _MSC_VER #ifdef _MSC_VER
#define inline #define inline
@ -62,5 +61,3 @@ static inline size_t strlen(const char *s) {
return e - s; return e - s;
} }
#endif #endif
#endif

View File

@ -10,6 +10,8 @@
* (GEnie : OUTER; CIS : [71755,204]) * (GEnie : OUTER; CIS : [71755,204])
*/ */
#pragma once
#undef D2_DES #undef D2_DES
#undef D3_DES #undef D3_DES

View File

@ -21,6 +21,8 @@
| URL: http://www.gnu.org/licenses/gpl.html | URL: http://www.gnu.org/licenses/gpl.html
*/ */
#pragma once
static unsigned long SP1[64] = { static unsigned long SP1[64] = {
0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, 0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,
0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L, 0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,

View File

@ -6,7 +6,7 @@ Builtin recipes.
''' '''
recipe_modules = ['recipe_' + r for r in ( recipe_modules = ['recipe_' + r for r in (
'newsweek', 'atlantic', 'economist', 'portfolio', 'the_register', 'newsweek', 'atlantic', 'economist', 'portfolio', 'the_register',
'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj', 'usatoday', 'bbc', 'greader', 'wsj', #'outlook_india',
'wired', 'globe_and_mail', 'smh', 'espn', 'business_week', 'miami_herald', 'wired', 'globe_and_mail', 'smh', 'espn', 'business_week', 'miami_herald',
'ars_technica', 'upi', 'new_yorker', 'irish_times', 'lanacion', 'ars_technica', 'upi', 'new_yorker', 'irish_times', 'lanacion',
'discover_magazine', 'scientific_american', 'new_york_review_of_books', 'discover_magazine', 'scientific_american', 'new_york_review_of_books',

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: cp1252 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
@ -10,28 +11,62 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Honoluluadvertiser(BasicNewsRecipe): class Honoluluadvertiser(BasicNewsRecipe):
title = 'Honolulu Advertiser' title = 'Honolulu Advertiser'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic and Sujata Raman'
description = "Latest national and local Hawaii sports news from The Honolulu Advertiser." description = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
publisher = 'Honolulu Advertiser' publisher = 'Honolulu Advertiser'
category = 'news, Honolulu, Hawaii' category = 'news, Honolulu, Hawaii'
oldest_article = 2 oldest_article = 2
language = 'en' language = 'en'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
remove_javascript = True
cover_url = 'http://www.honoluluadvertiser.com/graphics/frontpage/frontpage.jpg'
conversion_options = { html2lrf_options = [
'comments' : description '--comment' , description
,'tags' : category , '--category' , category
,'language' : language , '--publisher' , publisher
,'publisher' : publisher ]
}
keep_only_tags = [dict(name='td')] html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [dict(name=['object','link'])] keep_only_tags = [dict(name='div', attrs={'class':["hon_article_top","article-bodytext","hon_article_photo","storyphoto","article"]}),
remove_attributes = ['style'] dict(name='div', attrs={'id':["storycontentleft","article"]})
]
remove_tags = [dict(name=['object','link','embed']),
dict(name='div', attrs={'class':["article-tools","titleBar","invisiblespacer","articleflex-container","hon_newslist","categoryheader","columnframe","subHeadline","poster-container"]}),
dict(name='div', attrs={'align':["right"]}),
dict(name='div', attrs={'id':["pluckcomments"]}),
dict(name='td', attrs={'class':["prepsfacts"]}),
dict(name='img', attrs={'height':["1"]}),
dict(name='img', attrs={'alt':["Advertisement"]}),
dict(name='img', attrs={'src':["/gcicommonfiles/sr/graphics/common/adlabel_horz.gif","/gcicommonfiles/sr/graphics/common/icon_whatsthis.gif",]}),
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; }
.hon_article_timestamp{font-family:Arial,Helvetica,sans-serif; font-size:70%; }
.postedStoryDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.postedDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.credit{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.hon_article_top{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%; font-weight:bold;}
.grayBackground{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%;}
.hon_photocaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.photoCaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.hon_photocredit{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;}
.storyphoto{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;}
.article-bodytext{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
.storycontentleft{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
#article{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
.contentarea{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
.storytext{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:xx-small;}
.storyHeadline{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; font-weight:bold;}
.source{font-family:Arial,Helvetica,sans-serif; color:#333333; font-style: italic; font-weight:bold; }
'''
feeds = [ feeds = [
(u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' ) (u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' )
@ -43,13 +78,19 @@ class Honoluluadvertiser(BasicNewsRecipe):
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
st = soup.find('td') for item in soup.findAll(style=True):
if st: del item['style']
st.name = 'div' mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
soup.head.insert(0,mtag)
for tag in soup.findAll(name=['span','table','font']):
tag.name = 'div'
return soup return soup
def print_version(self, url):
ubody, sep, rest = url.rpartition('?source') # def print_version(self, url):
root, sep2, article_id = ubody.partition('/article/') # ubody, sep, rest = url.rpartition('/-1/')
return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart' # root, sep2, article_id = ubody.partition('/article/')
# return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'

View File

@ -1,4 +1,3 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
@ -10,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheMiamiHerald(BasicNewsRecipe): class TheMiamiHerald(BasicNewsRecipe):
title = 'The Miami Herald' title = 'The Miami Herald'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic and Sujata Raman'
description = "Miami-Dade and Broward's source for the latest breaking local news on sports, weather, business, jobs, real estate, shopping, health, travel, entertainment, & more." description = "Miami-Dade and Broward's source for the latest breaking local news on sports, weather, business, jobs, real estate, shopping, health, travel, entertainment, & more."
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
@ -22,19 +21,28 @@ class TheMiamiHerald(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
remove_javascript = True remove_javascript = True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2lrf_options = [ extra_css = '''
'--comment' , description h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#1A272F; }
, '--category' , category .subheadline{font-family:Arial,Helvetica,sans-serif; font-size:30%; color: #666666;}
, '--publisher' , publisher #storyBodyContent{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
] .byline{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#58595B; }
.credit_line{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#58595B; }
.storyPublishDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666; }
.shirttail{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;font-style:italic }
.imageCaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666; }
'''
keep_only_tags = [dict(name='div', attrs={'id':['storyBody','storyPhotoContentArea']}),
]
remove_tags = [dict(name=['object','link','embed']),
dict(name='div', attrs={'class':["imageBuyButton","shareLinksArea","storyTools","spill_navigation pagination","circPromoArea","storyTools_footer","storyYahooContentMatch"]}) ,
dict(name='div', attrs={'id':["pluck","mlt","storyAssets"]}) ]
keep_only_tags = [dict(name='div', attrs={'id':'pageContainer'})]
feeds = [ feeds = [
(u'Breaking News' , u'http://www.miamiherald.com/416/index.xml' ) (u'Breaking News' , u'http://www.miamiherald.com/416/index.xml' )
,(u'Miami-Dade' , u'http://www.miamiherald.com/460/index.xml' ) ,(u'Miami-Dade' , u'http://www.miamiherald.com/460/index.xml' )
,(u'Broward' , u'http://www.miamiherald.com/467/index.xml' ) ,(u'Broward' , u'http://www.miamiherald.com/467/index.xml' )
,(u'Florida Keys' , u'http://www.miamiherald.com/505/index.xml' ) ,(u'Florida Keys' , u'http://www.miamiherald.com/505/index.xml' )
@ -49,6 +57,26 @@ class TheMiamiHerald(BasicNewsRecipe):
,(u'Environment' , u'http://www.miamiherald.com/573/index.xml' ) ,(u'Environment' , u'http://www.miamiherald.com/573/index.xml' )
] ]
def print_version(self, url):
return url.replace('/story/','/v-print/story/')
def get_article_url(self, article):
ans = article.get('guid', None)
print ans
try:
self.log('Looking for full story link in', ans)
soup = self.index_to_soup(ans)
x = soup.find(text="Full Story")
if x is not None:
a = x.parent
if a and a.has_key('href'):
ans = 'http://www.miamiherald.com'+a['href']
self.log('Found full story link', ans)
except:
pass
return ans

View File

@ -10,18 +10,64 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Moscowtimes(BasicNewsRecipe): class Moscowtimes(BasicNewsRecipe):
title = u'The Moscow Times' title = u'The Moscow Times'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic and Sujata Raman'
description = 'News from Russia' description = 'News from Russia'
language = 'en' language = 'en'
lang = 'en'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
#encoding = 'utf-8'
encoding = 'cp1252'
remove_javascript = True
conversion_options = {
'comment' : description
, 'language' : lang
}
extra_css = '''
h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large}
.article_date{ font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; color:#000000; font-size: x-small;}
.autors{color:#999999 ; font-weight: bold ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
.photoautors{ color:#999999 ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
.text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
'''
feeds = [ feeds = [
(u'The Moscow Times' , u'http://www.themoscowtimes.com/rss.xml' ) (u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'),
(u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'),
(u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'),
(u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'),
(u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'),
(u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion')
] ]
def print_version(self, url): keep_only_tags = [
return url + '&print=Y' dict(name='div', attrs={'class':['newstextblock']})
]
remove_tags = [
dict(name='div', attrs={'class':['photo_nav']})
]
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return self.adeify_images(soup)
def get_cover_url(self):
href = 'http://www.themoscowtimes.com/pdf/'
soup = self.index_to_soup(href)
div = soup.find('div',attrs={'class':'left'})
a = div.find('a')
print a
if a :
cover_url = a.img['src']
return cover_url

View File

@ -1,62 +1,43 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
msdn.microsoft.com/en-us/magazine msdn.microsoft.com/en-us/magazine
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class MSDNMagazine_en(BasicNewsRecipe): class MSDNMagazine_en(BasicNewsRecipe):
title = 'MSDN Magazine' title = 'MSDN Magazine'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'The Microsoft Journal for Developers' description = 'The Microsoft Journal for Developers'
publisher = 'Microsoft Press' publisher = 'Microsoft Press'
category = 'news, IT, Microsoft, programming, windows' category = 'news, IT, Microsoft, programming, windows'
oldest_article = 31 oldest_article = 31
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
remove_javascript = True language = 'en'
current_issue = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
language = 'en'
feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
html2lrf_options = [
'--comment', description keep_only_tags = [dict(name='div', attrs={'class':'navpage'})]
, '--category', category
, '--publisher', publisher remove_tags = [
] dict(name=['object','link','base','table'])
,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' ]
remove_tags_after = dict(name='div', attrs={'class':'navpage'})
feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
def preprocess_html(self, soup):
keep_only_tags = [dict(name='div', attrs={'class':'topic'})] for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
item.name="h2"
remove_tags = [ for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
dict(name=['object','link','base','table']) item.name="h1"
,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'}) for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
] item.name="h3"
return soup
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.current_issue)
link_item = soup.find('span',attrs={'class':'ContentsImageSpacer'})
if link_item:
imgt = link_item.find('img')
if imgt:
cover_url = imgt['src']
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
item.name="h2"
for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
item.name="h1"
for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
item.name="h3"
return soup

View File

@ -10,10 +10,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
class OurDailyBread(BasicNewsRecipe): class OurDailyBread(BasicNewsRecipe):
title = 'Our Daily Bread' title = 'Our Daily Bread'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic and Sujata Raman'
description = 'Religion' description = 'Religion'
oldest_article = 15 oldest_article = 15
language = 'en' language = 'en'
lang = 'en'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
@ -21,16 +22,43 @@ class OurDailyBread(BasicNewsRecipe):
category = 'religion' category = 'religion'
encoding = 'utf-8' encoding = 'utf-8'
extra_css = ' #devoTitle{font-size: x-large; font-weight: bold} ' extra_css = ' #devoTitle{font-size: x-large; font-weight: bold} '
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
,'tags' : category ,'tags' : category
,'language' : 'en' ,'language' : 'en'
} }
keep_only_tags = [dict(name='div', attrs={'class':['altbg','text']})] keep_only_tags = [dict(name='div', attrs={'class':['altbg','text']})]
remove_tags = [dict(name='div', attrs={'id':['ctl00_cphPrimary_pnlBookCover']}),
dict(name='div', attrs={'class':['devotionalLinks']})
]
extra_css = '''
.text{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
.devotionalTitle{font-family:Arial,Helvetica,sans-serif; font-size:large; font-weight: bold;}
.devotionalDate{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.devotionalVerse{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
'''
feeds = [(u'Our Daily Bread', u'http://www.rbc.org/rss.ashx?id=50398')] feeds = [(u'Our Daily Bread', u'http://www.rbc.org/rss.ashx?id=50398')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return self.adeify_images(soup) return self.adeify_images(soup)
def get_cover_url(self):
href = 'http://www.rbc.org/index.aspx'
soup = self.index_to_soup(href)
a = soup.find('a',attrs={'id':'ctl00_hlTodaysDevotionalImage'})
if a :
cover_url = a.img['src']
return cover_url

View File

@ -1,57 +1,50 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.straitstimes.com www.straitstimes.com
''' '''
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class StraitsTimes(BasicNewsRecipe): class StraitsTimes(BasicNewsRecipe):
title = 'The Straits Times' title = 'The Straits Times'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Singapore newspaper' description = 'Singapore newspaper'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
publisher = 'Singapore Press Holdings Ltd.' publisher = 'Singapore Press Holdings Ltd.'
category = 'news, politics, singapore, asia' category = 'news, politics, singapore, asia'
language = 'en' language = 'en'
extra_css = ' .top_headline{font-size: x-large; font-weight: bold} '
html2lrf_options = [ conversion_options = {
'--comment', description 'comments' : description
, '--category', category ,'tags' : category
, '--publisher', publisher ,'language' : language
, '--ignore-tables' ,'publisher' : publisher
] }
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' remove_tags = [dict(name=['object','link','map'])]
remove_tags = [ keep_only_tags = [dict(name='div', attrs={'class':['top_headline','story_text']})]
dict(name=['object','link'])
,dict(name='table', attrs={'width':'980'}) feeds = [
,dict(name='td' , attrs={'class':'padlrt10'}) (u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' )
] ,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' )
,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' )
feeds = [ ,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' )
(u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' ) ,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' )
,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' ) ,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' )
,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' ) ,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' )
,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' ) ]
,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' )
,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' ) def preprocess_html(self, soup):
,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' ) for item in soup.findAll(style=True):
] del item['style']
return soup
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def print_version(self, url):
return url.replace('http://www.straitstimes.com','http://www.straitstimes.com/print')

View File

@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.torontosun.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TorontoSun(BasicNewsRecipe):
    """Fetch news articles from the Toronto Sun (www.torontosun.com)."""

    title                 = 'Toronto SUN'
    __author__            = 'Darko Miletic'
    description           = 'News from Canada'
    publisher             = 'Toronto Sun'
    category              = 'news, politics, Canada'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    language              = 'en_CA'

    # Metadata forwarded to the e-book conversion pipeline.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Keep only the headline block and the main article container.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'articleHead'}),
        dict(name='div', attrs={'id': 'channelContent'}),
    ]

    # Strip sidebars, breadcrumbs, embedded media and tab widgets.
    remove_tags = [
        dict(name='div', attrs={'class': ['leftBox', 'bottomBox clear', 'bottomBox', 'breadCrumb']}),
        dict(name=['link', 'iframe', 'object']),
        dict(name='a', attrs={'rel': 'swap'}),
        dict(name='ul', attrs={'class': 'tabs dl contentSwap'}),
    ]
    # Everything after the closing bottom box is boilerplate.
    remove_tags_after = dict(name='div', attrs={'class': 'bottomBox clear'})

    feeds = [
        (u'News', u'http://www.torontosun.com/news/rss.xml'),
        (u'Canada', u'http://www.torontosun.com/news/canada/rss.xml'),
        (u'Columnists', u'http://www.torontosun.com/news/columnists/rss.xml'),
        (u'World', u'http://www.torontosun.com/news/world/rss.xml'),
        (u'Money', u'http://www.torontosun.com/money/rss.xml'),
    ]

View File

@ -8,7 +8,7 @@ Usage:
First use 'doAuth' to request the client authentication for a
certain resource. You should send an httplib.UNAUTHORIZED response to the
client so it knows it has to authenticate itself.

Then use 'parseAuthorization' to retrieve the 'auth_map' used in
'checkResponse'.
@ -30,27 +30,27 @@ __license__ = """
Copyright (c) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net>
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
    * Neither the name of Sylvain Hellegouarch nor the names of his contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
@ -83,6 +83,7 @@ SUPPORTED_QOP = (AUTH, AUTH_INT)
# #
DIGEST_AUTH_ENCODERS = { DIGEST_AUTH_ENCODERS = {
MD5: lambda val: md5(val).hexdigest(), MD5: lambda val: md5(val).hexdigest(),
'md5': lambda val:md5(val).hexdigest(),
MD5_SESS: lambda val: md5(val).hexdigest(), MD5_SESS: lambda val: md5(val).hexdigest(),
# SHA: lambda val: sha(val).hexdigest(), # SHA: lambda val: sha(val).hexdigest(),
} }
@ -125,7 +126,7 @@ def doAuth (realm):
"""'doAuth' function returns the challenge string b giving priority over """'doAuth' function returns the challenge string b giving priority over
Digest and fallback to Basic authentication when the browser doesn't Digest and fallback to Basic authentication when the browser doesn't
support the first one. support the first one.
This should be set in the HTTP header under the key 'WWW-Authenticate'.""" This should be set in the HTTP header under the key 'WWW-Authenticate'."""
return digestAuth (realm) + " " + basicAuth (realm) return digestAuth (realm) + " " + basicAuth (realm)
@ -195,7 +196,7 @@ def parseAuthorization (credentials):
# #
def md5SessionKey (params, password): def md5SessionKey (params, password):
""" """
If the "algorithm" directive's value is "MD5-sess", then A1 If the "algorithm" directive's value is "MD5-sess", then A1
[the session key] is calculated only once - on the first request by the [the session key] is calculated only once - on the first request by the
client following receipt of a WWW-Authenticate challenge from the server. client following receipt of a WWW-Authenticate challenge from the server.
@ -340,24 +341,24 @@ AUTH_RESPONSES = {
def checkResponse (auth_map, password, method = "GET", encrypt=None, **kwargs):
    """'checkResponse' compares the auth_map with the password and optionally
    other arguments that each implementation might need.

    If the response is of type 'Basic' then the function has the following
    signature:

    checkBasicResponse (auth_map, password) -> bool

    If the response is of type 'Digest' then the function has the following
    signature:

    checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool

    The 'A1' argument is only used in MD5_SESS algorithm based responses.
    Check md5SessionKey() for more info.
    """
    global AUTH_RESPONSES
    checker = AUTH_RESPONSES[auth_map["auth_scheme"]]
    return checker (auth_map, password, method=method, encrypt=encrypt, **kwargs)