diff --git a/resources/content_server/gui.js b/resources/content_server/gui.js index edefdbc5ef..b6a7f8a5d7 100644 --- a/resources/content_server/gui.js +++ b/resources/content_server/gui.js @@ -247,7 +247,7 @@ function setup_sorting() { }); $('table#book_list thead tr td').mouseout(function() { - this.style.backgroundColor = "inherit"; + this.style.backgroundColor = "transparent"; }); for (i = 0; i < cmap.length; i++) { diff --git a/resources/images/news/toronto_sun.png b/resources/images/news/toronto_sun.png new file mode 100644 index 0000000000..7b8384afb1 Binary files /dev/null and b/resources/images/news/toronto_sun.png differ diff --git a/setup/extensions.py b/setup/extensions.py index 17d82f3ff9..c2b8467300 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -102,6 +102,7 @@ extensions = [ libraries=['podofo'], lib_dirs=[podofo_lib], inc_dirs=[podofo_inc], + optional=True, error=podofo_error), Extension('pictureflow', diff --git a/setup/installer/windows/freeze.py b/setup/installer/windows/freeze.py index 6868903e72..dede19085a 100644 --- a/setup/installer/windows/freeze.py +++ b/setup/installer/windows/freeze.py @@ -6,13 +6,15 @@ __docformat__ = 'restructuredtext en' ''' Freeze app into executable using py2exe. ''' +import sys, os + QT_DIR = 'C:\\Qt\\4.5.2' LIBUSB_DIR = 'C:\\libusb' LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll' -IMAGEMAGICK_DIR = 'C:\\ImageMagick' SW = r'C:\cygwin\home\kovid\sw' +IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.5.6', + 'VisualMagick', 'bin') -import sys def fix_module_finder(): # ModuleFinder can't handle runtime changes to __path__, but win32com uses them @@ -186,6 +188,15 @@ class BuildEXE(bc): for pat in ('*.dll', '*.sys', '*.cat', '*.inf'): for f in glob.glob(os.path.join(LIBUSB_DIR, pat)): shutil.copyfile(f, os.path.join(tdir, os.path.basename(f))) + # Copy ImageMagick + for pat in ('*.dll', '*.xml'): + for f in glob.glob(self.j(IMAGEMAGICK, pat)): + ok = True + for ex in ('magick++', 'x11.dll', 'xext.dll'): + if ex in f.lower(): ok = False + if not ok: continue + shutil.copy2(f, self.dll_dir) + print '\tAdding unrar' shutil.copyfile(LIBUNRAR, os.path.join(PY2EXE_DIR, os.path.basename(LIBUNRAR))) diff --git a/src/calibre/debug.py b/src/calibre/debug.py index 575308fe14..55e34c7963 100644 --- a/src/calibre/debug.py +++ b/src/calibre/debug.py @@ -190,6 +190,9 @@ def main(args=sys.argv): elif opts.develop_from is not None: develop_from(opts.develop_from) else: + from calibre.utils.config import config_dir + ipydir = os.path.join(config_dir, ('_' if iswindows else '.')+'ipython') + os.environ['IPYTHONDIR'] = ipydir from IPython.Shell import IPShellEmbed ipshell = IPShellEmbed() ipshell() diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py index 6cfe0ed132..e683392751 100644 --- a/src/calibre/devices/usbms/driver.py +++ b/src/calibre/devices/usbms/driver.py @@ -162,6 +162,7 @@ class USBMS(CLI, Device): @classmethod def book_from_path(cls, path): from calibre.ebooks.metadata.meta import path_to_ext + from calibre.ebooks.metadata import MetaInformation mime = mime_type_ext(path_to_ext(path)) if cls.settings().read_metadata or cls.MUST_READ_METADATA: @@ -171,6 +172,10 @@ class USBMS(CLI, Device): mi = metadata_from_filename(os.path.basename(path), re.compile(r'^(?P[ \S]+?)[ _]-[ _](?P<author>[ \S]+?)_+\d+')) + if mi is None: + mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], + [_('Unknown')]) + authors = authors_to_string(mi.authors) book = Book(path, mi.title, authors, mime) diff --git a/src/calibre/ebooks/pdf/fonts.h b/src/calibre/ebooks/pdf/fonts.h index c285b1dacc..55202c9573 100644 --- a/src/calibre/ebooks/pdf/fonts.h +++ b/src/calibre/ebooks/pdf/fonts.h @@ -4,9 +4,7 @@ */ - -#ifndef CALIBRE_REFLOW_FONTS -#define CALIBRE_REFLOW_FONTS +#pragma once #include <vector> #include <sstream> @@ -102,4 +100,3 @@ class Fonts : public vector<XMLFont*> { } -#endif diff --git a/src/calibre/ebooks/pdf/images.h b/src/calibre/ebooks/pdf/images.h index 2bdbc09d8d..7d6f143147 100644 --- a/src/calibre/ebooks/pdf/images.h +++ b/src/calibre/ebooks/pdf/images.h @@ -1,5 +1,4 @@ -#ifndef _CALIBRE_REFLOW_IMAGES -#define _CALIBRE_REFLOW_IMAGES +#pragma once #include <vector> #include <GfxState.h> @@ -127,4 +126,3 @@ namespace calibre_reflow { }; */ } -#endif diff --git a/src/calibre/ebooks/pdf/links.h b/src/calibre/ebooks/pdf/links.h index a84693ed0e..a8a3127a77 100644 --- a/src/calibre/ebooks/pdf/links.h +++ b/src/calibre/ebooks/pdf/links.h @@ -5,9 +5,7 @@ -#ifndef _CALIBRE_XML_LINKS -#define _CALIBRE_XML_LINKS - +#pragma once #include <vector> #include <sstream> @@ -65,5 +63,4 @@ class XMLLinks : public vector<XMLLink*> { } -#endif diff --git a/src/calibre/ebooks/pdf/reflow.h b/src/calibre/ebooks/pdf/reflow.h index fd629602b2..deb1dec326 100644 --- a/src/calibre/ebooks/pdf/reflow.h +++ b/src/calibre/ebooks/pdf/reflow.h @@ -4,8 +4,7 @@ * Based on pdftohtml from the poppler project. */ -#ifndef CALIBRE_REFLOW -#define CALIBRE_REFLOW +#pragma once #define UNICODE #ifdef _WIN32 @@ -248,4 +247,3 @@ class XMLOutputDev : public OutputDev { void process_link(Link* link); }; } -#endif diff --git a/src/calibre/ebooks/pdf/utils.h b/src/calibre/ebooks/pdf/utils.h index ae342d09da..43f435b1e3 100644 --- a/src/calibre/ebooks/pdf/utils.h +++ b/src/calibre/ebooks/pdf/utils.h @@ -4,9 +4,7 @@ */ -#ifndef _CALIBRE_REFLOW_UTILS -#define _CALIBRE_REFLOW_UTILS - +#pragma once #include <string> #include <sstream> @@ -45,4 +43,3 @@ inline string encode_for_xml(const string &sSrc ) } -#endif diff --git a/src/calibre/gui2/pictureflow/pictureflow.h b/src/calibre/gui2/pictureflow/pictureflow.h index 7431dee634..3e0b606d8a 100644 --- a/src/calibre/gui2/pictureflow/pictureflow.h +++ b/src/calibre/gui2/pictureflow/pictureflow.h @@ -55,8 +55,7 @@ THE SOFTWARE. */ -#ifndef PICTUREFLOW_H -#define PICTUREFLOW_H +#pragma once #include <QWidget> @@ -202,4 +201,3 @@ private: PictureFlowPrivate* d; }; -#endif // PICTUREFLOW_H diff --git a/src/calibre/trac/plugins/templates/linux.html b/src/calibre/trac/plugins/templates/linux.html index a166810b21..a55105d029 100644 --- a/src/calibre/trac/plugins/templates/linux.html +++ b/src/calibre/trac/plugins/templates/linux.html @@ -112,10 +112,13 @@ sudo python setup.py install </pre> Note that if your distribution does not have a correctly compiled libunrar.so, ${app} will not - support rar files. The calibre_postinstall step - is required for device detection and integration - with your desktop environment. - </p> + support rar files. In order to compile ${app} successfully + poppler headers must include XPdf headers. That is, poppler + must have been configured with --enable-xpdf-headers. Also, + some distributions have buggy libpng headers. See + <a href="https://bugs.launchpad.net/ubuntu/+source/libpng/+bug/218409">here</a> + for example. + </p> </div> </td> </tr> diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index e7bc06e7af..c9424717f0 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -411,7 +411,11 @@ class Config(ConfigInterface): if os.path.exists(self.config_file_path): try: with ExclusiveFile(self.config_file_path) as f: - src = f.read().decode('utf-8') + try: + src = f.read().decode('utf-8') + except ValueError: + print "Failed to parse", self.config_file_path + traceback.print_exc() except LockError: raise IOError('Could not lock config file: %s'%self.config_file_path) return self.option_set.parse_string(src) diff --git a/src/calibre/utils/ipc/job.py b/src/calibre/utils/ipc/job.py index 16ce85ace6..79db972008 100644 --- a/src/calibre/utils/ipc/job.py +++ b/src/calibre/utils/ipc/job.py @@ -131,7 +131,7 @@ class BaseJob(object): @property def details(self): - return self.log_file.read().decode('utf-8') + return self.log_file.read().decode('utf-8', 'replace') class ParallelJob(BaseJob): diff --git a/src/calibre/utils/lzx/lzc.h b/src/calibre/utils/lzx/lzc.h index 8a24f2c090..e817a7eb43 100644 --- a/src/calibre/utils/lzx/lzc.h +++ b/src/calibre/utils/lzx/lzc.h @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#pragma once + typedef struct lz_info lz_info; typedef int (*get_chars_t)(lz_info *lzi, int n, unsigned char *buf); typedef int (*output_match_t)(lz_info *lzi, int match_pos, int match_len); diff --git a/src/calibre/utils/lzx/lzxc.h b/src/calibre/utils/lzx/lzxc.h index 30da460a81..367094b817 100644 --- a/src/calibre/utils/lzx/lzxc.h +++ b/src/calibre/utils/lzx/lzxc.h @@ -16,6 +16,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#pragma once + #ifdef _MSC_VER #include "msstdint.h" #endif diff --git a/src/calibre/utils/lzx/lzxd.h b/src/calibre/utils/lzx/lzxd.h index 15ae17c0aa..ed9ea73273 100644 --- a/src/calibre/utils/lzx/lzxd.h +++ b/src/calibre/utils/lzx/lzxd.h @@ -10,10 +10,10 @@ * For further details, see the file COPYING.LIB distributed with libmspack */ +#pragma once + #include <sys/types.h> -#ifndef MSPACK_LZX_H -#define MSPACK_LZX_H 1 /* LZX compression / decompression definitions */ @@ -166,4 +166,3 @@ extern int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes); */ void lzxd_free(struct lzxd_stream *lzx); -#endif diff --git a/src/calibre/utils/lzx/lzxmodule.h b/src/calibre/utils/lzx/lzxmodule.h index d146694fd7..05159deb3b 100644 --- a/src/calibre/utils/lzx/lzxmodule.h +++ b/src/calibre/utils/lzx/lzxmodule.h @@ -3,13 +3,10 @@ * * Common declarations for Python module C glue code. */ +#pragma once #include <Python.h> -#ifndef LZXMODULE_H -#define LZXMODULE_H - extern PyObject *LZXError; extern PyTypeObject CompressorType; -#endif /* LZXMODULE_H */ diff --git a/src/calibre/utils/lzx/mspack.h b/src/calibre/utils/lzx/mspack.h index b48623fed0..e9d8a875a0 100644 --- a/src/calibre/utils/lzx/mspack.h +++ b/src/calibre/utils/lzx/mspack.h @@ -115,8 +115,7 @@ * - #MSPACK_ERR_DECRUNCH indicates an error occured during decompression. */ -#ifndef LIB_MSPACK_H -#define LIB_MSPACK_H 1 +#pragma once #ifdef __cplusplus extern "C" { @@ -1479,4 +1478,3 @@ struct mskwaj_decompressor { }; #endif -#endif diff --git a/src/calibre/utils/lzx/msstdint.h b/src/calibre/utils/lzx/msstdint.h index e032ff1605..51190b4fa5 100644 --- a/src/calibre/utils/lzx/msstdint.h +++ b/src/calibre/utils/lzx/msstdint.h @@ -29,17 +29,12 @@ // /////////////////////////////////////////////////////////////////////////////// +#pragma once + #ifndef _MSC_VER // [ #error "Use this header only with Microsoft Visual C++ compilers!" #endif // _MSC_VER ] -#ifndef _MSC_STDINT_H_ // [ -#define _MSC_STDINT_H_ - -#if _MSC_VER > 1000 -#pragma once -#endif - #include <limits.h> // For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}' @@ -228,5 +223,3 @@ typedef uint64_t uintmax_t; #endif // __STDC_CONSTANT_MACROS ] - -#endif // _MSC_STDINT_H_ ] diff --git a/src/calibre/utils/lzx/system.h b/src/calibre/utils/lzx/system.h index acc7d23f56..f586bbb495 100644 --- a/src/calibre/utils/lzx/system.h +++ b/src/calibre/utils/lzx/system.h @@ -7,8 +7,7 @@ * For further details, see the file COPYING.LIB distributed with libmspack */ -#ifndef MSPACK_SYSTEM_H -#define MSPACK_SYSTEM_H 1 +#pragma once #ifdef _MSC_VER #define inline @@ -62,5 +61,3 @@ static inline size_t strlen(const char *s) { return e - s; } #endif - -#endif diff --git a/src/calibre/utils/msdes/d3des.h b/src/calibre/utils/msdes/d3des.h index 08ce78656a..0b654b8337 100644 --- a/src/calibre/utils/msdes/d3des.h +++ b/src/calibre/utils/msdes/d3des.h @@ -10,6 +10,8 @@ * (GEnie : OUTER; CIS : [71755,204]) */ +#pragma once + #undef D2_DES #undef D3_DES diff --git a/src/calibre/utils/msdes/spr.h b/src/calibre/utils/msdes/spr.h index 1579892575..d61d5d332f 100644 --- a/src/calibre/utils/msdes/spr.h +++ b/src/calibre/utils/msdes/spr.h @@ -21,6 +21,8 @@ | URL: http://www.gnu.org/licenses/gpl.html */ +#pragma once + static unsigned long SP1[64] = { 0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, 0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L, diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index cb52d41111..dc5b7664f7 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -6,7 +6,7 @@ Builtin recipes. ''' recipe_modules = ['recipe_' + r for r in ( 'newsweek', 'atlantic', 'economist', 'portfolio', 'the_register', - 'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj', + 'usatoday', 'bbc', 'greader', 'wsj', #'outlook_india', 'wired', 'globe_and_mail', 'smh', 'espn', 'business_week', 'miami_herald', 'ars_technica', 'upi', 'new_yorker', 'irish_times', 'lanacion', 'discover_magazine', 'scientific_american', 'new_york_review_of_books', diff --git a/src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py b/src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py index 99a5d674ae..bc7f8cc874 100644 --- a/src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py +++ b/src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: cp1252 -*- __license__ = 'GPL v3' __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' @@ -10,28 +11,62 @@ from calibre.web.feeds.news import BasicNewsRecipe class Honoluluadvertiser(BasicNewsRecipe): title = 'Honolulu Advertiser' - __author__ = 'Darko Miletic' + __author__ = 'Darko Miletic and Sujata Raman' description = "Latest national and local Hawaii sports news from The Honolulu Advertiser." publisher = 'Honolulu Advertiser' category = 'news, Honolulu, Hawaii' oldest_article = 2 - language = 'en' + language = 'en' + max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' + remove_javascript = True + cover_url = 'http://www.honoluluadvertiser.com/graphics/frontpage/frontpage.jpg' - conversion_options = { - 'comments' : description - ,'tags' : category - ,'language' : language - ,'publisher' : publisher - } + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher' , publisher + ] - keep_only_tags = [dict(name='td')] + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - remove_tags = [dict(name=['object','link'])] - remove_attributes = ['style'] + keep_only_tags = [dict(name='div', attrs={'class':["hon_article_top","article-bodytext","hon_article_photo","storyphoto","article"]}), + dict(name='div', attrs={'id':["storycontentleft","article"]}) + ] + + remove_tags = [dict(name=['object','link','embed']), + dict(name='div', attrs={'class':["article-tools","titleBar","invisiblespacer","articleflex-container","hon_newslist","categoryheader","columnframe","subHeadline","poster-container"]}), + dict(name='div', attrs={'align':["right"]}), + dict(name='div', attrs={'id':["pluckcomments"]}), + dict(name='td', attrs={'class':["prepsfacts"]}), + dict(name='img', attrs={'height':["1"]}), + dict(name='img', attrs={'alt':["Advertisement"]}), + dict(name='img', attrs={'src':["/gcicommonfiles/sr/graphics/common/adlabel_horz.gif","/gcicommonfiles/sr/graphics/common/icon_whatsthis.gif",]}), + ] + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; } + .hon_article_timestamp{font-family:Arial,Helvetica,sans-serif; font-size:70%; } + .postedStoryDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; } + .postedDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; } + .credit{font-family:Arial,Helvetica,sans-serif; font-size:30%; } + .hon_article_top{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%; font-weight:bold;} + .grayBackground{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%;} + .hon_photocaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; } + .photoCaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; } + .hon_photocredit{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;} + .storyphoto{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;} + .article-bodytext{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; } + .storycontentleft{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; } + #article{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; } + .contentarea{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; } + .storytext{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:xx-small;} + .storyHeadline{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; font-weight:bold;} + .source{font-family:Arial,Helvetica,sans-serif; color:#333333; font-style: italic; font-weight:bold; } + ''' feeds = [ (u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' ) @@ -43,13 +78,19 @@ class Honoluluadvertiser(BasicNewsRecipe): ] def preprocess_html(self, soup): - st = soup.find('td') - if st: - st.name = 'div' + for item in soup.findAll(style=True): + del item['style'] + mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n' + soup.head.insert(0,mtag) + + for tag in soup.findAll(name=['span','table','font']): + tag.name = 'div' + return soup - def print_version(self, url): - ubody, sep, rest = url.rpartition('?source') - root, sep2, article_id = ubody.partition('/article/') - return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart' + + # def print_version(self, url): + # ubody, sep, rest = url.rpartition('/-1/') + # root, sep2, article_id = ubody.partition('/article/') + # return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart' diff --git a/src/calibre/web/feeds/recipes/recipe_miami_herald.py b/src/calibre/web/feeds/recipes/recipe_miami_herald.py index 8488a2a9b3..4500b02097 100644 --- a/src/calibre/web/feeds/recipes/recipe_miami_herald.py +++ b/src/calibre/web/feeds/recipes/recipe_miami_herald.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' @@ -10,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class TheMiamiHerald(BasicNewsRecipe): title = 'The Miami Herald' - __author__ = 'Darko Miletic' + __author__ = 'Darko Miletic and Sujata Raman' description = "Miami-Dade and Broward's source for the latest breaking local news on sports, weather, business, jobs, real estate, shopping, health, travel, entertainment, & more." oldest_article = 1 max_articles_per_feed = 100 @@ -22,19 +21,28 @@ class TheMiamiHerald(BasicNewsRecipe): use_embedded_content = False encoding = 'cp1252' remove_javascript = True - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher' , publisher - ] + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#1A272F; } + .subheadline{font-family:Arial,Helvetica,sans-serif; font-size:30%; color: #666666;} + #storyBodyContent{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; } + .byline{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#58595B; } + .credit_line{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#58595B; } + .storyPublishDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666; } + .shirttail{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;font-style:italic } + .imageCaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666; } + ''' + keep_only_tags = [dict(name='div', attrs={'id':['storyBody','storyPhotoContentArea']}), + ] + + remove_tags = [dict(name=['object','link','embed']), + dict(name='div', attrs={'class':["imageBuyButton","shareLinksArea","storyTools","spill_navigation pagination","circPromoArea","storyTools_footer","storyYahooContentMatch"]}) , + dict(name='div', attrs={'id':["pluck","mlt","storyAssets"]}) ] - keep_only_tags = [dict(name='div', attrs={'id':'pageContainer'})] feeds = [ - (u'Breaking News' , u'http://www.miamiherald.com/416/index.xml' ) + (u'Breaking News' , u'http://www.miamiherald.com/416/index.xml' ) ,(u'Miami-Dade' , u'http://www.miamiherald.com/460/index.xml' ) ,(u'Broward' , u'http://www.miamiherald.com/467/index.xml' ) ,(u'Florida Keys' , u'http://www.miamiherald.com/505/index.xml' ) @@ -49,6 +57,26 @@ class TheMiamiHerald(BasicNewsRecipe): ,(u'Environment' , u'http://www.miamiherald.com/573/index.xml' ) ] - def print_version(self, url): - return url.replace('/story/','/v-print/story/') + + + + + def get_article_url(self, article): + ans = article.get('guid', None) + print ans + try: + self.log('Looking for full story link in', ans) + soup = self.index_to_soup(ans) + x = soup.find(text="Full Story") + + if x is not None: + a = x.parent + if a and a.has_key('href'): + ans = 'http://www.miamiherald.com'+a['href'] + self.log('Found full story link', ans) + except: + pass + return ans + + diff --git a/src/calibre/web/feeds/recipes/recipe_moscow_times.py b/src/calibre/web/feeds/recipes/recipe_moscow_times.py index 93dcb3d226..3105aba58e 100644 --- a/src/calibre/web/feeds/recipes/recipe_moscow_times.py +++ b/src/calibre/web/feeds/recipes/recipe_moscow_times.py @@ -10,18 +10,64 @@ from calibre.web.feeds.news import BasicNewsRecipe class Moscowtimes(BasicNewsRecipe): title = u'The Moscow Times' - __author__ = 'Darko Miletic' + __author__ = 'Darko Miletic and Sujata Raman' description = 'News from Russia' language = 'en' - + lang = 'en' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + #encoding = 'utf-8' + encoding = 'cp1252' + remove_javascript = True + conversion_options = { + 'comment' : description + , 'language' : lang + } + + extra_css = ''' + h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large} + .article_date{ font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; color:#000000; font-size: x-small;} + .autors{color:#999999 ; font-weight: bold ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; } + .photoautors{ color:#999999 ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; } + .text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; } + ''' feeds = [ - (u'The Moscow Times' , u'http://www.themoscowtimes.com/rss.xml' ) + (u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'), + (u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'), + (u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'), + (u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'), + (u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'), + (u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion') ] - def print_version(self, url): - return url + '&print=Y' \ No newline at end of file + keep_only_tags = [ + dict(name='div', attrs={'class':['newstextblock']}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['photo_nav']}) + ] + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">' + soup.head.insert(0,mtag) + + return self.adeify_images(soup) + + + def get_cover_url(self): + + href = 'http://www.themoscowtimes.com/pdf/' + + soup = self.index_to_soup(href) + div = soup.find('div',attrs={'class':'left'}) + a = div.find('a') + print a + if a : + cover_url = a.img['src'] + return cover_url diff --git a/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py b/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py index 7fc5adb93a..3f043883fe 100644 --- a/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py +++ b/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py @@ -1,62 +1,43 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' -''' -msdn.microsoft.com/en-us/magazine -''' -from calibre.web.feeds.news import BasicNewsRecipe - -class MSDNMagazine_en(BasicNewsRecipe): - title = 'MSDN Magazine' - __author__ = 'Darko Miletic' - description = 'The Microsoft Journal for Developers' - publisher = 'Microsoft Press' - category = 'news, IT, Microsoft, programming, windows' - oldest_article = 31 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - remove_javascript = True - current_issue = 'http://msdn.microsoft.com/en-us/magazine/default.aspx' - language = 'en' - - - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - - feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')] - - keep_only_tags = [dict(name='div', attrs={'class':'topic'})] - - remove_tags = [ - dict(name=['object','link','base','table']) - ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'}) - ] - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.current_issue) - link_item = soup.find('span',attrs={'class':'ContentsImageSpacer'}) - if link_item: - imgt = link_item.find('img') - if imgt: - cover_url = imgt['src'] - return cover_url - - - def preprocess_html(self, soup): - for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}): - item.name="h2" - for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}): - item.name="h1" - for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}): - item.name="h3" - return soup - +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' +''' +msdn.microsoft.com/en-us/magazine +''' +from calibre.web.feeds.news import BasicNewsRecipe + +class MSDNMagazine_en(BasicNewsRecipe): + title = 'MSDN Magazine' + __author__ = 'Darko Miletic' + description = 'The Microsoft Journal for Developers' + publisher = 'Microsoft Press' + category = 'news, IT, Microsoft, programming, windows' + oldest_article = 31 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'en' + + + + feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')] + + keep_only_tags = [dict(name='div', attrs={'class':'navpage'})] + + remove_tags = [ + dict(name=['object','link','base','table']) + ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'}) + ] + remove_tags_after = dict(name='div', attrs={'class':'navpage'}) + + def preprocess_html(self, soup): + for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}): + item.name="h2" + for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}): + item.name="h1" + for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}): + item.name="h3" + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_ourdailybread.py b/src/calibre/web/feeds/recipes/recipe_ourdailybread.py index f1241da408..6987ab2325 100644 --- a/src/calibre/web/feeds/recipes/recipe_ourdailybread.py +++ b/src/calibre/web/feeds/recipes/recipe_ourdailybread.py @@ -10,10 +10,11 @@ from calibre.web.feeds.news import BasicNewsRecipe class OurDailyBread(BasicNewsRecipe): title = 'Our Daily Bread' - __author__ = 'Darko Miletic' + __author__ = 'Darko Miletic and Sujata Raman' description = 'Religion' oldest_article = 15 language = 'en' + lang = 'en' max_articles_per_feed = 100 no_stylesheets = True @@ -21,16 +22,43 @@ class OurDailyBread(BasicNewsRecipe): category = 'religion' encoding = 'utf-8' extra_css = ' #devoTitle{font-size: x-large; font-weight: bold} ' - - conversion_options = { + + conversion_options = { 'comments' : description ,'tags' : category ,'language' : 'en' } - + keep_only_tags = [dict(name='div', attrs={'class':['altbg','text']})] + remove_tags = [dict(name='div', attrs={'id':['ctl00_cphPrimary_pnlBookCover']}), + dict(name='div', attrs={'class':['devotionalLinks']}) + ] + extra_css = ''' + .text{font-family:Arial,Helvetica,sans-serif;font-size:x-small;} + .devotionalTitle{font-family:Arial,Helvetica,sans-serif; font-size:large; font-weight: bold;} + .devotionalDate{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .devotionalVerse{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; } + ''' + feeds = [(u'Our Daily Bread', u'http://www.rbc.org/rss.ashx?id=50398')] def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">' + soup.head.insert(0,mtag) + return self.adeify_images(soup) + + def get_cover_url(self): + + href = 'http://www.rbc.org/index.aspx' + + soup = self.index_to_soup(href) + a = soup.find('a',attrs={'id':'ctl00_hlTodaysDevotionalImage'}) + + if a : + cover_url = a.img['src'] + + return cover_url diff --git a/src/calibre/web/feeds/recipes/recipe_straitstimes.py b/src/calibre/web/feeds/recipes/recipe_straitstimes.py index 9a87f03a6d..522e8f9ff5 100644 --- a/src/calibre/web/feeds/recipes/recipe_straitstimes.py +++ b/src/calibre/web/feeds/recipes/recipe_straitstimes.py @@ -1,57 +1,50 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' -''' -www.straitstimes.com -''' - -from calibre.web.feeds.recipes import BasicNewsRecipe - -class StraitsTimes(BasicNewsRecipe): - title = 'The Straits Times' - __author__ = 'Darko Miletic' - description = 'Singapore newspaper' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - publisher = 'Singapore Press Holdings Ltd.' - category = 'news, politics, singapore, asia' - language = 'en' - - - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - , '--ignore-tables' - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' - - remove_tags = [ - dict(name=['object','link']) - ,dict(name='table', attrs={'width':'980'}) - ,dict(name='td' , attrs={'class':'padlrt10'}) - ] - - feeds = [ - (u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' ) - ,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' ) - ,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' ) - ,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' ) - ,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' ) - ,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' ) - ,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' ) - ] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup - - def print_version(self, url): - return url.replace('http://www.straitstimes.com','http://www.straitstimes.com/print') - +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' +''' +www.straitstimes.com +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe + +class StraitsTimes(BasicNewsRecipe): + title = 'The Straits Times' + __author__ = 'Darko Miletic' + description = 'Singapore newspaper' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'cp1252' + publisher = 'Singapore Press Holdings Ltd.' + category = 'news, politics, singapore, asia' + language = 'en' + extra_css = ' .top_headline{font-size: x-large; font-weight: bold} ' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + remove_tags = [dict(name=['object','link','map'])] + + keep_only_tags = [dict(name='div', attrs={'class':['top_headline','story_text']})] + + feeds = [ + (u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' ) + ,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' ) + ,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' ) + ,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' ) + ,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' ) + ,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' ) + ,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' ) + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_toronto_sun.py b/src/calibre/web/feeds/recipes/recipe_toronto_sun.py new file mode 100644 index 0000000000..996b27c1bd --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_toronto_sun.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' +''' +www.torontosun.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TorontoSun(BasicNewsRecipe): + title = 'Toronto SUN' + __author__ = 'Darko Miletic' + description = 'News from Canada' + publisher = 'Toronto Sun' + category = 'news, politics, Canada' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'cp1252' + language = 'en_CA' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + keep_only_tags =[ + dict(name='div', attrs={'class':'articleHead'}) + ,dict(name='div', attrs={'id':'channelContent'}) + ] + remove_tags = [ + dict(name='div',attrs={'class':['leftBox','bottomBox clear','bottomBox','breadCrumb']}) + ,dict(name=['link','iframe','object']) + ,dict(name='a',attrs={'rel':'swap'}) + ,dict(name='ul',attrs={'class':'tabs dl contentSwap'}) + ] + + remove_tags_after = dict(name='div',attrs={'class':'bottomBox clear'}) + + feeds = [ + (u'News' , u'http://www.torontosun.com/news/rss.xml' ) + ,(u'Canada' , u'http://www.torontosun.com/news/canada/rss.xml' ) + ,(u'Columnists' , u'http://www.torontosun.com/news/columnists/rss.xml') + ,(u'World' , u'http://www.torontosun.com/news/world/rss.xml' ) + ,(u'Money' , u'http://www.torontosun.com/money/rss.xml' ) + ] diff --git a/src/cherrypy/lib/httpauth.py b/src/cherrypy/lib/httpauth.py index 083f4c5f9e..ea8448d339 100644 --- a/src/cherrypy/lib/httpauth.py +++ b/src/cherrypy/lib/httpauth.py @@ -8,7 +8,7 @@ Usage: First use 'doAuth' to request the client authentication for a certain resource. You should send an httplib.UNAUTHORIZED response to the client so he knows he has to authenticate itself. - + Then use 'parseAuthorization' to retrieve the 'auth_map' used in 'checkResponse'. @@ -30,27 +30,27 @@ __license__ = """ Copyright (c) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net> All rights reserved. -Redistribution and use in source and binary forms, with or without modification, +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notice, + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Sylvain Hellegouarch nor the names of his contributors - may be used to endorse or promote products derived from this software + * Neither the name of Sylvain Hellegouarch nor the names of his contributors + may be used to endorse or promote products derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ @@ -83,6 +83,7 @@ SUPPORTED_QOP = (AUTH, AUTH_INT) # DIGEST_AUTH_ENCODERS = { MD5: lambda val: md5(val).hexdigest(), + 'md5': lambda val:md5(val).hexdigest(), MD5_SESS: lambda val: md5(val).hexdigest(), # SHA: lambda val: sha(val).hexdigest(), } @@ -125,7 +126,7 @@ def doAuth (realm): """'doAuth' function returns the challenge string b giving priority over Digest and fallback to Basic authentication when the browser doesn't support the first one. - + This should be set in the HTTP header under the key 'WWW-Authenticate'.""" return digestAuth (realm) + " " + basicAuth (realm) @@ -195,7 +196,7 @@ def parseAuthorization (credentials): # def md5SessionKey (params, password): """ - If the "algorithm" directive's value is "MD5-sess", then A1 + If the "algorithm" directive's value is "MD5-sess", then A1 [the session key] is calculated only once - on the first request by the client following receipt of a WWW-Authenticate challenge from the server. @@ -340,24 +341,24 @@ AUTH_RESPONSES = { def checkResponse (auth_map, password, method = "GET", encrypt=None, **kwargs): """'checkResponse' compares the auth_map with the password and optionally other arguments that each implementation might need. - + If the response is of type 'Basic' then the function has the following signature: - + checkBasicResponse (auth_map, password) -> bool - + If the response is of type 'Digest' then the function has the following signature: - + checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool - + The 'A1' argument is only used in MD5_SESS algorithm based responses. Check md5SessionKey() for more info. """ global AUTH_RESPONSES checker = AUTH_RESPONSES[auth_map["auth_scheme"]] return checker (auth_map, password, method=method, encrypt=encrypt, **kwargs) - +