Merge from trunk

This commit is contained in:
Charles Haley 2011-03-09 08:21:23 +00:00
commit 9638baa6ee
7 changed files with 444 additions and 266 deletions

View File

@ -1,7 +1,20 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# Users of Kindle 3 (with limited system-level CJK support)
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn it to True if your device supports display of CJK titles
__UseChineseTitle__ = False
'''
Change Log:
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
@ -19,21 +32,17 @@ import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
class MPHKRecipe(BasicNewsRecipe):
IsCJKWellSupported = True # Set to False to avoid generating periodical in which CJK characters can't be displayed in section/article view
title = 'Ming Pao - Hong Kong'
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
description = ('Hong Kong Chinese Newspaper (http://news.mingpao.com). If'
'you are using a Kindle with firmware < 3.1, customize the'
'recipe')
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
publisher = 'MingPao'
category = 'Chinese, News, Hong Kong'
remove_javascript = True
@ -48,12 +57,14 @@ class MPHKRecipe(BasicNewsRecipe):
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(attrs={'id':['newscontent']}), # entertainment page content
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']})
]
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']})] # for the finance page
dict(attrs={'id':['newscontent135']}), # for the finance page
dict(name='table')] # for content fetched from life.mingpao.com
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
@ -61,7 +72,12 @@ class MPHKRecipe(BasicNewsRecipe):
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: '')
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
def image_url_processor(cls, baseurl, url):
@ -129,28 +145,55 @@ class MPHKRecipe(BasicNewsRecipe):
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
return feeds
def parse_section(self, url):
@ -171,15 +214,33 @@ class MPHKRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
def parse_ed_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_fin_section(self, url):
dateStr = self.get_fetchdate()
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href= True)
current_articles = []
included_urls = []
for i in a:
url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
title = self.tag_to_string(i)
current_articles.append({'title': title, 'url': url, 'description':''})
included_urls.append(url)
@ -201,6 +262,22 @@ class MPHKRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
def parse_col_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
@ -213,18 +290,18 @@ class MPHKRecipe(BasicNewsRecipe):
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if self.IsCJKWellSupported == True:
# use Chinese title
title = u'\u660e\u5831 (\u9999\u6e2f) ' + self.get_fetchformatteddate()
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
# use English title
title = self.short_title() + ' ' + self.get_fetchformatteddate()
if True: # force date in title
# title += strftime(self.timefmt)
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if self.IsCJKWellSupported == True:
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()

View File

@ -30,6 +30,7 @@ def strftime(epoch, zone=time.gmtime):
def get_connected_device():
from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner
import uuid
dev = None
scanner = DeviceScanner()
scanner.scan()
@ -47,7 +48,7 @@ def get_connected_device():
for d in connected_devices:
try:
d.open()
d.open(str(uuid.uuid4()))
except:
continue
else:

View File

@ -6,7 +6,7 @@ Provides a command-line and optional graphical interface to the SONY Reader PRS-
For usage information run the script.
"""
import StringIO, sys, time, os
import StringIO, sys, time, os, uuid
from optparse import OptionParser
from calibre import __version__, __appname__
@ -213,7 +213,7 @@ def main():
for d in connected_devices:
try:
d.open()
d.open(str(uuid.uuid4()))
except:
continue
else:

View File

@ -5,7 +5,6 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys
from functools import partial
from PyQt4.Qt import QComboBox, QLabel, QSpinBox, QDoubleSpinBox, QDateEdit, \
@ -85,7 +84,7 @@ class Int(Base):
self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent),
QSpinBox(parent)]
w = self.widgets[1]
w.setRange(-100, sys.maxint)
w.setRange(-100, 100000000)
w.setSpecialValueText(_('Undefined'))
w.setSingleStep(1)
@ -108,7 +107,7 @@ class Float(Int):
self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent),
QDoubleSpinBox(parent)]
w = self.widgets[1]
w.setRange(-100., float(sys.maxint))
w.setRange(-100., float(100000000))
w.setDecimals(2)
w.setSpecialValueText(_('Undefined'))
w.setSingleStep(1)
@ -289,7 +288,7 @@ class Series(Base):
self.widgets.append(QLabel('&'+self.col_metadata['name']+_(' index:'), parent))
w = QDoubleSpinBox(parent)
w.setRange(-100., float(sys.maxint))
w.setRange(-100., float(100000000))
w.setDecimals(2)
w.setSpecialValueText(_('Undefined'))
w.setSingleStep(1)
@ -595,7 +594,7 @@ class BulkInt(BulkBase):
def setup_ui(self, parent):
self.make_widgets(parent, QSpinBox)
self.main_widget.setRange(-100, sys.maxint)
self.main_widget.setRange(-100, 100000000)
self.main_widget.setSpecialValueText(_('Undefined'))
self.main_widget.setSingleStep(1)
@ -617,7 +616,7 @@ class BulkFloat(BulkInt):
def setup_ui(self, parent):
self.make_widgets(parent, QDoubleSpinBox)
self.main_widget.setRange(-100., float(sys.maxint))
self.main_widget.setRange(-100., float(100000000))
self.main_widget.setDecimals(2)
self.main_widget.setSpecialValueText(_('Undefined'))
self.main_widget.setSingleStep(1)

View File

@ -5,7 +5,6 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys
from math import cos, sin, pi
from PyQt4.Qt import QColor, Qt, QModelIndex, QSize, \
@ -245,13 +244,13 @@ class CcTextDelegate(QStyledItemDelegate): # {{{
typ = m.custom_columns[col]['datatype']
if typ == 'int':
editor = QSpinBox(parent)
editor.setRange(-100, sys.maxint)
editor.setRange(-100, 100000000)
editor.setSpecialValueText(_('Undefined'))
editor.setSingleStep(1)
elif typ == 'float':
editor = QDoubleSpinBox(parent)
editor.setSpecialValueText(_('Undefined'))
editor.setRange(-100., float(sys.maxint))
editor.setRange(-100., 100000000)
editor.setDecimals(2)
else:
editor = MultiCompleteLineEdit(parent)

View File

@ -95,6 +95,26 @@ class DrawingWand(_magick.DrawingWand): # {{{
self.font_size_ = float(val)
return property(fget=fget, fset=fset, doc=_magick.DrawingWand.font_size_.__doc__)
@dynamic_property
def stroke_color(self):
def fget(self):
return self.stroke_color_.color
def fset(self, val):
col = PixelWand()
col.color = unicode(val)
self.stroke_color_ = col
return property(fget=fget, fset=fset, doc=_magick.DrawingWand.font_size_.__doc__)
@dynamic_property
def fill_color(self):
def fget(self):
return self.fill_color_.color
def fset(self, val):
col = PixelWand()
col.color = unicode(val)
self.fill_color_ = col
return property(fget=fget, fset=fset, doc=_magick.DrawingWand.font_size_.__doc__)
# }}}
class Image(_magick.Image): # {{{

View File

@ -263,6 +263,78 @@ magick_DrawingWand_fontsize_setter(magick_DrawingWand *self, PyObject *val, void
// }}}
// DrawingWand.stroke_color {{{
static PyObject *
magick_DrawingWand_stroke_color_getter(magick_DrawingWand *self, void *closure) {
NULL_CHECK(NULL)
magick_PixelWand *pw;
PixelWand *wand = NewPixelWand();
if (wand == NULL) return PyErr_NoMemory();
DrawGetStrokeColor(self->wand, wand);
pw = (magick_PixelWand*) magick_PixelWandType.tp_alloc(&magick_PixelWandType, 0);
if (pw == NULL) return PyErr_NoMemory();
pw->wand = wand;
return Py_BuildValue("O", (PyObject *)pw);
}
static int
magick_DrawingWand_stroke_color_setter(magick_DrawingWand *self, PyObject *val, void *closure) {
NULL_CHECK(-1)
if (val == NULL) {
PyErr_SetString(PyExc_TypeError, "Cannot delete DrawingWand stroke color");
return -1;
}
magick_PixelWand *pw;
pw = (magick_PixelWand*)val;
if (!IsPixelWand(pw->wand)) { PyErr_SetString(PyExc_TypeError, "Invalid PixelWand"); return -1; }
DrawSetStrokeColor(self->wand, pw->wand);
return 0;
}
// }}}
// DrawingWand.fill_color {{{
static PyObject *
magick_DrawingWand_fill_color_getter(magick_DrawingWand *self, void *closure) {
NULL_CHECK(NULL)
magick_PixelWand *pw;
PixelWand *wand = NewPixelWand();
if (wand == NULL) return PyErr_NoMemory();
DrawGetFillColor(self->wand, wand);
pw = (magick_PixelWand*) magick_PixelWandType.tp_alloc(&magick_PixelWandType, 0);
if (pw == NULL) return PyErr_NoMemory();
pw->wand = wand;
return Py_BuildValue("O", (PyObject *)pw);
}
static int
magick_DrawingWand_fill_color_setter(magick_DrawingWand *self, PyObject *val, void *closure) {
NULL_CHECK(-1)
if (val == NULL) {
PyErr_SetString(PyExc_TypeError, "Cannot delete DrawingWand fill color");
return -1;
}
magick_PixelWand *pw;
pw = (magick_PixelWand*)val;
if (!IsPixelWand(pw->wand)) { PyErr_SetString(PyExc_TypeError, "Invalid PixelWand"); return -1; }
DrawSetFillColor(self->wand, pw->wand);
return 0;
}
// }}}
// DrawingWand.text_antialias {{{
static PyObject *
magick_DrawingWand_textantialias_getter(magick_DrawingWand *self, void *closure) {
@ -336,6 +408,16 @@ static PyGetSetDef magick_DrawingWand_getsetters[] = {
(char *)"DrawingWand fontsize",
NULL},
{(char *)"stroke_color_",
(getter)magick_DrawingWand_stroke_color_getter, (setter)magick_DrawingWand_stroke_color_setter,
(char *)"DrawingWand stroke color",
NULL},
{(char *)"fill_color_",
(getter)magick_DrawingWand_fill_color_getter, (setter)magick_DrawingWand_fill_color_setter,
(char *)"DrawingWand fill color",
NULL},
{(char *)"text_antialias",
(getter)magick_DrawingWand_textantialias_getter, (setter)magick_DrawingWand_textantialias_setter,
(char *)"DrawingWand text antialias",