Merge from trunk

This commit is contained in:
Charles Haley 2011-01-05 19:18:09 +00:00
commit 3628142f6a
8 changed files with 423 additions and 6 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.1 KiB

View File

@ -0,0 +1,86 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.arabianbusiness.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Arabian_Business(BasicNewsRecipe):
    # Recipe for www.arabianbusiness.com: one RSS feed per site section,
    # with every article fetched through its printer-friendly URL.

    # --- Metadata -------------------------------------------------------
    title = 'Arabian Business'
    __author__ = 'Darko Miletic'
    description = 'Comprehensive Guide to Middle East Business & Gulf Industry News including,Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.'
    publisher = 'Arabian Business Publishing Ltd.'
    category = 'ArabianBusiness.com,Arab Business News,Middle East Business News,Middle East Business,Arab Media News,Industry Events,Middle East Industry News,Arab Business Industry,Dubai Business News,Financial News,UAE Business News,Middle East Press Releases,Gulf News,Arab News,GCC Business News,Banking Finance,Media Marketing,Construction,Oil Gas,Retail,Transportation,Travel Hospitality,Photos,Videos,Life Style,Fashion,United Arab Emirates,UAE,Dubai,Sharjah,Abu Dhabi,Qatar,KSA,Saudi Arabia,Bahrain,Kuwait,Oman,Europe,South Asia,America,Asia,news'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    masthead_url = 'http://www.arabianbusiness.com/skins/ab.main/gfx/arabianbusiness_logo_sm.gif'

    # NOTE: the lines of this literal are kept flush left on purpose so
    # that the stylesheet text itself carries no extra indentation.
    extra_css = """
body{font-family: Georgia,serif }
img{margin-bottom: 0.4em; margin-top: 0.4em; display:block}
.byline,.dateline{font-size: small; display: inline; font-weight: bold}
ul{list-style: none outside none;}
"""

    # Reuses the metadata attributes defined above, so it has to come
    # after them in the class body.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Page clean-up --------------------------------------------------
    remove_tags_before = dict(attrs={'id': 'article-title'})
    remove_tags = [
        dict(name=['meta', 'link', 'base', 'iframe', 'embed', 'object']),
        dict(attrs={'class': 'printfooter'}),
    ]
    remove_attributes = ['lang']

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        # World
        (u'Africa', u'http://www.arabianbusiness.com/world/Africa/?service=rss'),
        (u'Americas', u'http://www.arabianbusiness.com/world/americas/?service=rss'),
        (u'Asia Pacific', u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss'),
        (u'Europe', u'http://www.arabianbusiness.com/world/europe/?service=rss'),
        (u'Middle East', u'http://www.arabianbusiness.com/world/middle-east/?service=rss'),
        (u'South Asia', u'http://www.arabianbusiness.com/world/south-asia/?service=rss'),
        # Industries
        (u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss'),
        (u'Construction', u'http://www.arabianbusiness.com/industries/construction/?service=rss'),
        (u'Education', u'http://www.arabianbusiness.com/industries/education/?service=rss'),
        (u'Energy', u'http://www.arabianbusiness.com/industries/energy/?service=rss'),
        (u'Healthcare', u'http://www.arabianbusiness.com/industries/healthcare/?service=rss'),
        (u'Media', u'http://www.arabianbusiness.com/industries/media/?service=rss'),
        (u'Real Estate', u'http://www.arabianbusiness.com/industries/real-estate/?service=rss'),
        (u'Retail', u'http://www.arabianbusiness.com/industries/retail/?service=rss'),
        (u'Technology', u'http://www.arabianbusiness.com/industries/technology/?service=rss'),
        (u'Transport', u'http://www.arabianbusiness.com/industries/transport/?service=rss'),
        (u'Travel', u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss'),
        # Markets
        (u'Equities', u'http://www.arabianbusiness.com/markets/equities/?service=rss'),
        (u'Commodities', u'http://www.arabianbusiness.com/markets/commodities/?service=rss'),
        (u'Currencies', u'http://www.arabianbusiness.com/markets/currencies/?service=rss'),
        (u'Market Data', u'http://www.arabianbusiness.com/markets/market-data/?service=rss'),
        # Opinion
        (u'Comment', u'http://www.arabianbusiness.com/opinion/comment/?service=rss'),
        (u'Think Tank', u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss'),
        # Lifestyle
        (u'Arts', u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss'),
        (u'Cars', u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss'),
        (u'Food', u'http://www.arabianbusiness.com/lifestyle/food/?service=rss'),
        (u'Sport', u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss'),
    ]

    def print_version(self, url):
        '''Return the printer-friendly address for the article at url.'''
        printer_query = '?service=printer&page='
        return url + printer_query

    def preprocess_html(self, soup):
        '''
        Strip inline styles and flatten text-only links.

        Every tag carrying a style attribute loses it, and every <a> tag
        whose content is a single string is replaced by that string.
        Returns the same soup object, modified in place.
        '''
        for styled in soup.findAll(style=True):
            del styled['style']
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                # Link wraps something other than a single string; keep it.
                continue
            anchor.replaceWith(text)
        return soup

View File

@ -0,0 +1,70 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Gerardo Diez'
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
__docformat__ = 'restructuredtext en'
'''
deia.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Deia(BasicNewsRecipe):
    # Recipe for the Spanish daily deia.com: one RSS feed per section of
    # the site, keeping only the article body of each downloaded page.

    # --- Metadata -------------------------------------------------------
    title = 'Deia'
    __author__ = 'Gerardo Diez'
    publisher = 'Editorial Iparraguirre, S.A'
    category = 'news, politics, finances, world, spain, euskadi'
    publication_type = 'newspaper'
    language = 'es_ES'
    cover_url = 'http://2.bp.blogspot.com/_RjrWzC6tI14/TM6jrPLaBZI/AAAAAAAAFaI/ayffwxidFEY/s1600/2009-10-13-logo-deia.jpg'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download settings ----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 10
    encoding = 'utf8'

    # --- Page clean-up --------------------------------------------------
    # NOTE: the trailing spaces inside the class names below are kept
    # exactly as written; presumably they mirror the site's markup.
    remove_javascript = True
    no_stylesheets = True
    remove_tags_after = dict(id='Texto')
    remove_tags_before = dict(id='Texto')
    remove_tags = [dict(name='div', attrs={'class': ['Herramientas ', 'Multimedia']})]
    keep_only_tags = [dict(name='div', attrs={'class': ['Texto ', 'NoticiaFicha ']})]
    extra_css = 'h1 {margin-bottom: .15em;font-size: 2.7em; font-family: Georgia, "Times New Roman", Times, serif;} .Antetitulo {margin: 1em 0;text-transform: uppercase;color: #999;} .PieFoto {margin: .1em 0;padding: .5em .5em .5em .5em;background: #F0F0F0;} .PieFoto p {margin-bottom: 0;font-family: Georgia,"Times New Roman",Times,serif;font-weight: bold; font-style: italic; color: #666;}'

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'Bizkaia', u'http://www.deia.com/index.php/services/rss?seccion=bizkaia'),
        (u'Bilbao', u'http://www.deia.com/index.php/services/rss?seccion=bilbao'),
        (u'Hemendik eta Handik', u'http://www.deia.com/index.php/services/rss?seccion=hemendik-eta-handik'),
        (u'Margen Derecha', u'http://www.deia.com/index.php/services/rss?seccion=margen-derecha'),
        (u'Encartaciones y Margen Izquierda', u'http://www.deia.com/index.php/services/rss?seccion=margen-izquierda-encartaciones'),
        (u'Costa', u'http://www.deia.com/index.php/services/rss?seccion=costa'),
        (u'Duranguesado', u'http://www.deia.com/index.php/services/rss?seccion=duranguesado'),
        (u'Llodio-Nervión', u'http://www.deia.com/index.php/services/rss?seccion=llodio-nervion'),
        (u'Arratia-Nervión', u'http://www.deia.com/index.php/services/rss?seccion=arratia-nervion'),
        (u'Uribe-Txorierri', u'http://www.deia.com/index.php/services/rss?seccion=uribe-txorierri'),
        (u'Ecos de sociedad', u'http://www.deia.com/index.php/services/rss?seccion=ecos-de-sociedad'),
        (u'Sucesos', u'http://www.deia.com/index.php/services/rss?seccion=sucesos'),
        (u'Política', u'http://www.deia.com/index.php/services/rss?seccion=politica'),
        (u'Euskadi', u'http://www.deia.com/index.php/services/rss?seccion=politica/euskadi'),
        (u'España', u'http://www.deia.com/index.php/services/rss?seccion=politica/espana'),
        (u'Sociedad', u'http://www.deia.com/index.php/services/rss?seccion=sociedad'),
        # Section key was misspelled "socidad"; every sibling feed of this
        # section uses "sociedad". The title still duplicates the
        # politica/euskadi feed above and is left unchanged.
        (u'Euskadi', u'http://www.deia.com/index.php/services/rss?seccion=sociedad/euskadi'),
        (u'Sociedad.España', u'http://www.deia.com/index.php/services/rss?seccion=sociedad/espana'),
        (u'Ocio y Cultura', u'http://www.deia.com/index.php/services/rss?seccion=ocio-y-cultura'),
        # Disabled feeds:
        #(u'Cultura' ,u'http://www.deia.com/index.php/services/rss?seccion=cultura'),
        #(u'Ocio' ,u'http://www.deia.com/index.php/services/rss?seccion=ocio'),
        (u'On', u'http://www.deia.com/index.php/services/rss?seccion=on'),
        (u'Agenda', u'http://www.deia.com/index.php/services/rss?seccion=agenda'),
        (u'Comunicación', u'http://www.deia.com/index.php/services/rss?seccion=comunicacion'),
        (u'Viajes', u'http://www.deia.com/index.php/services/rss?seccion=viajes'),
        (u'¡Mundo!', u'http://www.deia.com/index.php/services/rss?seccion=que-mundo'),
        (u'Humor', u'http://www.deia.com/index.php/services/rss?seccion=humor'),
        (u'Opinión', u'http://www.deia.com/index.php/services/rss?seccion=opinion'),
        (u'Editorial', u'http://www.deia.com/index.php/services/rss?seccion=editorial'),
        (u'Tribuna abierta', u'http://www.deia.com/index.php/services/rss?seccion=tribuna-abierta'),
        (u'Colaboración', u'http://www.deia.com/index.php/services/rss?seccion=colaboracion'),
        (u'Columnistas', u'http://www.deia.com/index.php/services/rss?seccion=columnistas'),
        (u'Deportes', u'http://www.deia.com/index.php/services/rss?seccion=deportes'),
        (u'Athletic', u'http://www.deia.com/index.php/services/rss?seccion=athletic'),
        (u'Economía', u'http://www.deia.com/index.php/services/rss?seccion=economia'),
        (u'Mundo', u'http://www.deia.com/index.php/services/rss?seccion=mundo'),
    ]

View File

@ -127,9 +127,6 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="5" column="1" colspan="3">
<widget class="QLineEdit" name="opt_input_encoding"/>
</item>
<item row="6" column="0" colspan="2"> <item row="6" column="0" colspan="2">
<widget class="QCheckBox" name="opt_remove_paragraph_spacing"> <widget class="QCheckBox" name="opt_remove_paragraph_spacing">
<property name="text"> <property name="text">
@ -244,8 +241,22 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="5" column="1" colspan="3">
<widget class="EncodingComboBox" name="opt_input_encoding">
<property name="editable">
<bool>true</bool>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<customwidgets>
<customwidget>
<class>EncodingComboBox</class>
<extends>QComboBox</extends>
<header>widgets.h</header>
</customwidget>
</customwidgets>
<resources> <resources>
<include location="../../../../resources/images.qrc"/> <include location="../../../../resources/images.qrc"/>
<include location="../../../../resources/images.qrc"/> <include location="../../../../resources/images.qrc"/>

View File

@ -616,6 +616,31 @@ class ComboBoxWithHelp(QComboBox):
QComboBox.hidePopup(self) QComboBox.hidePopup(self)
self.set_state() self.set_state()
class EncodingComboBox(QComboBox):
    '''
    An editable combobox pre-filled with text encodings supported by
    Python. Only the most common and standard encodings are offered:
    there is no good way to list every supported encoding
    programmatically, and encodings.aliases.aliases.keys() will not
    work for that purpose.
    '''

    # Entries are added to the combobox in exactly this order; the two
    # empty strings are added as (blank) entries like any other.
    ENCODINGS = [
        '', 'cp1252', 'latin1', 'utf-8',
        '', 'ascii', 'big5',
        'cp1250', 'cp1251', 'cp1253', 'cp1254', 'cp1255', 'cp1256',
        'euc_jp', 'euc_kr',
        'gb2312', 'gb18030', 'hz',
        'iso2022_jp', 'iso2022_kr', 'iso8859_5',
        'shift_jis',
    ]

    def __init__(self, parent=None):
        '''Create the editable combobox and fill it from ENCODINGS.'''
        QComboBox.__init__(self, parent)
        self.setEditable(True)
        self.setLineEdit(EnLineEdit(self))
        for encoding_name in self.ENCODINGS:
            self.addItem(encoding_name)
class PythonHighlighter(QSyntaxHighlighter): class PythonHighlighter(QSyntaxHighlighter):
Rules = [] Rules = []

View File

@ -133,7 +133,15 @@ class CustomColumns(object):
def adapt_bool(x, d):
    '''
    Convert x to the value stored for a boolean column.

    Non-string values are returned unchanged. Strings are compared
    case-insensitively: 'true' gives True, 'false' gives False and
    'none' gives None; any other string is parsed as an integer and
    converted with bool(). The second argument is not used.
    '''
    if not isinstance(x, (str, unicode, bytes)):
        return x
    lowered = x.lower()
    keywords = {'true': True, 'false': False, 'none': None}
    if lowered in keywords:
        return keywords[lowered]
    # Anything else must be a number such as '0' or '1'.
    return bool(int(lowered))
def adapt_enum(x, d): def adapt_enum(x, d):
@ -142,9 +150,17 @@ class CustomColumns(object):
v = None v = None
return v return v
def adapt_number(x, d):
    '''
    Convert x to the value stored for an int or float column.

    :param x: the incoming value: None, a number, or a string.
    :param d: the column description; d['datatype'] == 'int' selects
              int(), anything else selects float().
    :return: None when x is None or the string 'none' (any case),
             otherwise x converted to int or float.
    '''
    # None means "no value" and has to pass through untouched; without
    # this guard it would reach int(None) / float(None) below and raise
    # TypeError.
    if x is None:
        return None
    if isinstance(x, (str, unicode, bytes)):
        if x.lower() == 'none':
            return None
    if d['datatype'] == 'int':
        return int(x)
    return float(x)
self.custom_data_adapters = { self.custom_data_adapters = {
'float': lambda x,d : x if x is None else float(x), 'float': adapt_number,
'int': lambda x,d : x if x is None else int(x), 'int': adapt_number,
'rating':lambda x,d : x if x is None else min(10., max(0., float(x))), 'rating':lambda x,d : x if x is None else min(10., max(0., float(x))),
'bool': adapt_bool, 'bool': adapt_bool,
'comments': lambda x,d: adapt_text(x, {'is_multiple':False}), 'comments': lambda x,d: adapt_text(x, {'is_multiple':False}),

View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

200
src/calibre/utils/wmf/wmf.c Normal file
View File

@ -0,0 +1,200 @@
#define UNICODE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <libwmf/api.h>
#include <libwmf/svg.h>
/* Cursor over an in-memory byte buffer, used as the context of the
 * read/seek/tell callbacks below. */
typedef struct {
    char *data;  /* first byte of the buffer */
    size_t len;  /* number of bytes in data */
    size_t pos;  /* offset of the next byte to read */
} buf;

//This code is taken mostly from the Abiword wmf plugin

// returns unsigned char cast to int, or EOF
static int wmf_WMF_read(void * context) {
    buf *info = (buf*)context;

    /* >= rather than == so that a cursor that somehow ended up past the
     * end can never index outside the buffer. */
    if (info->pos >= info->len)
        return EOF;

    /* Convert through unsigned char: with a plain (possibly signed)
     * char, bytes >= 0x80 would come back negative and 0xFF would be
     * indistinguishable from EOF. */
    return (int)(unsigned char)info->data[info->pos++];
}
// Moves the read cursor to the absolute offset pos.
// returns (-1) on error (pos negative or beyond the end), else 0
static int wmf_WMF_seek(void * context, long pos) {
    buf *cursor = (buf *) context;

    /* Seeking to exactly len (one past the last byte) is allowed. */
    if (pos >= 0 && (size_t)pos <= cursor->len) {
        cursor->pos = (size_t)pos;
        return 0;
    }
    return -1;
}
// returns the current read offset; this implementation has no error path
static long wmf_WMF_tell(void * context) {
    return (long) ((buf *) context)->pos;
}
/* Free the rendered output buffer (if any) and destroy the libwmf API
 * handle. Relies on the locals `API` and `stream` of wmf_render. Wrapped
 * in do/while(0) so that `CLEANUP;` is always exactly one statement. */
#define CLEANUP do { if (API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); } } while (0)

/*
 * render(data) -> str
 *
 * Python entry point. Takes the raw bytes of a WMF image and returns the
 * SVG produced by libwmf's svg device as a string. The output size is the
 * WMF display size at 96 dpi, scaled down (aspect ratio preserved) to fit
 * within 1600x1200 when it is larger.
 *
 * Returns NULL with a Python exception set on failure:
 *   MemoryError - the libwmf API or its output stream could not be created
 *   Exception   - the in-memory input could not be attached to the API
 *   ValueError  - scanning or playing the WMF failed, or its size is bad
 */
static PyObject *
wmf_render(PyObject *self, PyObject *args) {
    char *data;
    Py_ssize_t sz;
    PyObject *ans;

    unsigned int disp_width = 0;
    unsigned int disp_height = 0;

    float wmf_width;
    float wmf_height;
    float ratio_wmf;
    float ratio_bounds;

    unsigned long flags;

    /* Upper bounds for the rendered image, in pixels. */
    unsigned int max_width = 1600;
    unsigned int max_height = 1200;

    static const char* Default_Description = "wmf2svg";

    wmf_error_t err;
    wmf_svg_t* ddata = 0;
    wmfAPI* API = 0;
    wmfD_Rect bbox;
    wmfAPI_Options api_options;

    buf read_info;

    char *stream = NULL;
    unsigned long stream_len = 0;

    if (!PyArg_ParseTuple(args, "s#", &data, &sz))
        return NULL;

    /* Create an API handle bound to the svg device. */
    flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION;
    api_options.function = wmf_svg_function;

    err = wmf_api_create(&API, flags, &api_options);
    if (err != wmf_E_None) {
        CLEANUP;
        return PyErr_NoMemory();
    }

    /* Feed libwmf from the caller's buffer through the callbacks above. */
    read_info.data = data;
    read_info.len = sz;
    read_info.pos = 0;

    err = wmf_bbuf_input(API, wmf_WMF_read, wmf_WMF_seek, wmf_WMF_tell, (void *) &read_info);
    if (err != wmf_E_None) {
        CLEANUP;
        PyErr_SetString(PyExc_Exception, "Failed to initialize WMF input");
        return NULL;
    }

    err = wmf_scan(API, 0, &(bbox));
    if (err != wmf_E_None)
    {
        CLEANUP;
        PyErr_SetString(PyExc_ValueError, "Failed to scan the WMF");
        return NULL;
    }

    /* Okay, got this far, everything seems cool.
     */
    ddata = WMF_SVG_GetData (API);

    /* A NULL FILE* asks for an in-memory output stream. */
    ddata->out = wmf_stream_create(API, NULL);
    if (ddata->out == NULL) {
        CLEANUP;
        return PyErr_NoMemory();
    }

    ddata->Description = (char *)Default_Description;

    ddata->bbox = bbox;

    wmf_display_size(API, &disp_width, &disp_height, 96, 96);

    wmf_width = (float) disp_width;
    wmf_height = (float) disp_height;

    if ((wmf_width <= 0) || (wmf_height <= 0)) {
        CLEANUP;
        PyErr_SetString(PyExc_ValueError, "Bad WMF image size");
        return NULL;
    }

    if ((wmf_width > (float) max_width )
        || (wmf_height > (float) max_height)) {
        /* Too large: shrink to fit the bounds, keeping the aspect ratio. */
        ratio_wmf = wmf_height / wmf_width;
        ratio_bounds = (float) max_height / (float) max_width;

        if (ratio_wmf > ratio_bounds) {
            ddata->height = max_height;
            ddata->width = (unsigned int) ((float) ddata->height / ratio_wmf);
        }
        else {
            ddata->width = max_width;
            ddata->height = (unsigned int) ((float) ddata->width * ratio_wmf);
        }
    }
    else {
        /* Within bounds: use the display size as is. (The values are
         * already whole numbers, so no rounding is needed.) */
        ddata->width = disp_width;
        ddata->height = disp_height;
    }

    ddata->flags |= WMF_SVG_INLINE_IMAGES;

    /* NOTE(review): these are gd-device flag names applied to the svg
     * device's flags; kept as in the original - confirm they are intended. */
    ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;

    err = wmf_play(API, 0, &(bbox));

    if (err != wmf_E_None) {
        CLEANUP;
        PyErr_SetString(PyExc_ValueError, "Playing of the WMF file failed");
        return NULL;
    }

    /* Detach the rendered bytes from the output stream. */
    wmf_stream_destroy(API, ddata->out, &stream, &stream_len);

    /* PY_SSIZE_T_CLEAN is defined, so "s#" consumes a Py_ssize_t length;
     * passing the unsigned long directly is wrong wherever the two types
     * differ in size. */
    ans = Py_BuildValue("s#", stream, (Py_ssize_t) stream_len);

    /* Same teardown as the error paths; also skips wmf_free when no
     * buffer was handed back. */
    CLEANUP;

    return ans;
}
/* Method table of the wmf module. */
static PyMethodDef wmf_methods[] = {
    /* wmf_render parses its argument with "s#", i.e. it takes the WMF
     * data itself, not a path to a file. */
    {"render", wmf_render, METH_VARARGS,
     "render(data) -> Render wmf as svg."
    },

    {NULL, NULL, 0, NULL}  /* Sentinel */
};
/* Module initialisation: registers the "wmf" module with its method
 * table and module docstring. */
PyMODINIT_FUNC
initwmf(void)
{
    /* Nothing else is added to the module, so the returned module
     * object is not kept. */
    (void) Py_InitModule3("wmf", wmf_methods,
                          "Wrapper for the libwmf library");
}