Added recipe for hnonline.sk - Slovak daily news. It is based on kwetal's SME recipe

This commit is contained in:
lacike 2013-02-22 20:33:06 +01:00
commit 862495d512
11 changed files with 238 additions and 30 deletions

View File

@ -0,0 +1,27 @@
# coding=utf-8
# https://github.com/iemejia/calibrecolombia
'''
http://www.elmalpensante.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElMalpensante(BasicNewsRecipe):
    # Recipe for El Malpensante (http://www.elmalpensante.com/), driven
    # entirely by the RSS feeds listed below.

    # -- Identification --------------------------------------------------
    __author__ = 'Ismael Mejia <iemejia@gmail.com>'
    title = u'El Malpensante'
    description = 'El Malpensante'
    language = 'es_CO'
    cover_url = 'http://elmalpensante.com/img/layout/logo.gif'

    # -- Download settings -----------------------------------------------
    oldest_article = 30
    max_articles_per_feed = 100
    simultaneous_downloads = 20
    use_embedded_content = True
    remove_empty_feeds = True

    # (section title, feed URL) pairs.
    # The Noticias feed (http://www.elmalpensante.com/noticiasRSS.php) is
    # deliberately left out: it is almost the same as the Artículos feed.
    feeds = [
        (u'Artículos', u'http://www.elmalpensante.com/articulosRSS.php'),
        (u'Malpensantías', u'http://www.elmalpensante.com/malpensantiasRSS.php'),
        (u'Margaritas', u'http://www.elmalpensante.com/margaritasRSS.php'),
    ]

68
recipes/hnonline.recipe Normal file
View File

@ -0,0 +1,68 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class HNonlineRecipe(BasicNewsRecipe):
    # Recipe for hnonline.sk. Articles are collected from the RSS feeds of
    # the HNonline, FinWeb and HNstyle sections, and each article page is
    # reduced to its title, subtitle, perex and text elements.

    # -- Identification --------------------------------------------------
    __license__ = 'GPL v3'
    __author__ = 'lacike'
    version = 1
    language = 'sk'
    title = u'HNonline'
    publisher = u'HNonline'
    category = u'News, Newspaper'
    description = u'News from Slovakia'
    cover_url = u'http://hnonline.sk/img/sk/_relaunch/logo2.png'

    # -- Download settings -----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    # (section title, feed URL) pairs.
    # Feeds from: http://rss.hnonline.sk, for listing see
    # http://rss.hnonline.sk/prehlad
    feeds = [
        (u'HNonline|Ekonomika a firmy', u'http://rss.hnonline.sk/?p=kC1000'),
        (u'HNonline|Slovensko', u'http://rss.hnonline.sk/?p=kC2000'),
        (u'HNonline|Svet', u'http://rss.hnonline.sk/?p=kC3000'),
        (u'HNonline|\u0160port', u'http://rss.hnonline.sk/?p=kC4000'),
        (u'HNonline|Online rozhovor', u'http://rss.hnonline.sk/?p=kCR000'),

        (u'FinWeb|Spr\u00E1vy zo sveta financi\u00ED', u'http://rss.finweb.hnonline.sk/spravodajstvo'),
        (u'FinWeb|Koment\u00E1re a anal\u00FDzy', u'http://rss.finweb.hnonline.sk/?p=kPC200'),
        (u'FinWeb|Invest\u00EDcie', u'http://rss.finweb.hnonline.sk/?p=kPC300'),
        (u'FinWeb|Svet akci\u00ED', u'http://rss.finweb.hnonline.sk/?p=kPC400'),
        (u'FinWeb|Rozhovory', u'http://rss.finweb.hnonline.sk/?p=kPC500'),
        (u'FinWeb|T\u00E9ma t\u00FD\u017Ed\u0148a', u'http://rss.finweb.hnonline.sk/?p=kPC600'),
        (u'FinWeb|Rebr\u00ED\u010Dky', u'http://rss.finweb.hnonline.sk/?p=kPC700'),

        (u'HNstyle|Kult\u00FAra', u'http://style.hnonline.sk/?p=kTC100'),
        (u'HNstyle|Auto-moto', u'http://style.hnonline.sk/?p=kTC200'),
        (u'HNstyle|Digit\u00E1l', u'http://style.hnonline.sk/?p=kTC300'),
        (u'HNstyle|Veda', u'http://style.hnonline.sk/?p=kTCV00'),
        (u'HNstyle|Dizajn', u'http://style.hnonline.sk/?p=kTC400'),
        (u'HNstyle|Cestovanie', u'http://style.hnonline.sk/?p=kTCc00'),
        (u'HNstyle|V\u00EDkend', u'http://style.hnonline.sk/?p=kTC800'),
        (u'HNstyle|Gastro', u'http://style.hnonline.sk/?p=kTC600'),
        (u'HNstyle|M\u00F3da', u'http://style.hnonline.sk/?p=kTC700'),
        (u'HNstyle|Modern\u00E1 \u017Eena', u'http://style.hnonline.sk/?p=kTCA00'),
        (u'HNstyle|Pre\u010Do nie?!', u'http://style.hnonline.sk/?p=k7C000'),
    ]

    # Only these elements of an article page are kept, in this order.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'detail-titulek'}},
        {'name': 'div', 'attrs': {'class': 'detail-podtitulek'}},
        {'name': 'div', 'attrs': {'class': 'detail-perex'}},
        {'name': 'div', 'attrs': {'class': 'detail-text'}},
    ]

    # No extra removal rules are defined; both lists are intentionally empty.
    remove_tags = []
    remove_tags_after = []

    # Font setup pointing at font files under /opt/sony/ebook/FONT.
    extra_css = '''
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)}
body {font-family: sans1, serif1;}
'''

BIN
recipes/icons/hnonline.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

View File

@ -0,0 +1,33 @@
# coding=utf-8
# https://github.com/iemejia/calibrecolombia
'''
http://www.cromos.com.co/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElMalpensante(BasicNewsRecipe):
    # Recipe for Revista Cromos (http://www.cromos.com.co/), driven entirely
    # by the RSS feeds listed below.
    # NOTE(review): the class is named ElMalpensante although the recipe is
    # for Revista Cromos -- this looks like a copy-paste leftover. The name
    # is kept as-is so nothing that refers to it breaks; confirm and rename.

    # -- Identification --------------------------------------------------
    __author__ = 'Ismael Mejia <iemejia@gmail.com>'
    title = u'Revista Cromos'
    description = 'Revista Cromos'
    language = 'es_CO'
    cover_url = 'http://www.cromos.com.co/sites/cromos.com.co/themes/cromos_theme/images/logo_morado.gif'

    # -- Download settings -----------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 20
    use_embedded_content = True
    remove_empty_feeds = True

    # (section title, feed URL) pairs.
    feeds = [
        (u'Cromos', u'http://www.cromos.com.co/rss.xml'),
        (u'Moda', u'http://www.cromos.com.co/moda/feed'),
        (u'Estilo de Vida', u'http://www.cromos.com.co/estilo-de-vida/feed'),
        (u'Cuidado Personal', u'http://www.cromos.com.co/estilo-de-vida/cuidado-personal/feed'),
        (u'Salud y Alimentación', u'http://www.cromos.com.co/estilo-de-vida/salud-y-alimentacion/feed'),
        (u'Personajes', u'http://www.cromos.com.co/personajes/feed'),
        (u'Actualidad', u'http://www.cromos.com.co/personajes/actualidad/feed'),
        (u'Espectáculo', u'http://www.cromos.com.co/personajes/espectaculo/feed'),
        (u'Reportajes', u'http://www.cromos.com.co/reportajes/feed'),
        (u'Eventos', u'http://www.cromos.com.co/eventos/feed'),
        (u'Modelos', u'http://www.cromos.com.co/modelos/feed'),
    ]

View File

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# https://github.com/iemejia/calibrecolombia
'''
http://www.unperiodico.unal.edu.co/
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class UNPeriodico(BasicNewsRecipe):
    # Recipe for UN Periodico (http://www.unperiodico.unal.edu.co/), built
    # from the site's single RSS feed.

    # -- Identification --------------------------------------------------
    __author__ = 'Ismael Mejia <iemejia@gmail.com>'
    title = u'UN Periodico'
    description = 'UN Periodico'
    language = 'es_CO'
    publication_type = 'newspaper'
    cover_url = 'http://www.unperiodico.unal.edu.co/fileadmin/templates/periodico/img/logoperiodico.png'

    # -- Download settings -----------------------------------------------
    oldest_article = 30
    max_articles_per_feed = 100

    # (section title, feed URL) pairs.
    feeds = [(u'UNPeriodico', u'http://www.unperiodico.unal.edu.co/rss/type/rss2/')]

View File

@ -55,20 +55,14 @@ class WallStreetJournal(BasicNewsRecipe):
]
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]
use_javascript_to_login = True
def get_browser(self):
    '''
    Return the browser used for downloading.

    The browser comes from BasicNewsRecipe.get_browser(). When both a
    username and a password are set, the wsj.com login form is submitted
    first; ValueError is raised if the response contains none of the
    markers that indicate a logged-in page.
    '''
    br = BasicNewsRecipe.get_browser(self)
    if self.username is None or self.password is None:
        # No credentials: hand back the plain browser.
        return br

    br.open('http://commerce.wsj.com/auth/login')
    br.select_form(nr=1)
    br['user'] = self.username
    br['password'] = self.password
    raw = br.submit().read()

    # Any one of these substrings counts as a successful login.
    logged_in_markers = ('Welcome,', '>Logout<', '>Log Out<')
    if not any(marker in raw for marker in logged_in_markers):
        raise ValueError('Failed to log in to wsj.com, check your '
                'username and password')
    return br
def javascript_login(self, br, username, password):
    '''
    Log in to wsj.com through the standalone login page.

    :param br: Browser object; must provide visit(), select_form() and
               submit()
    :param username: Value for the form's ``username`` field
    :param password: Value for the form's ``password`` field
    '''
    login_url = 'https://id.wsj.com/access/pages/wsj/us/login_standalone.html?mg=com-wsj'
    br.visit(login_url, timeout=120)
    # The login form is the first form on the page.
    login_form = br.select_form(nr=0)
    login_form['username'] = username
    login_form['password'] = password
    br.submit(timeout=120)
def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'):

View File

@ -211,6 +211,12 @@ class Cache(object):
self.fields['ondevice'] = create_field('ondevice',
VirtualTable('ondevice'))
for name, field in self.fields.iteritems():
if name[0] == '#' and name.endswith('_index'):
field.series_field = self.fields[name[:-len('_index')]]
elif name == 'series_index':
field.series_field = self.fields['series']
@read_api
def field_for(self, name, book_id, default_value=None):
'''

View File

@ -46,6 +46,7 @@ class Field(object):
elif name == 'languages':
self.category_formatter = calibre_langcode_to_name
self.writer = Writer(self)
self.series_field = None
@property
def metadata(self):

View File

@ -22,7 +22,11 @@ class WritingTest(BaseTest):
def create_getter(self, name, getter=None):
if getter is None:
ans = lambda db:partial(db.get_custom, label=name[1:],
if name.endswith('_index'):
ans = lambda db:partial(db.get_custom_extra, index_is_id=True,
label=name[1:].replace('_index', ''))
else:
ans = lambda db:partial(db.get_custom, label=name[1:],
index_is_id=True)
else:
ans = lambda db:partial(getattr(db, getter), index_is_id=True)
@ -41,11 +45,11 @@ class WritingTest(BaseTest):
self.create_setter(name, setter))
def run_tests(self, tests):
cl = self.cloned_library
results = {}
for test in tests:
results[test] = []
for val in test.vals:
cl = self.cloned_library
cache = self.init_cache(cl)
cache.set_field(test.name, {1: val})
cached_res = cache.field_for(test.name, 1)
@ -53,23 +57,35 @@ class WritingTest(BaseTest):
db = self.init_old(cl)
getter = test.getter(db)
sqlite_res = getter(1)
test.setter(db)(1, val)
old_cached_res = getter(1)
self.assertEqual(old_cached_res, cached_res,
'Failed setting for %s with value %r, cached value not the same. Old: %r != New: %r'%(
test.name, val, old_cached_res, cached_res))
db.refresh()
old_sqlite_res = getter(1)
self.assertEqual(old_sqlite_res, sqlite_res,
'Failed setting for %s, sqlite value not the same: %r != %r'%(
test.name, old_sqlite_res, sqlite_res))
if test.name.endswith('_index'):
val = float(val) if val is not None else 1.0
self.assertEqual(sqlite_res, val,
'Failed setting for %s with value %r, sqlite value not the same. val: %r != sqlite_val: %r'%(
test.name, val, val, sqlite_res))
else:
test.setter(db)(1, val)
old_cached_res = getter(1)
self.assertEqual(old_cached_res, cached_res,
'Failed setting for %s with value %r, cached value not the same. Old: %r != New: %r'%(
test.name, val, old_cached_res, cached_res))
db.refresh()
old_sqlite_res = getter(1)
self.assertEqual(old_sqlite_res, sqlite_res,
'Failed setting for %s, sqlite value not the same: %r != %r'%(
test.name, old_sqlite_res, sqlite_res))
del db
def test_one_one(self):
'Test setting of values in one-one fields'
tests = []
tests = [self.create_test('#yesno', (True, False, 'true', 'false', None))]
for name, getter, setter in (
('#series_index', None, None),
('series_index', 'series_index', 'set_series_index'),
('#float', None, None),
):
vals = ['1.5', None, 0, 1.0]
tests.append(self.create_test(name, tuple(vals), getter, setter))
for name, getter, setter in (
('pubdate', 'pubdate', 'set_pubdate'),
('timestamp', 'timestamp', 'set_timestamp'),
@ -78,6 +94,25 @@ class WritingTest(BaseTest):
tests.append(self.create_test(
name, ('2011-1-12', UNDEFINED_DATE, None), getter, setter))
for name, getter, setter in (
('title', 'title', 'set_title'),
('uuid', 'uuid', 'set_uuid'),
('author_sort', 'author_sort', 'set_author_sort'),
('sort', 'title_sort', 'set_title_sort'),
('#comments', None, None),
('comments', 'comments', 'set_comment'),
):
vals = ['something', None]
if name not in {'comments', '#comments'}:
# Setting text column to '' returns None in the new backend
# and '' in the old. I think None is more correct.
vals.append('')
if name == 'comments':
# Again new behavior of deleting comment rather than setting
# empty string is more correct.
vals.remove(None)
tests.append(self.create_test(name, tuple(vals), getter, setter))
self.run_tests(tests)
def tests():

View File

@ -98,10 +98,14 @@ def get_adapter(name, metadata):
if name == 'title':
return lambda x: ans(x) or _('Unknown')
if name == 'author_sort':
return lambda x: ans(x) or ''
if name == 'authors':
return lambda x: ans(x) or (_('Unknown'),)
if name in {'timestamp', 'last_modified'}:
return lambda x: ans(x) or UNDEFINED_DATE
if name == 'series_index':
return lambda x: 1.0 if ans(x) is None else ans(x)
return ans
# }}}
@ -134,6 +138,21 @@ def one_one_in_other(book_id_val_map, db, field, *args):
field.table.book_col_map.update(updated)
return set(book_id_val_map)
def custom_series_index(book_id_val_map, db, field, *args):
    '''
    Write series index values for a custom series column.

    For every book in book_id_val_map that has an entry in the associated
    series field (field.series_field), the index is written to the column
    field.metadata['column'] of the table field.metadata['table'], matched
    on the book id and the id of the book's series. An index of None is
    stored as 1.0. Books without an entry in the series field are skipped.

    :param book_id_val_map: Mapping of book id to new series index (or None)
    :param db: Backend object; db.conn.executemany() runs the UPDATE
    :param field: The ``*_index`` field being written
    :return: The set of book ids for which an UPDATE was issued
    '''
    series_field = field.series_field
    sequence = []
    for book_id, sidx in book_id_val_map.iteritems():
        if sidx is None:
            sidx = 1.0
        ids = series_field.ids_for_book(book_id)
        if ids:
            # Parameters in the order the UPDATE statement expects them.
            sequence.append((sidx, book_id, ids[0]))
            # NOTE(review): the in-memory map is updated only for books
            # that have a series, to stay in step with the rows actually
            # written -- confirm this nesting against the original file.
            field.table.book_col_map[book_id] = sidx
    if sequence:
        db.conn.executemany('UPDATE %s SET %s=? WHERE book=? AND value=?'%(
            field.metadata['table'], field.metadata['column']), sequence)
    # Each tuple is (sidx, book_id, series_id). Report the book ids that
    # were changed (element 1), not the index values (element 0); this
    # matches one_one_in_other, which returns a set of book ids.
    return {s[1] for s in sequence}
def dummy(book_id_val_map, *args):
    '''
    No-op writer: ignores its arguments and writes nothing.

    :return: An empty set, i.e. no books were changed
    '''
    no_books_changed = set()
    return no_books_changed
@ -148,13 +167,16 @@ class Writer(object):
if dt == 'composite' or field.name in {
'id', 'cover', 'size', 'path', 'formats', 'news'}:
self.set_books_func = dummy
elif self.name[0] == '#' and self.name.endswith('_index'):
self.set_books_func = custom_series_index
elif field.is_many:
# TODO: Implement this
pass
# TODO: Remember to change commas to | when writing authors to sqlite
else:
self.set_books_func = (one_one_in_books if field.metadata['table']
== 'books' else one_one_in_other)
if self.name in {'timestamp', 'uuid'}:
if self.name in {'timestamp', 'uuid', 'sort'}:
self.accept_vals = bool
def set_books(self, book_id_val_map, db):

View File

@ -369,7 +369,7 @@ def build_pipe(print_error=True):
t.start()
t.join(3.0)
if t.is_alive():
if iswindows():
if iswindows:
cant_start()
else:
f = os.path.expanduser('~/.calibre_calibre GUI.lock')