Sync to trunk.

This commit is contained in:
John Schember 2011-03-10 20:35:26 -05:00
commit b58cd26fa1
43 changed files with 1054 additions and 125 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 495 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 414 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 840 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 302 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 556 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 437 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 728 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 290 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 494 B

View File

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
cotidianul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Cotidianul(BasicNewsRecipe):
title = u'Cotidianul'
__author__ = u'Silviu Cotoar\u0103'
description = u''
publisher = u'Cotidianul'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri'
encoding = 'utf-8'
cover_url = 'http://www.cotidianul.ro/images/cotidianul.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'class':'titlu'})
, dict(name='div', attrs={'class':'gallery clearfix'})
, dict(name='div', attrs={'align':'justify'})
]
remove_tags = [
dict(name='div', attrs={'class':['space']})
, dict(name='div', attrs={'id':['title_desc']})
]
remove_tags_after = [
dict(name='div', attrs={'class':['space']})
, dict(name='span', attrs={'class':['date']})
]
feeds = [
(u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
ele.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ele(BasicNewsRecipe):
title = u'Ele'
__author__ = u'Silviu Cotoar\u0103'
description = u'Dezv\u0103luie ceea ce e\u015fti'
publisher = u'Ele'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Femei'
encoding = 'utf-8'
cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='h1', attrs={'class':'article_title'})
, dict(name='div', attrs={'class':'article_text'})
]
feeds = [
(u'Feeds', u'http://www.ele.ro/rss_must_read')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
revistafelicia.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Felicia(BasicNewsRecipe):
title = u'Revista Felicia'
__author__ = u'Silviu Cotoar\u0103'
description = u'O revist\u0103 pentru sufletul t\u0103u'
publisher = u'Revista Felicia'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Reviste'
encoding = 'utf-8'
cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'header'})
, dict(name='div', attrs={'id':'contentArticol'})
]
remove_tags = [
dict(name='img',attrs={'src':['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']})
, dict(name='div',attrs={'class':['content']})
]
feeds = [
(u'Feeds', u'http://www.revistafelicia.ro/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
financiarul.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Financiarul(BasicNewsRecipe):
title = u'Financiarul'
__author__ = u'Silviu Cotoar\u0103'
description = u'FIN.ro'
publisher = u'Financiarul'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri'
encoding = 'utf-8'
cover_url = 'http://www.financiarul.com/templates/default/images/logo.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'col2ContentLeftL'})
]
remove_tags = [
dict(name='div',attrs={'class':['infoArticol']})
, dict(name='ul', attrs={'class':'navSectiuni'})
, dict(name='div', attrs={'class':'separator separatorTop'})
, dict(name='div', attrs={'class':'infoArticol infoArticolBottom'})
, dict(name='ul', attrs={'class':['related']})
, dict(name='div', attrs={'class':['slot panel300 panelGri300 panelGri300s panelGri300sm']})
]
remove_tags_after = [
dict(name='ul', attrs={'class':['related']})
]
feeds = [
(u'Feeds', u'http://www.financiarul.com/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
imperatortravel.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Imperatortravel(BasicNewsRecipe):
title = u'Imperator Travel'
__author__ = u'Silviu Cotoar\u0103'
description = u'C\u0103l\u0103torii'
publisher = u'Imperator Travel'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Turism,Calatorii'
encoding = 'utf-8'
cover_url = 'http://www.imperatortravel.ro/images/header-1.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'class':'article first_main_article'})
]
remove_tags = [
dict(name='div', attrs={'class':['meta']})
, dict(name='body', attrs={'class':['transparent_widget ff3 win Locale_en_US']})
, dict(name='div', attrs={'class':['connect_widget']})
, dict(name='ul', attrs={'class':['similar-posts']})
]
remove_tags_after = [
dict(name='ul', attrs={'class':['similar-posts']})
]
feeds = [
(u'Feeds', u'http://feeds.feedburner.com/ImperatorTravels')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,42 @@
import urllib2
from calibre.web.feeds.news import BasicNewsRecipe
class JBPress(BasicNewsRecipe):
title = u'JBPress'
language = 'ja'
description = u'Japan Business Press New articles (using small print version)'
__author__ = 'Ado Nishimura'
needs_subscription = True
oldest_article = 7
max_articles_per_feed = 100
remove_tags_before = dict(id='wrapper')
no_stylesheets = True
feeds = [('JBPress new article', 'http://feed.ismedia.jp/rss/jbpress/all.rdf')]
def get_cover_url(self):
return 'http://www.jbpress.co.jp/common/images/v1/jpn/common/logo.gif'
def get_browser(self):
html = '''<form action="https://jbpress.ismedia.jp/auth/dologin/http://jbpress.ismedia.jp/articles/print/5549" method="post">
<input id="login" name="login" type="text"/>
<input id="password" name="password" type="password"/>
<input id="rememberme" name="rememberme" type="checkbox"/>
</form>
'''
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://jbpress.ismedia.jp/articles/print/5549')
response = br.response()
response.set_data(html)
br.set_response(response)
br.select_form(nr=0)
br["login"] = self.username
br['password'] = self.password
br.submit()
return br
def print_version(self, url):
url = urllib2.urlopen(url).geturl() # resolve redirect.
return url.replace('/-/', '/print/')

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
lanacion.com.ar lanacion.com.ar
''' '''
@ -17,14 +17,16 @@ class Lanacion(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
language = 'es_AR' language = 'es_AR'
delay = 14
publication_type = 'newspaper' publication_type = 'newspaper'
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif' masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif'
extra_css = """ h1{font-family: Georgia,serif} extra_css = """
h2{color: #626262} h1{font-family: Georgia,serif}
h2{color: #626262; font-weight: normal; font-size: 1.1em}
body{font-family: Arial,sans-serif} body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.notaFecha{color: #808080} .notaFecha{color: #808080; font-size: small}
.notaEpigrafe{font-size: x-small} .notaEpigrafe{font-size: x-small}
.topNota h1{font-family: Arial,sans-serif} .topNota h1{font-family: Arial,sans-serif}
""" """
@ -37,47 +39,75 @@ class Lanacion(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
keep_only_tags = [dict(name='div', attrs={'class':['nota floatFix','topNota','nota','post']})] keep_only_tags = [
dict(name='div', attrs={'class':['topNota','itemHeader','nota','itemBody']})
,dict(name='div', attrs={'id':'content'})
]
remove_tags = [ remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' }) dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']}) ,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']})
,dict(name='div' , attrs={'class':['cajaHerramientas noprint','cajaHerramientas floatFix'] }) ,dict(name='div' , attrs={'class':['titulosMultimedia','herramientas noprint','cajaHerramientas noprint','cajaHerramientas floatFix'] })
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']}) ,dict(attrs={'class':['izquierda','espacio17','espacio10','espacio20','floatFix ultimasNoticias','relacionadas','titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input']) ,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
] ]
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']}) remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
remove_attributes = ['height','width','visible','onclick','data-count','name'] remove_attributes = ['height','width','visible','onclick','data-count','name']
feeds = [ feeds = [
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' ) (u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
,(u'Politica' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=30' ) ,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
,(u'Economia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=272' ) ,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
,(u'Deportes' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=131' ) ,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' )
,(u'Informacion General' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=21' ) ,(u'Cultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1' )
,(u'Cultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1' ) ,(u'Opinion' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=28' )
,(u'Opinion' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=28' ) ,(u'Espectaculos' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=120' )
,(u'Espectaculos' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=120' ) ,(u'Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7' )
,(u'Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7' ) ,(u'Ciencia&Salud' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=498' )
,(u'Ciencia&Salud' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=498' ) ,(u'Revista' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=494' )
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' ) ,(u'Enfoques' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=421' )
,(u'Enfoques' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=421' ) ,(u'Comercio Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=347' )
,(u'Comercio Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=347' ) ,(u'Tecnologia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=432' )
,(u'Tecnologia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=432' ) ,(u'Arquitectura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=366' )
,(u'Arquitectura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=366' ) ,(u'Turismo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=504' )
,(u'Turismo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=504' ) ,(u'Al volante' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=371' )
,(u'Al volante' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=371' ) ,(u'El Campo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=337' )
,(u'El Campo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=337' ) ,(u'Moda y Belleza' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1312')
,(u'Moda y Belleza' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1312' ) ,(u'Inmuebles Comerciales', u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1363')
,(u'Inmuebles Comerciales', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1363' ) ,(u'Countries' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1348')
,(u'Countries' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1348' ) ,(u'adnCultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6734')
,(u'adnCultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6734' ) ,(u'The WSJ Americas' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6373')
,(u'The Wall Street Journal Americas', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6373' ) ,(u'Comunidad' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1344')
,(u'Estilo de vida' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7353' ) ,(u'Management' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7380')
,(u'Management' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7380' ) ,(u'Bicentenario' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7276')
,(u'Bicentenario' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7276' )
] ]
def get_article_url(self, article):
link = BasicNewsRecipe.get_article_url(self,article)
if link.startswith('http://blogs.lanacion') and not link.endswith('/'):
return self.browser.open_novisit(link).geturl()
if link.rfind('galeria=') > 0:
return None
return link
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return self.adeify_images(soup) for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
monden.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Monden(BasicNewsRecipe):
title = u'Monden'
__author__ = u'Silviu Cotoar\u0103'
description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102'
publisher = u'Monden'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Muzica'
encoding = 'utf-8'
cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'id':'content'})
]
remove_tags = [
dict(name='div', attrs={'class':['postAuthor']})
, dict(name='div', attrs={'class':['postLike']})
]
remove_tags_after = [
dict(name='div', attrs={'class':['postLike']})
]
feeds = [
(u'Feeds', u'http://www.monden.info/feed/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,33 @@
EMAILADDRESS = 'hoge@foobar.co.jp'
from calibre.web.feeds.news import BasicNewsRecipe
class NBOnline(BasicNewsRecipe):
title = u'Nikkei Business Online'
language = 'ja'
description = u'Nikkei Business Online New articles. PLEASE NOTE: You need to edit EMAILADDRESS line of this "nbonline.recipe" file to set your e-mail address which is needed when login. (file is in "Calibre2/resources/recipes" directory.)'
__author__ = 'Ado Nishimura'
needs_subscription = True
oldest_article = 7
max_articles_per_feed = 100
remove_tags_before = dict(id='kanban')
remove_tags = [dict(name='div', id='footer')]
feeds = [('Nikkei Buisiness Online', 'http://business.nikkeibp.co.jp/rss/all_nbo.rdf')]
def get_cover_url(self):
return 'http://business.nikkeibp.co.jp/images/nbo/200804/parts/logo.gif'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
br.select_form(name='loginActionForm')
br['email'] = EMAILADDRESS
br['userId'] = self.username
br['password'] = self.password
br.submit()
return br
def print_version(self, url):
return url + '?ST=print'

View File

@ -0,0 +1,23 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1299640653(BasicNewsRecipe):
title = u'Oakland North'
oldest_article = 30
max_articles_per_feed = 100
language = 'en'
__author__ = 'noah'
description = 'Oakland North'
category = 'news'
no_stylesheets = True
masthead_url = 'http://oaklandnorth.net/wp-content/themes/oaklandnorth/images/masthead.png'
keep_only_tags = [dict(name='div', attrs={'class':re.compile(r'\bpost\b(?!-)', re.IGNORECASE)})]
remove_tags_after = [dict(name='p', attrs={'class':'post-postscript'})]
remove_tags = [dict(name='p', attrs={'class':'post-postscript'})]
feeds = [(u'All Headlines', u'http://oaklandnorth.net/feed/')]

View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
promotor.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Promotor(BasicNewsRecipe):
title = u'Promotor'
__author__ = u'Silviu Cotoar\u0103'
description = u'Auto-moto'
publisher = u'Promotor'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Reviste,TV,Auto'
encoding = 'utf-8'
cover_url = 'http://www.promotor.ro/images/logo_promotor.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'class':'casetatitluarticol'})
, dict(name='div', attrs={'style':'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'})
, dict(name='div', attrs={'class':'textb'})
, dict(name='div', attrs={'class':'contentarticol'})
]
remove_tags = [
dict(name='td', attrs={'class':['connect_widget_vertical_center connect_widget_button_cell']})
, dict(name='div', attrs={'class':['etichetagry']})
, dict(name='span', attrs={'class':['textb']})
]
remove_tags_after = [
dict(name='div', attrs={'class':['etichetagry']})
, dict(name='span', attrs={'class':['textb']})
]
feeds = [
(u'Feeds', u'http://www.promotor.ro/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
publika.md
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Publika(BasicNewsRecipe):
title = u'Publika'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u015etiri din Moldova'
publisher = u'Publika'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Moldova'
encoding = 'utf-8'
cover_url = 'http://assets.publika.md/images/logo.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'id':'colLeft'})
]
remove_tags = [
dict(name='div', attrs={'class':['articleInfo']})
, dict(name='div', attrs={'class':['articleRelated']})
, dict(name='div', attrs={'class':['roundedBox socialSharing']})
, dict(name='div', attrs={'class':['comment clearfix']})
]
remove_tags_after = [
dict(name='div', attrs={'class':['roundedBox socialSharing']})
, dict(name='div', attrs={'class':['comment clearfix']})
]
feeds = [
(u'Feeds', u'http://rss.publika.md/stiri.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
timesnewroman.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TimesNewRoman(BasicNewsRecipe):
title = u'Times New Roman'
__author__ = u'Silviu Cotoar\u0103'
description = u'Cotidian independent de umor voluntar'
publisher = u'Times New Roman'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Reviste,Fun'
encoding = 'utf-8'
cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'id':'page'})
]
remove_tags = [
dict(name='p', attrs={'class':['articleinfo']})
, dict(name='div',attrs={'class':['vergefacebooklike']})
, dict(name='div', attrs={'class':'cleared'})
]
remove_tags_after = [
dict(name='div', attrs={'class':'cleared'})
]
feeds = [
(u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -30,7 +30,6 @@ def strftime(epoch, zone=time.gmtime):
def get_connected_device(): def get_connected_device():
from calibre.customize.ui import device_plugins from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner from calibre.devices.scanner import DeviceScanner
import uuid
dev = None dev = None
scanner = DeviceScanner() scanner = DeviceScanner()
scanner.scan() scanner.scan()
@ -48,7 +47,7 @@ def get_connected_device():
for d in connected_devices: for d in connected_devices:
try: try:
d.open(str(uuid.uuid4())) d.open()
except: except:
continue continue
else: else:

View File

@ -6,7 +6,7 @@ Provides a command-line and optional graphical interface to the SONY Reader PRS-
For usage information run the script. For usage information run the script.
""" """
import StringIO, sys, time, os, uuid import StringIO, sys, time, os
from optparse import OptionParser from optparse import OptionParser
from calibre import __version__, __appname__ from calibre import __version__, __appname__
@ -213,7 +213,7 @@ def main():
for d in connected_devices: for d in connected_devices:
try: try:
d.open(str(uuid.uuid4())) d.open(None)
except: except:
continue continue
else: else:

View File

@ -908,6 +908,19 @@ class Manifest(object):
pass pass
data = first_pass(data) data = first_pass(data)
if data.tag == 'HTML':
# Lower case all tag and attribute names
data.tag = data.tag.lower()
for x in data.iterdescendants():
try:
x.tag = x.tag.lower()
for key, val in list(x.attrib.iteritems()):
del x.attrib[key]
key = key.lower()
x.attrib[key] = val
except:
pass
# Handle weird (non-HTML/fragment) files # Handle weird (non-HTML/fragment) files
if barename(data.tag) != 'html': if barename(data.tag) != 'html':
self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href) self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)

View File

@ -13,6 +13,7 @@ from urlparse import urlparse
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
from calibre.ebooks import ConversionError from calibre.ebooks import ConversionError
from calibre.utils.ordered_dict import OrderedDict
def XPath(x): def XPath(x):
try: try:
@ -95,10 +96,8 @@ class DetectStructure(object):
self.log.exception('Failed to mark chapter') self.log.exception('Failed to mark chapter')
def create_level_based_toc(self): def create_level_based_toc(self):
if self.opts.level1_toc is None: if self.opts.level1_toc is not None:
return self.add_leveled_toc_items()
for item in self.oeb.spine:
self.add_leveled_toc_items(item)
def create_toc_from_chapters(self): def create_toc_from_chapters(self):
counter = self.oeb.toc.next_play_order() counter = self.oeb.toc.next_play_order()
@ -145,49 +144,57 @@ class DetectStructure(object):
return text, href return text, href
def add_leveled_toc_items(self, item): def add_leveled_toc_items(self):
level1 = XPath(self.opts.level1_toc)(item.data) added = OrderedDict()
level1_order = [] added2 = OrderedDict()
document = item
counter = 1 counter = 1
if level1: for document in self.oeb.spine:
added = {} previous_level1 = list(added.itervalues())[-1] if added else None
for elem in level1: previous_level2 = list(added2.itervalues())[-1] if added2 else None
for elem in XPath(self.opts.level1_toc)(document.data):
text, _href = self.elem_to_link(document, elem, counter) text, _href = self.elem_to_link(document, elem, counter)
counter += 1 counter += 1
if text: if text:
node = self.oeb.toc.add(text, _href, node = self.oeb.toc.add(text, _href,
play_order=self.oeb.toc.next_play_order()) play_order=self.oeb.toc.next_play_order())
level1_order.append(node)
added[elem] = node added[elem] = node
#node.add(_('Top'), _href) #node.add(_('Top'), _href)
if self.opts.level2_toc is not None:
added2 = {} if self.opts.level2_toc is not None and added:
level2 = list(XPath(self.opts.level2_toc)(document.data)) for elem in XPath(self.opts.level2_toc)(document.data):
for elem in level2:
level1 = None level1 = None
for item in document.data.iterdescendants(): for item in document.data.iterdescendants():
if item in added.keys(): if item in added:
level1 = added[item] level1 = added[item]
elif item == elem and level1 is not None: elif item == elem:
if level1 is None:
if previous_level1 is None:
break
level1 = previous_level1
text, _href = self.elem_to_link(document, elem, counter) text, _href = self.elem_to_link(document, elem, counter)
counter += 1 counter += 1
if text: if text:
added2[elem] = level1.add(text, _href, added2[elem] = level1.add(text, _href,
play_order=self.oeb.toc.next_play_order()) play_order=self.oeb.toc.next_play_order())
if self.opts.level3_toc is not None: break
level3 = list(XPath(self.opts.level3_toc)(document.data))
for elem in level3: if self.opts.level3_toc is not None and added2:
for elem in XPath(self.opts.level3_toc)(document.data):
level2 = None level2 = None
for item in document.data.iterdescendants(): for item in document.data.iterdescendants():
if item in added2.keys(): if item in added2:
level2 = added2[item] level2 = added2[item]
elif item == elem and level2 is not None: elif item == elem:
if level2 is None:
if previous_level2 is None:
break
level2 = previous_level2
text, _href = \ text, _href = \
self.elem_to_link(document, elem, counter) self.elem_to_link(document, elem, counter)
counter += 1 counter += 1
if text: if text:
level2.add(text, _href, level2.add(text, _href,
play_order=self.oeb.toc.next_play_order()) play_order=self.oeb.toc.next_play_order())
break

View File

@ -46,7 +46,8 @@ class Tokenize:
def __remove_uc_chars(self, startchar, token): def __remove_uc_chars(self, startchar, token):
for i in xrange(startchar, len(token)): for i in xrange(startchar, len(token)):
if token[i] == " ": #handle the case of an uc char with a terminating blank before ansi char
if token[i] == " " and self.__uc_char:
continue continue
elif self.__uc_char: elif self.__uc_char:
self.__uc_char -= 1 self.__uc_char -= 1

View File

@ -0,0 +1,61 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__license__ = 'GPL v3'
from PyQt4.Qt import QDialog, QVBoxLayout, QLabel, QDialogButtonBox, \
QListWidget, QAbstractItemView
from PyQt4 import QtGui
class ChoosePluginToolbarsDialog(QDialog):
def __init__(self, parent, plugin, locations):
QDialog.__init__(self, parent)
self.locations = locations
self.setWindowTitle(
_('Add "%s" to toolbars or menus')%plugin.name)
self._layout = QVBoxLayout(self)
self.setLayout(self._layout)
self._header_label = QLabel(
_('Select the toolbars and/or menus to add <b>%s</b> to:') %
plugin.name)
self._layout.addWidget(self._header_label)
self._locations_list = QListWidget(self)
self._locations_list.setSelectionMode(QAbstractItemView.MultiSelection)
sizePolicy = QtGui.QSizePolicy(QtGui.QSizePolicy.Preferred,
QtGui.QSizePolicy.Minimum)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
self._locations_list.setSizePolicy(sizePolicy)
for key, text in locations:
self._locations_list.addItem(text)
self._layout.addWidget(self._locations_list)
self._footer_label = QLabel(
_('You can also customise the plugin locations '
'using <b>Preferences -> Customise the toolbar</b>'))
self._layout.addWidget(self._footer_label)
button_box = QDialogButtonBox(QDialogButtonBox.Ok |
QDialogButtonBox.Cancel)
button_box.accepted.connect(self.accept)
button_box.rejected.connect(self.reject)
self._layout.addWidget(button_box)
self.resize(self.sizeHint())
def selected_locations(self):
selected = []
for row in self._locations_list.selectionModel().selectedRows():
selected.append(self.locations[row.row()])
return selected

View File

@ -22,7 +22,7 @@ from calibre.utils.icu import sort_key, strcmp as icu_strcmp
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \ from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
REGEXP_MATCH, MetadataBackup REGEXP_MATCH, MetadataBackup, force_to_bool
from calibre import strftime, isbytestring, prepare_string_for_xml from calibre import strftime, isbytestring, prepare_string_for_xml
from calibre.constants import filesystem_encoding, DEBUG from calibre.constants import filesystem_encoding, DEBUG
from calibre.gui2.library import DEFAULT_SORT from calibre.gui2.library import DEFAULT_SORT
@ -624,20 +624,7 @@ class BooksModel(QAbstractTableModel): # {{{
return None # displayed using a decorator return None # displayed using a decorator
def bool_type_decorator(r, idx=-1, bool_cols_are_tristate=True): def bool_type_decorator(r, idx=-1, bool_cols_are_tristate=True):
val = self.db.data[r][idx] val = force_to_bool(self.db.data[r][idx])
if isinstance(val, (str, unicode)):
try:
val = icu_lower(val)
if not val:
val = None
elif val in [_('yes'), _('checked'), 'true']:
val = True
elif val in [_('no'), _('unchecked'), 'false']:
val = False
else:
val = bool(int(val))
except:
val = None
if not bool_cols_are_tristate: if not bool_cols_are_tristate:
if val is None or not val: if val is None or not val:
return self.bool_no_icon return self.bool_no_icon

View File

@ -16,9 +16,10 @@ from calibre.customize.ui import initialized_plugins, is_disabled, enable_plugin
disable_plugin, plugin_customization, add_plugin, \ disable_plugin, plugin_customization, add_plugin, \
remove_plugin remove_plugin
from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \ from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \
question_dialog question_dialog, gprefs
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.icu import lower from calibre.utils.icu import lower
from calibre.utils.ordered_dict import OrderedDict
class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{
@ -281,6 +282,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self._plugin_model.populate() self._plugin_model.populate()
self._plugin_model.reset() self._plugin_model.reset()
self.changed_signal.emit() self.changed_signal.emit()
self.check_for_add_to_toolbars(plugin)
info_dialog(self, _('Success'), info_dialog(self, _('Success'),
_('Plugin <b>{0}</b> successfully installed under <b>' _('Plugin <b>{0}</b> successfully installed under <b>'
' {1} plugins</b>. You may have to restart calibre ' ' {1} plugins</b>. You may have to restart calibre '
@ -342,6 +344,37 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
plugin.name + _(' cannot be removed. It is a ' plugin.name + _(' cannot be removed. It is a '
'builtin plugin. Try disabling it instead.')).exec_() 'builtin plugin. Try disabling it instead.')).exec_()
def check_for_add_to_toolbars(self, plugin):
from calibre.gui2.preferences.toolbar import ConfigWidget
from calibre.customize import InterfaceActionBase
if not isinstance(plugin, InterfaceActionBase):
return
all_locations = OrderedDict(ConfigWidget.LOCATIONS)
plugin_action = plugin.load_actual_plugin(self.gui)
installed_actions = OrderedDict([
(key, list(gprefs.get('action-layout-'+key, [])))
for key in all_locations])
# If already installed in a GUI container, do nothing
for action_names in installed_actions.itervalues():
if plugin_action.name in action_names:
return
allowed_locations = [(key, text) for key, text in
all_locations.iteritems() if key
not in plugin_action.dont_add_to]
if not allowed_locations:
return # This plugin doesn't want to live in the GUI
from calibre.gui2.dialogs.choose_plugin_toolbars import ChoosePluginToolbarsDialog
d = ChoosePluginToolbarsDialog(self, plugin_action, allowed_locations)
if d.exec_() == d.Accepted:
for key, text in d.selected_locations():
installed_actions = list(gprefs.get('action-layout-'+key, []))
installed_actions.append(plugin_action.name)
gprefs['action-layout-'+key] = tuple(installed_actions)
if __name__ == '__main__': if __name__ == '__main__':
from PyQt4.Qt import QApplication from PyQt4.Qt import QApplication

View File

@ -144,6 +144,22 @@ def _match(query, value, matchkind):
pass pass
return False return False
def force_to_bool(val):
if isinstance(val, (str, unicode)):
try:
val = icu_lower(val)
if not val:
val = None
elif val in [_('yes'), _('checked'), 'true']:
val = True
elif val in [_('no'), _('unchecked'), 'false']:
val = False
else:
val = bool(int(val))
except:
val = None
return val
class CacheRow(list): # {{{ class CacheRow(list): # {{{
def __init__(self, db, composites, val): def __init__(self, db, composites, val):
@ -532,37 +548,23 @@ class ResultCache(SearchQueryParser): # {{{
if item is None: if item is None:
continue continue
val = item[loc] val = force_to_bool(item[loc])
if isinstance(val, (str, unicode)):
try:
val = icu_lower(val)
if not val:
val = None
elif val in [_('yes'), _('checked'), 'true']:
val = True
elif val in [_('no'), _('unchecked'), 'false']:
val = False
else:
val = bool(int(val))
except:
val = None
if not bools_are_tristate: if not bools_are_tristate:
if val is None or not val: # item is None or set to false if val is None or not val: # item is None or set to false
if query in [_('no'), _('unchecked'), 'false']: if query in [_('no'), _('unchecked'), '_no', 'false']:
matches.add(item[0]) matches.add(item[0])
else: # item is explicitly set to true else: # item is explicitly set to true
if query in [_('yes'), _('checked'), 'true']: if query in [_('yes'), _('checked'), '_yes', 'true']:
matches.add(item[0]) matches.add(item[0])
else: else:
if val is None: if val is None:
if query in [_('empty'), _('blank'), 'false']: if query in [_('empty'), _('blank'), '_empty', 'false']:
matches.add(item[0]) matches.add(item[0])
elif not val: # is not None and false elif not val: # is not None and false
if query in [_('no'), _('unchecked'), 'true']: if query in [_('no'), _('unchecked'), '_no', 'true']:
matches.add(item[0]) matches.add(item[0])
else: # item is not None and true else: # item is not None and true
if query in [_('yes'), _('checked'), 'true']: if query in [_('yes'), _('checked'), '_yes', 'true']:
matches.add(item[0]) matches.add(item[0])
return matches return matches
@ -695,13 +697,14 @@ class ResultCache(SearchQueryParser): # {{{
if item is None: continue if item is None: continue
if not item[loc]: if not item[loc]:
if q == 'false': if q == 'false' and matchkind == CONTAINS_MATCH:
matches.add(item[0]) matches.add(item[0])
continue # item is empty. No possible matches below continue # item is empty. No possible matches below
if q == 'false': # Field has something in it, so a false query does not match if q == 'false'and matchkind == CONTAINS_MATCH:
# Field has something in it, so a false query does not match
continue continue
if q == 'true': if q == 'true' and matchkind == CONTAINS_MATCH:
if isinstance(item[loc], basestring): if isinstance(item[loc], basestring):
if item[loc].strip() == '': if item[loc].strip() == '':
continue continue
@ -989,18 +992,7 @@ class SortKeyGenerator(object):
val = 0.0 val = 0.0
dt = 'float' dt = 'float'
elif sb == 'bool': elif sb == 'bool':
try: val = force_to_bool(val)
v = icu_lower(val)
if not val:
val = None
elif v in [_('yes'), _('checked'), 'true']:
val = True
elif v in [_('no'), _('unchecked'), 'false']:
val = False
else:
val = bool(int(val))
except:
val = None
dt = 'bool' dt = 'bool'
if dt == 'datetime': if dt == 'datetime':

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

View File

@ -0,0 +1,108 @@
.. include:: global.rst
.. _subgroups-tutorial:
Managing subgroups of books, for example "genre"
==================================================
Some people wish to organize the books in their library into subgroups, similar to subfolders. The most common wish is to create genre hierarchies, but there are many others. One user asked for a way to organize textbooks by subject and course number. Another wanted to keep track of gifts by subject and recipient. I will use the genre example for the rest of this post.
Before I go on, please note that I am not talking about folders on the hard disk. Subgroups are not file folders. Books will not be copied anywhere. Calibre's library file structure is not affected. Instead, I am talking about a way to display subgroups of books within a calibre library.
.. contents::
:depth: 1
:local:
.. |sgtree| image:: images/sg_tree.jpg
:class: float-left-img
The commonly expressed requirements for subgroups such as genres are:
* A subgroup (e.g., a genre) must contain (point to) books, not categories of books. This is what distinguishes subgroups from user categories.
* A book can be in multiple subgroups (genres). This distinguishes subgroups from physical file folders.
* Subgroups (genres) must form a hierarchy; subgroups can contain subgroups.
|sgtree| Tags give you the first two. If you tag a book with the genre then you can use the tag browser (or search) for find the books with that genre, giving you the first. Many books can have the same tag, giving you the second. The problem is that tags don't satisfy the third requirement. They don't provide a hierarchy.
Calibre's new hierarchy feature gives you the third, the ability to see the genres in a 'tree' and the ability to easily search for books in genre or sub-genre. For example, assume that your genre structure is similar to the following::
Genre
. History
.. Japanese
.. Military
.. Roman
. Mysteries
.. English
.. Vampire
. Science Fiction
.. Alternate History
.. Military
.. Space Opera
. Thrillers
.. Crime
.. Horror
etc.
By using the hierarchy feature, you can see these genres in the tag browser in a tree form. As you can see, in this example the outermost level (Genre) is a custom column. The genres themselves appear under that column. Genres containing sub-genres appear with a small triangle next to them. Clicking on that triangle will open the item and show the sub-genres, as you see with History and Science Fiction.
Clicking on a genre will search for all books with that genre or children of that genre. For example, clicking on Science Fiction will give all three of the child genres, Alternate History, Military, and Space Opera. Clicking on Alternate History will give books in that genre, ignoring those in Military and Space Opera. Of course, a book can have multiple genres. If a book has both Space Opera and Military genres, then you see that book if you click on either genre. Searching is discussed in more detail below.
Another thing you can see from the image is that the genre Military appears twice, once under History and once under Science Fiction. Because the genres are in a hierarchy, these are two separate genres. A book can be in one, the other, or (doubtfully in this case) both. For example, Winston Churchill's World War II books could be in "History.Military". David Weber's Honor Harrington books could be in "Science Fiction.Military", and in "Science Fiction.Space Opera" for that matter.
Once a genre exists, that is the genre has been applied to at least one book, you can easily apply it to other books by dragging a book from the library view onto the genre you want the book to have. You can also apply them in the metadata editors. More on this below.
Setup
----------------------------------------
Your question by now might be "how did I set all of this up?". There are three steps: 1) create the custom column, 2) tell calibre that the new column is to be treated as a hierarchy, and 3) add genres.
I created the custom column in the usual way, using Preferences -> Add your own columns. I used "genre" as the lookup name and "Genre" as the column heading. The column type is "Comma-separated text, like tags, shown in the tag browser."
.. image:: images/sg_tree.jpg
:align: center
Then after restarting calibre, I told calibre that the column is to be treated as a hierarchy. I went to Preferences -> Look and Feel and entered the lookup name "#genre" into the "Categories with hierarchical items" box. I pressed Apply and was done with setting up.
.. image:: images/sg_pref.jpg
:align: center
At the point there are no genres. We are left with the last step: how to apply a genre to a book. A genre does not exist until it appears on at least one book. To apply a genre for the first time, we must go into some detail about what a genre looks like in the metadata for a book.
A hierarchy of 'things' is built by creating an item consisting of phrases separated by periods. Continuing the Genre example, these items would "History.Military", "Mysteries.Vampire", "Science Fiction.Space Opera", etc. Thus to create a new genre, you pick a book that should have that genre, edit its metadata, and enter the new genre into the column you created. Continuing my example, if I want to assign a new genre "Comics" with a sub-genre "Superheros" to a book, I would 'edit metadata' for that (comic) book, choose the Custom metadata tab, and then enter "Comics.Superheros" as shown in the following (ignore my other custom columns):
.. image:: images/sg_genre.jpg
:align: center
After I do the above, I see in the tag browser:
.. image:: images/sg_tb.jpg
:align: center
From here on, to apply this new genre to a book (a comic book, presumably), I can either drag the book onto the genre, or add it to the book using edit metadata in exactly the same way as I did above.
Searching
---------------
.. image:: images/sg_search.jpg
:align: center
The easiest way to search for genres is to use the tag browser, clicking on the genre you want to see. Clicking on a genre with children will show you books with that genre and all child genres. However, this might bring up a question. Just because a genre has children doesn't mean that it isn't a genre in its own right. For example, a book can have the genre "History" but not "History.Military". How do I search for books with only "History"?
The tag browser search mechanism knows if an item has children. If it does, clicking on the item cycles through 5 searches instead of the normal three. The first is the normal green plus, which shows you books with that genre only. The second is new: a doubled plus (shown below), which shows you books with that genre and all sub-genres. The third is the normal red minus, which shows you books without that exact genre. The fourth is new: a doubled minus, which shows you books without that genre or sub-genres. The fifth is back to the beginning, no mark, meaning no search.
Restrictions
---------------
If you search for a genre then create a saved search, you can use the 'restrict to' box to create a virtual library of books with that genre. This is most useful if you want to do other searches within the genre or to manage/update metadata. For this example I created a saved search named 'History.Japanese' by first clicking on the genre Japanese in the tag browser to get a search into the search box, entering History.Japanese into the saved search box, then pushing the "save search" button (the green box with the white plus, on the right-hand side).
.. image:: images/sg_restrict.jpg
:align: center
Once I have done that, then I can use this search as a restriction.
.. image:: images/sg_restrict2.jpg
:align: center

View File

@ -112,6 +112,8 @@ Functions are always applied before format specifications. See further down for
The syntax for using functions is ``{field:function(arguments)}``, or ``{field:function(arguments)|prefix|suffix}``. Arguments are separated by commas. Commas inside arguments must be preceeded by a backslash ( '\\' ). The last (or only) argument cannot contain a closing parenthesis ( ')' ). Functions return the value of the field used in the template, suitably modified. The syntax for using functions is ``{field:function(arguments)}``, or ``{field:function(arguments)|prefix|suffix}``. Arguments are separated by commas. Commas inside arguments must be preceeded by a backslash ( '\\' ). The last (or only) argument cannot contain a closing parenthesis ( ')' ). Functions return the value of the field used in the template, suitably modified.
If you have programming experience, please note that the syntax in this mode (single function) is not what you might expect. Strings are not quoted. Spaces are significant. All arguments must be constants; there is no sub-evaluation. Use :ref:`template program mode <template_mode>` and :ref:`general program mode <general_mode>` to avoid these differences.
The functions available are: The functions available are:
* ``lowercase()`` -- return value of the field in lower case. * ``lowercase()`` -- return value of the field in lower case.
@ -127,10 +129,25 @@ The functions available are:
* ``switch(pattern, value, pattern, value, ..., else_value)`` -- for each ``pattern, value`` pair, checks if the field matches the regular expression ``pattern`` and if so, returns that ``value``. If no ``pattern`` matches, then ``else_value`` is returned. You can have as many ``pattern, value`` pairs as you want. * ``switch(pattern, value, pattern, value, ..., else_value)`` -- for each ``pattern, value`` pair, checks if the field matches the regular expression ``pattern`` and if so, returns that ``value``. If no ``pattern`` matches, then ``else_value`` is returned. You can have as many ``pattern, value`` pairs as you want.
* ``lookup(pattern, field, pattern, field, ..., else_field)`` -- like switch, except the arguments are field (metadata) names, not text. The value of the appropriate field will be fetched and used. Note that because composite columns are fields, you can use this function in one composite field to use the value of some other composite field. This is extremely useful when constructing variable save paths (more later). * ``lookup(pattern, field, pattern, field, ..., else_field)`` -- like switch, except the arguments are field (metadata) names, not text. The value of the appropriate field will be fetched and used. Note that because composite columns are fields, you can use this function in one composite field to use the value of some other composite field. This is extremely useful when constructing variable save paths (more later).
* ``select(key)`` -- interpret the field as a comma-separated list of items, with the items being of the form "id:value". Find the pair with the id equal to key, and return the corresponding value. This function is particularly useful for extracting a value such as an isbn from the set of identifiers for a book. * ``select(key)`` -- interpret the field as a comma-separated list of items, with the items being of the form "id:value". Find the pair with the id equal to key, and return the corresponding value. This function is particularly useful for extracting a value such as an isbn from the set of identifiers for a book.
* ``subitems(val, start_index, end_index)`` -- This function is used to break apart lists of tag-like hierarchical items such as genres. It interprets the value as a comma-separated list of tag-like items, where each item is a period-separated list. Returns a new list made by first finding all the period-separated tag-like items, then for each such item extracting the `start_index` th to the `end_index` th components, then combining the results back together. The first component in a period-separated list has an index of zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples::
Assuming a #genre column containing "A.B.C":
{#genre:subitems(0,1)} returns "A"
{#genre:subitems(0,2)} returns "A.B"
{#genre:subitems(1,0)} returns "B.C"
Assuming a #genre column containing "A.B.C, D.E":
{#genre:subitems(0,1)} returns "A, D"
{#genre:subitems(0,2)} returns "A.B, D.E"
* ``sublist(val, start_index, end_index, separator)`` -- interpret the value as a list of items separated by `separator`, returning a new list made from the `start_index` th to the `end_index` th item. The first item is number zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples assuming that the tags column (which is comma-separated) contains "A, B ,C"::
{tags:sublist(0,1,\,)} returns "A"
{tags:sublist(-1,0,\,)} returns "C"
{tags:sublist(0,-1,\,)} returns "A, B"
* ``test(text if not empty, text if empty)`` -- return `text if not empty` if the field is not empty, otherwise return `text if empty`. * ``test(text if not empty, text if empty)`` -- return `text if not empty` if the field is not empty, otherwise return `text if empty`.
Now, what about using functions and formatting in the same field. Suppose you have an integer custom column called ``#myint`` that you want to see with leading zeros, as in ``003``. To do this, you would use a format of ``0>3s``. However, by default, if a number (integer or float) equals zero then the field produces the empty value, so zero values will produce nothing, not ``000``. If you really want to see ``000`` values, then you use both the format string and the ``ifempty`` function to change the empty value back to a zero. The field reference would be::
Now, about using functions and formatting in the same field. Suppose you have an integer custom column called ``#myint`` that you want to see with leading zeros, as in ``003``. To do this, you would use a format of ``0>3s``. However, by default, if a number (integer or float) equals zero then the field produces the empty value, so zero values will produce nothing, not ``000``. If you really want to see ``000`` values, then you use both the format string and the ``ifempty`` function to change the empty value back to a zero. The field reference would be::
{#myint:0>3s:ifempty(0)} {#myint:0>3s:ifempty(0)}
@ -138,6 +155,7 @@ Note that you can use the prefix and suffix as well. If you want the number to a
{#myint:0>3s:ifempty(0)|[|]} {#myint:0>3s:ifempty(0)|[|]}
.. _template_mode:
Using functions in templates - template program mode Using functions in templates - template program mode
---------------------------------------------------- ----------------------------------------------------
@ -238,6 +256,8 @@ The following functions are available in addition to those described in single-f
* ``subtract(x, y)`` -- returns x - y. Throws an exception if either x or y are not numbers. * ``subtract(x, y)`` -- returns x - y. Throws an exception if either x or y are not numbers.
* ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. For example, ``template('[[title_sort]]') will evaluate the template ``{title_sort}`` and return its value. * ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. For example, ``template('[[title_sort]]') will evaluate the template ``{title_sort}`` and return its value.
.. _general_mode:
Using general program mode Using general program mode
----------------------------------- -----------------------------------

View File

@ -12,6 +12,7 @@ Here you will find tutorials to get you started using |app|'s more advanced feat
:maxdepth: 1 :maxdepth: 1
news news
sub_groups
xpath xpath
template_lang template_lang
regexp regexp

View File

@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
import inspect, re, traceback, sys import inspect, re, traceback, sys
from calibre.utils.titlecase import titlecase from calibre.utils.titlecase import titlecase
from calibre.utils.icu import capitalize, strcmp from calibre.utils.icu import capitalize, strcmp, sort_key
from calibre.utils.date import parse_date, format_date from calibre.utils.date import parse_date, format_date
@ -427,15 +427,16 @@ class BuiltinSublist(BuiltinFormatterFunction):
name = 'sublist' name = 'sublist'
arg_count = 4 arg_count = 4
doc = _('sublist(val, start_index, end_index, separator) -- interpret the ' doc = _('sublist(val, start_index, end_index, separator) -- interpret the '
' value as a list of items separated by `separator`, returning a ' 'value as a list of items separated by `separator`, returning a '
' new list made from the `start_index`th to the `end_index`th item. ' 'new list made from the `start_index`th to the `end_index`th item. '
'The first item is number zero. If an index is negative, then it ' 'The first item is number zero. If an index is negative, then it '
'counts from the end of the list. As a special case, an end_index ' 'counts from the end of the list. As a special case, an end_index '
'of zero is assumed to be the length of the list. Examples using ' 'of zero is assumed to be the length of the list. Examples using '
'basic template mode and assuming a #genre value if A.B.C: ' 'basic template mode and assuming that the tags column (which is '
'{#genre:sublist(-1,0,.)} returns C<br/>' 'comma-separated) contains "A, B, C": '
'{#genre:sublist(0,1,.)} returns A<br/>' '{tags:sublist(0,1,\,)} returns "A". '
'{#genre:sublist(0,-1,.)} returns A.B') '{tags:sublist(-1,0,\,)} returns "C". '
'{tags:sublist(0,-1,\,)} returns "A, B".')
def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep): def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):
if not val: if not val:
@ -451,6 +452,43 @@ class BuiltinSublist(BuiltinFormatterFunction):
except: except:
return '' return ''
class BuiltinSubitems(BuiltinFormatterFunction):
name = 'subitems'
arg_count = 3
doc = _('subitems(val, start_index, end_index) -- This function is used to '
'break apart lists of items such as genres. It interprets the value '
'as a comma-separated list of items, where each item is a period-'
'separated list. Returns a new list made by first finding all the '
'period-separated items, then for each such item extracting the '
'start_index`th to the `end_index`th components, then combining '
'the results back together. The first component in a period-'
'separated list has an index of zero. If an index is negative, '
'then it counts from the end of the list. As a special case, an '
'end_index of zero is assumed to be the length of the list. '
'Example using basic template mode and assuming a #genre value of '
'"A.B.C": {#genre:subitems(0,1)} returns "A". {#genre:subitems(0,2)} '
'returns "A.B". {#genre:subitems(1,0)} returns "B.C". Assuming a #genre '
'value of "A.B.C, D.E.F", {#genre:subitems(0,1)} returns "A, D". '
'{#genre:subitems(0,2)} returns "A.B, D.E"')
def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index):
if not val:
return ''
si = int(start_index)
ei = int(end_index)
items = [v.strip() for v in val.split(',')]
rv = set()
for item in items:
component = item.split('.')
try:
if ei == 0:
rv.add('.'.join(component[si:]))
else:
rv.add('.'.join(component[si:ei]))
except:
pass
return ', '.join(sorted(rv, key=sort_key))
class BuiltinFormat_date(BuiltinFormatterFunction): class BuiltinFormat_date(BuiltinFormatterFunction):
name = 'format_date' name = 'format_date'
arg_count = 2 arg_count = 2
@ -532,6 +570,7 @@ builtin_select = BuiltinSelect()
builtin_shorten = BuiltinShorten() builtin_shorten = BuiltinShorten()
builtin_strcat = BuiltinStrcat() builtin_strcat = BuiltinStrcat()
builtin_strcmp = BuiltinStrcmp() builtin_strcmp = BuiltinStrcmp()
builtin_subitems = BuiltinSubitems()
builtin_sublist = BuiltinSublist() builtin_sublist = BuiltinSublist()
builtin_substr = BuiltinSubstr() builtin_substr = BuiltinSubstr()
builtin_subtract = BuiltinSubtract() builtin_subtract = BuiltinSubtract()