Merge from trunk

This commit is contained in:
Charles Haley 2011-03-10 13:19:32 +00:00
commit 0e1dcf9f06
26 changed files with 709 additions and 67 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 495 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 414 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 840 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 302 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 556 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 437 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 728 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 494 B

View File

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
cotidianul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Cotidianul(BasicNewsRecipe):
    '''
    News recipe for cotidianul.ro.

    Purely declarative: it names the RSS feed to read, which page elements
    to keep or strip, and the stylesheet applied to the result.
    '''

    # --- publication metadata ---
    title = u'Cotidianul'
    __author__ = u'Silviu Cotoar\u0103'
    description = u''
    publisher = u'Cotidianul'
    category = 'Ziare,Stiri'
    language = 'ro'
    cover_url = 'http://www.cotidianul.ro/images/cotidianul.png'

    # --- download settings ---
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step, mirrored from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # The CSS text is kept flush-left so the string content stays unchanged.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # --- page clean-up rules ---
    keep_only_tags = [
        dict(name='div', attrs={'class': 'titlu'}),
        dict(name='div', attrs={'class': 'gallery clearfix'}),
        dict(name='div', attrs={'align': 'justify'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['space']}),
        dict(name='div', attrs={'id': ['title_desc']}),
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': ['space']}),
        dict(name='span', attrs={'class': ['date']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml'),
    ]

    def preprocess_html(self, soup):
        '''Return ``soup`` after running it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
ele.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ele(BasicNewsRecipe):
    '''
    News recipe for ele.ro.

    Purely declarative: it names the RSS feed to read, which page elements
    to keep, and the stylesheet applied to the result.
    '''

    # --- publication metadata ---
    title = u'Ele'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Dezv\u0103luie ceea ce e\u015fti'
    publisher = u'Ele'
    category = 'Ziare,Femei'
    language = 'ro'
    cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg'

    # --- download settings ---
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step, mirrored from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # The CSS text is kept flush-left so the string content stays unchanged.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # --- page clean-up rules ---
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'article_title'}),
        dict(name='div', attrs={'class': 'article_text'}),
    ]

    feeds = [
        (u'Feeds', u'http://www.ele.ro/rss_must_read'),
    ]

    def preprocess_html(self, soup):
        '''Return ``soup`` after running it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
revistafelicia.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Felicia(BasicNewsRecipe):
    '''
    News recipe for revistafelicia.ro.

    Purely declarative: it names the RSS feed to read and which page
    elements to keep or strip.
    '''

    # --- publication metadata ---
    title = u'Revista Felicia'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'O revist\u0103 pentru sufletul t\u0103u'
    publisher = u'Revista Felicia'
    category = 'Ziare,Reviste'
    language = 'ro'
    cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg'

    # --- download settings ---
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step, mirrored from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- page clean-up rules ---
    keep_only_tags = [
        dict(name='div', attrs={'class': 'header'}),
        dict(name='div', attrs={'id': 'contentArticol'}),
    ]

    remove_tags = [
        # A fixed header image, matched by its exact src URL.
        dict(name='img', attrs={'src': ['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']}),
        dict(name='div', attrs={'class': ['content']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.revistafelicia.ro/rss'),
    ]

    def preprocess_html(self, soup):
        '''Return ``soup`` after running it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
financiarul.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Financiarul(BasicNewsRecipe):
    '''
    News recipe for financiarul.com.

    Purely declarative: it names the RSS feed to read and which page
    elements to keep or strip.
    '''

    # --- publication metadata ---
    title = u'Financiarul'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'FIN.ro'
    publisher = u'Financiarul'
    category = 'Ziare,Stiri'
    language = 'ro'
    cover_url = 'http://www.financiarul.com/templates/default/images/logo.png'

    # --- download settings ---
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step, mirrored from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- page clean-up rules ---
    keep_only_tags = [
        dict(name='div', attrs={'class': 'col2ContentLeftL'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['infoArticol']}),
        dict(name='ul', attrs={'class': 'navSectiuni'}),
        dict(name='div', attrs={'class': 'separator separatorTop'}),
        dict(name='div', attrs={'class': 'infoArticol infoArticolBottom'}),
        dict(name='ul', attrs={'class': ['related']}),
        dict(name='div', attrs={'class': ['slot panel300 panelGri300 panelGri300s panelGri300sm']}),
    ]

    remove_tags_after = [
        dict(name='ul', attrs={'class': ['related']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.financiarul.com/rss'),
    ]

    def preprocess_html(self, soup):
        '''Return ``soup`` after running it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
imperatortravel.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Imperatortravel(BasicNewsRecipe):
    '''
    News recipe for imperatortravel.ro.

    Purely declarative: it names the RSS feed to read, which page elements
    to keep or strip, and the stylesheet applied to the result.
    '''

    # --- publication metadata ---
    title = u'Imperator Travel'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'C\u0103l\u0103torii'
    publisher = u'Imperator Travel'
    category = 'Ziare,Stiri,Turism,Calatorii'
    language = 'ro'
    cover_url = 'http://www.imperatortravel.ro/images/header-1.jpg'

    # --- download settings ---
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step, mirrored from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # The CSS text is kept flush-left so the string content stays unchanged.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # --- page clean-up rules ---
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article first_main_article'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['meta']}),
        dict(name='body', attrs={'class': ['transparent_widget ff3 win Locale_en_US']}),
        dict(name='div', attrs={'class': ['connect_widget']}),
        dict(name='ul', attrs={'class': ['similar-posts']}),
    ]

    remove_tags_after = [
        dict(name='ul', attrs={'class': ['similar-posts']}),
    ]

    feeds = [
        (u'Feeds', u'http://feeds.feedburner.com/ImperatorTravels'),
    ]

    def preprocess_html(self, soup):
        '''Return ``soup`` after running it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
lanacion.com.ar lanacion.com.ar
''' '''
@ -19,9 +19,10 @@ class Lanacion(BasicNewsRecipe):
language = 'es_AR' language = 'es_AR'
publication_type = 'newspaper' publication_type = 'newspaper'
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif' masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif'
extra_css = """ h1{font-family: Georgia,serif} extra_css = """
h2{color: #626262} h1{font-family: Georgia,serif}
h2{color: #626262; font-weight: normal; font-size: 1.1em}
body{font-family: Arial,sans-serif} body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.notaFecha{color: #808080} .notaFecha{color: #808080}
@ -37,47 +38,78 @@ class Lanacion(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
keep_only_tags = [dict(name='div', attrs={'class':['nota floatFix','topNota','nota','post']})] keep_only_tags = [dict(name='div', attrs={'id':'content'})]
remove_tags = [ remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' }) dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']}) ,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']})
,dict(name='div' , attrs={'class':['cajaHerramientas noprint','cajaHerramientas floatFix'] }) ,dict(name='div' , attrs={'class':['titulosMultimedia','herramientas noprint','cajaHerramientas noprint','cajaHerramientas floatFix'] })
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']}) ,dict(attrs={'class':['izquierda','espacio17','espacio10','espacio20','floatFix ultimasNoticias','relacionadas','titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input']) ,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
] ]
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']}) remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
remove_attributes = ['height','width','visible','onclick','data-count','name'] remove_attributes = ['height','width','visible','onclick','data-count','name']
feeds = [ feeds = [
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' ) (u'Ultimas Noticias' , u'http://servicios.lanacion.com.ar/herramientas/rss/origen=2' )
,(u'Politica' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=30' ) ,(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
,(u'Economia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=272' ) ,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
,(u'Deportes' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=131' ) ,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
,(u'Informacion General' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=21' ) ,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' )
,(u'Cultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1' ) ,(u'Cultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1' )
,(u'Opinion' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=28' ) ,(u'Opinion' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=28' )
,(u'Espectaculos' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=120' ) ,(u'Espectaculos' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=120' )
,(u'Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7' ) ,(u'Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7' )
,(u'Ciencia&Salud' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=498' ) ,(u'Ciencia&Salud' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=498' )
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' ) ,(u'Revista' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=494' )
,(u'Enfoques' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=421' ) ,(u'Enfoques' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=421' )
,(u'Comercio Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=347' ) ,(u'Comercio Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=347' )
,(u'Tecnologia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=432' ) ,(u'Tecnologia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=432' )
,(u'Arquitectura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=366' ) ,(u'Arquitectura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=366' )
,(u'Turismo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=504' ) ,(u'Turismo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=504' )
,(u'Al volante' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=371' ) ,(u'Al volante' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=371' )
,(u'El Campo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=337' ) ,(u'El Campo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=337' )
,(u'Moda y Belleza' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1312' ) ,(u'Moda y Belleza' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1312')
,(u'Inmuebles Comerciales', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1363' ) ,(u'Inmuebles Comerciales', u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1363')
,(u'Countries' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1348' ) ,(u'Countries' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1348')
,(u'adnCultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6734' ) ,(u'adnCultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6734')
,(u'The Wall Street Journal Americas', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6373' ) ,(u'The WSJ Americas' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6373')
,(u'Estilo de vida' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7353' ) ,(u'Comunidad' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1344')
,(u'Management' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7380' ) ,(u'Management' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7380')
,(u'Bicentenario' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7276' ) ,(u'Bicentenario' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7276')
] ]
def get_browser(self):
    '''
    Return the browser built by ``BasicNewsRecipe.get_browser`` with its
    redirect, response and HTTP debug switches turned on.
    '''
    # NOTE(review): the three debug switches look like leftover
    # troubleshooting aids -- confirm they are meant to ship.
    debug_browser = BasicNewsRecipe.get_browser()
    debug_browser.set_debug_redirects(True)
    debug_browser.set_debug_responses(True)
    debug_browser.set_debug_http(True)
    return debug_browser
def get_article_url(self, article):
    '''
    Return the URL to download for ``article``, or ``None`` to skip it.

    The URL is obtained from ``BasicNewsRecipe.get_article_url``. Links
    that start with ``http://blogs.lanacion`` and do not end in ``/`` are
    rejected by returning ``None``.

    :param article: feed entry, passed unchanged to the base implementation
    :return: the URL found by the base implementation, or ``None``
    '''
    link = BasicNewsRecipe.get_article_url(self, article)
    if not link:
        # The base lookup produced no URL (None or empty). Hand that result
        # back as-is instead of failing on None.startswith below.
        return link
    if link.startswith('http://blogs.lanacion') and not link.endswith('/'):
        return None
    return link
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return self.adeify_images(soup) for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
monden.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Monden(BasicNewsRecipe):
    '''
    News recipe for monden.info.

    Purely declarative: it names the RSS feed to read, which page elements
    to keep or strip, and the stylesheet applied to the result.
    '''

    # --- publication metadata ---
    title = u'Monden'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102'
    publisher = u'Monden'
    category = 'Ziare,Stiri,Muzica'
    language = 'ro'
    cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg'

    # --- download settings ---
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step, mirrored from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # The CSS text is kept flush-left so the string content stays unchanged.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # --- page clean-up rules ---
    keep_only_tags = [
        dict(name='div', attrs={'id': 'content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['postAuthor']}),
        dict(name='div', attrs={'class': ['postLike']}),
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': ['postLike']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.monden.info/feed/'),
    ]

    def preprocess_html(self, soup):
        '''Return ``soup`` after running it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,23 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1299640653(BasicNewsRecipe):
    '''
    News recipe for Oakland North (oaklandnorth.net).

    Purely declarative: it names the RSS feed to read and which page
    elements to keep or strip.
    '''

    # --- publication metadata ---
    title = u'Oakland North'
    __author__ = 'noah'
    description = 'Oakland North'
    category = 'news'
    language = 'en'
    masthead_url = 'http://oaklandnorth.net/wp-content/themes/oaklandnorth/images/masthead.png'

    # --- download settings ---
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True

    # --- page clean-up rules ---
    # Keep <div>s whose class contains the whole word "post"; the negative
    # lookahead rejects hyphenated names such as "post-postscript".
    keep_only_tags = [
        dict(name='div',
             attrs={'class': re.compile(r'\bpost\b(?!-)', re.IGNORECASE)}),
    ]
    remove_tags_after = [
        dict(name='p', attrs={'class': 'post-postscript'}),
    ]
    remove_tags = [
        dict(name='p', attrs={'class': 'post-postscript'}),
    ]

    feeds = [
        (u'All Headlines', u'http://oaklandnorth.net/feed/'),
    ]

View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
promotor.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Promotor(BasicNewsRecipe):
    '''
    News recipe for promotor.ro.

    Purely declarative: it names the RSS feed to read, which page elements
    to keep or strip, and the stylesheet applied to the result.
    '''

    # --- publication metadata ---
    title = u'Promotor'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Auto-moto'
    publisher = u'Promotor'
    category = 'Ziare,Reviste,TV,Auto'
    language = 'ro'
    cover_url = 'http://www.promotor.ro/images/logo_promotor.gif'

    # --- download settings ---
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step, mirrored from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # The CSS text is kept flush-left so the string content stays unchanged.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # --- page clean-up rules ---
    keep_only_tags = [
        dict(name='div', attrs={'class': 'casetatitluarticol'}),
        # This container is matched by its exact inline style attribute.
        dict(name='div', attrs={'style': 'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'}),
        dict(name='div', attrs={'class': 'textb'}),
        dict(name='div', attrs={'class': 'contentarticol'}),
    ]

    remove_tags = [
        dict(name='td', attrs={'class': ['connect_widget_vertical_center connect_widget_button_cell']}),
        dict(name='div', attrs={'class': ['etichetagry']}),
        dict(name='span', attrs={'class': ['textb']}),
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': ['etichetagry']}),
        dict(name='span', attrs={'class': ['textb']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.promotor.ro/rss'),
    ]

    def preprocess_html(self, soup):
        '''Return ``soup`` after running it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
timesnewroman.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TimesNewRoman(BasicNewsRecipe):
    '''
    News recipe for timesnewroman.ro.

    Purely declarative: it names the RSS feed to read and which page
    elements to keep or strip.
    '''

    # --- publication metadata ---
    title = u'Times New Roman'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Cotidian independent de umor voluntar'
    publisher = u'Times New Roman'
    category = 'Ziare,Reviste,Fun'
    language = 'ro'
    cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif'

    # --- download settings ---
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step, mirrored from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- page clean-up rules ---
    keep_only_tags = [
        dict(name='div', attrs={'id': 'page'}),
    ]

    remove_tags = [
        dict(name='p', attrs={'class': ['articleinfo']}),
        dict(name='div', attrs={'class': ['vergefacebooklike']}),
        dict(name='div', attrs={'class': 'cleared'}),
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': 'cleared'}),
    ]

    feeds = [
        (u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss'),
    ]

    def preprocess_html(self, soup):
        '''Return ``soup`` after running it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed

View File

@ -30,7 +30,6 @@ def strftime(epoch, zone=time.gmtime):
def get_connected_device(): def get_connected_device():
from calibre.customize.ui import device_plugins from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner from calibre.devices.scanner import DeviceScanner
import uuid
dev = None dev = None
scanner = DeviceScanner() scanner = DeviceScanner()
scanner.scan() scanner.scan()
@ -48,7 +47,7 @@ def get_connected_device():
for d in connected_devices: for d in connected_devices:
try: try:
d.open(str(uuid.uuid4())) d.open()
except: except:
continue continue
else: else:

View File

@ -6,7 +6,7 @@ Provides a command-line and optional graphical interface to the SONY Reader PRS-
For usage information run the script. For usage information run the script.
""" """
import StringIO, sys, time, os, uuid import StringIO, sys, time, os
from optparse import OptionParser from optparse import OptionParser
from calibre import __version__, __appname__ from calibre import __version__, __appname__
@ -213,7 +213,7 @@ def main():
for d in connected_devices: for d in connected_devices:
try: try:
d.open(str(uuid.uuid4())) d.open(None)
except: except:
continue continue
else: else:

View File

@ -25,7 +25,7 @@ class DRMError(ValueError):
class ParserError(ValueError): class ParserError(ValueError):
pass pass
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'htm', 'xhtm', BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb'] 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb']

View File

@ -13,6 +13,7 @@ from urlparse import urlparse
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
from calibre.ebooks import ConversionError from calibre.ebooks import ConversionError
from calibre.utils.ordered_dict import OrderedDict
def XPath(x): def XPath(x):
try: try:
@ -95,10 +96,8 @@ class DetectStructure(object):
self.log.exception('Failed to mark chapter') self.log.exception('Failed to mark chapter')
def create_level_based_toc(self): def create_level_based_toc(self):
if self.opts.level1_toc is None: if self.opts.level1_toc is not None:
return self.add_leveled_toc_items()
for item in self.oeb.spine:
self.add_leveled_toc_items(item)
def create_toc_from_chapters(self): def create_toc_from_chapters(self):
counter = self.oeb.toc.next_play_order() counter = self.oeb.toc.next_play_order()
@ -145,49 +144,57 @@ class DetectStructure(object):
return text, href return text, href
def add_leveled_toc_items(self, item): def add_leveled_toc_items(self):
level1 = XPath(self.opts.level1_toc)(item.data) added = OrderedDict()
level1_order = [] added2 = OrderedDict()
document = item
counter = 1 counter = 1
if level1: for document in self.oeb.spine:
added = {} previous_level1 = list(added.itervalues())[-1] if added else None
for elem in level1: previous_level2 = list(added2.itervalues())[-1] if added2 else None
for elem in XPath(self.opts.level1_toc)(document.data):
text, _href = self.elem_to_link(document, elem, counter) text, _href = self.elem_to_link(document, elem, counter)
counter += 1 counter += 1
if text: if text:
node = self.oeb.toc.add(text, _href, node = self.oeb.toc.add(text, _href,
play_order=self.oeb.toc.next_play_order()) play_order=self.oeb.toc.next_play_order())
level1_order.append(node)
added[elem] = node added[elem] = node
#node.add(_('Top'), _href) #node.add(_('Top'), _href)
if self.opts.level2_toc is not None:
added2 = {} if self.opts.level2_toc is not None and added:
level2 = list(XPath(self.opts.level2_toc)(document.data)) for elem in XPath(self.opts.level2_toc)(document.data):
for elem in level2:
level1 = None level1 = None
for item in document.data.iterdescendants(): for item in document.data.iterdescendants():
if item in added.keys(): if item in added:
level1 = added[item] level1 = added[item]
elif item == elem and level1 is not None: elif item == elem:
if level1 is None:
if previous_level1 is None:
break
level1 = previous_level1
text, _href = self.elem_to_link(document, elem, counter) text, _href = self.elem_to_link(document, elem, counter)
counter += 1 counter += 1
if text: if text:
added2[elem] = level1.add(text, _href, added2[elem] = level1.add(text, _href,
play_order=self.oeb.toc.next_play_order()) play_order=self.oeb.toc.next_play_order())
if self.opts.level3_toc is not None: break
level3 = list(XPath(self.opts.level3_toc)(document.data))
for elem in level3: if self.opts.level3_toc is not None and added2:
for elem in XPath(self.opts.level3_toc)(document.data):
level2 = None level2 = None
for item in document.data.iterdescendants(): for item in document.data.iterdescendants():
if item in added2.keys(): if item in added2:
level2 = added2[item] level2 = added2[item]
elif item == elem and level2 is not None: elif item == elem:
if level2 is None:
if previous_level2 is None:
break
level2 = previous_level2
text, _href = \ text, _href = \
self.elem_to_link(document, elem, counter) self.elem_to_link(document, elem, counter)
counter += 1 counter += 1
if text: if text:
level2.add(text, _href, level2.add(text, _href,
play_order=self.oeb.toc.next_play_order()) play_order=self.oeb.toc.next_play_order())
break

View File

@ -46,7 +46,8 @@ class Tokenize:
def __remove_uc_chars(self, startchar, token): def __remove_uc_chars(self, startchar, token):
for i in xrange(startchar, len(token)): for i in xrange(startchar, len(token)):
if token[i] == " ": #handle the case of an uc char with a terminating blank before ansi char
if token[i] == " " and self.__uc_char:
continue continue
elif self.__uc_char: elif self.__uc_char:
self.__uc_char -= 1 self.__uc_char -= 1

View File

@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin):
name = 'TXT Input' name = 'TXT Input'
author = 'John Schember' author = 'John Schember'
description = 'Convert TXT files to HTML' description = 'Convert TXT files to HTML'
file_types = set(['txt', 'txtz']) file_types = set(['txt', 'txtz', 'text'])
options = set([ options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto', OptionRecommendation(name='paragraph_type', recommended_value='auto',

View File

@ -0,0 +1,61 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__license__ = 'GPL v3'
from PyQt4.Qt import QDialog, QVBoxLayout, QLabel, QDialogButtonBox, \
QListWidget, QAbstractItemView
from PyQt4 import QtGui
class ChoosePluginToolbarsDialog(QDialog):
    '''
    Dialog showing a multi-selection list of locations (toolbars/menus)
    to which a plugin can be added, with Ok and Cancel buttons.
    '''

    def __init__(self, parent, plugin, locations):
        '''
        :param parent: parent widget, passed to ``QDialog``
        :param plugin: object whose ``name`` is shown in the window title
                       and the header label
        :param locations: sequence of ``(key, text)`` pairs; ``text`` is
                          listed, and the sequence is indexed by row in
                          :meth:`selected_locations`
        '''
        QDialog.__init__(self, parent)
        self.locations = locations

        self.setWindowTitle(
            _('Add "%s" to toolbars or menus')%plugin.name)

        self._layout = QVBoxLayout(self)
        self.setLayout(self._layout)

        # Header explaining what is being chosen.
        header_text = _('Select the toolbars and/or menus to add <b>%s</b> to:') % plugin.name
        self._header_label = QLabel(header_text)
        self._layout.addWidget(self._header_label)

        # One row per location, in the order given by ``locations``.
        self._locations_list = QListWidget(self)
        self._locations_list.setSelectionMode(QAbstractItemView.MultiSelection)
        policy = QtGui.QSizePolicy(QtGui.QSizePolicy.Preferred,
                                   QtGui.QSizePolicy.Minimum)
        policy.setHorizontalStretch(0)
        policy.setVerticalStretch(0)
        self._locations_list.setSizePolicy(policy)
        for _key, label in locations:
            self._locations_list.addItem(label)
        self._layout.addWidget(self._locations_list)

        # Footer pointing at the preferences page.
        footer_text = _('You can also customise the plugin locations '
                        'using <b>Preferences -> Customise the toolbar</b>')
        self._footer_label = QLabel(footer_text)
        self._layout.addWidget(self._footer_label)

        buttons = QDialogButtonBox(QDialogButtonBox.Ok |
                                   QDialogButtonBox.Cancel)
        buttons.accepted.connect(self.accept)
        buttons.rejected.connect(self.reject)
        self._layout.addWidget(buttons)

        self.resize(self.sizeHint())

    def selected_locations(self):
        '''
        Return the entries of ``self.locations`` whose rows are currently
        selected in the list widget.
        '''
        chosen_rows = self._locations_list.selectionModel().selectedRows()
        return [self.locations[index.row()] for index in chosen_rows]

View File

@ -16,9 +16,10 @@ from calibre.customize.ui import initialized_plugins, is_disabled, enable_plugin
disable_plugin, plugin_customization, add_plugin, \ disable_plugin, plugin_customization, add_plugin, \
remove_plugin remove_plugin
from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \ from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \
question_dialog question_dialog, gprefs
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.icu import lower from calibre.utils.icu import lower
from calibre.utils.ordered_dict import OrderedDict
class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{
@ -281,6 +282,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self._plugin_model.populate() self._plugin_model.populate()
self._plugin_model.reset() self._plugin_model.reset()
self.changed_signal.emit() self.changed_signal.emit()
self.check_for_add_to_toolbars(plugin)
info_dialog(self, _('Success'), info_dialog(self, _('Success'),
_('Plugin <b>{0}</b> successfully installed under <b>' _('Plugin <b>{0}</b> successfully installed under <b>'
' {1} plugins</b>. You may have to restart calibre ' ' {1} plugins</b>. You may have to restart calibre '
@ -342,6 +344,37 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
plugin.name + _(' cannot be removed. It is a ' plugin.name + _(' cannot be removed. It is a '
'builtin plugin. Try disabling it instead.')).exec_() 'builtin plugin. Try disabling it instead.')).exec_()
def check_for_add_to_toolbars(self, plugin):
    '''
    Offer to add a newly installed interface-action plugin to the GUI.

    Does nothing when ``plugin`` is not an ``InterfaceActionBase``, when
    its action name already appears in any stored ``action-layout-*``
    preference, or when every location is listed in the action's
    ``dont_add_to``. Otherwise a ``ChoosePluginToolbarsDialog`` is shown
    and, if accepted, the action name is appended to the stored layout of
    each selected location.

    :param plugin: the plugin that was just installed
    '''
    from calibre.gui2.preferences.toolbar import ConfigWidget
    from calibre.customize import InterfaceActionBase

    if not isinstance(plugin, InterfaceActionBase):
        return

    locations = OrderedDict(ConfigWidget.LOCATIONS)
    action = plugin.load_actual_plugin(self.gui)

    # If already installed in a GUI container, do nothing
    current_layouts = [list(gprefs.get('action-layout-'+loc, []))
                       for loc in locations]
    if any(action.name in layout for layout in current_layouts):
        return

    candidates = []
    for loc, label in locations.iteritems():
        if loc not in action.dont_add_to:
            candidates.append((loc, label))
    if not candidates:
        return # This plugin doesn't want to live in the GUI

    from calibre.gui2.dialogs.choose_plugin_toolbars import ChoosePluginToolbarsDialog
    dialog = ChoosePluginToolbarsDialog(self, action, candidates)
    if dialog.exec_() != dialog.Accepted:
        return

    for loc, _label in dialog.selected_locations():
        pref_key = 'action-layout-'+loc
        layout = list(gprefs.get(pref_key, []))
        layout.append(action.name)
        gprefs[pref_key] = tuple(layout)
if __name__ == '__main__': if __name__ == '__main__':
from PyQt4.Qt import QApplication from PyQt4.Qt import QApplication