Merge from trunk

This commit is contained in:
Charles Haley 2011-03-10 13:19:32 +00:00
commit 0e1dcf9f06
26 changed files with 709 additions and 67 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 495 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 414 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 840 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 302 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 556 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 437 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 728 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 494 B

View File

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
cotidianul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Cotidianul(BasicNewsRecipe):
    """Recipe definition for Cotidianul (cotidianul.ro).

    Everything is configured through class attributes; the single method
    hands each parsed page to ``adeify_images``.
    """

    # Publication metadata.
    title = u'Cotidianul'
    __author__ = u'Silviu Cotoar\u0103'
    description = u''
    publisher = u'Cotidianul'
    category = 'Ziare,Stiri'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.cotidianul.ro/images/cotidianul.png'

    # Download settings.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above,
    # so it has to stay below them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Stylesheet injected into the generated book.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # Tag selectors for the parts of the article page to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'titlu'}},
        {'name': 'div', 'attrs': {'class': 'gallery clearfix'}},
        {'name': 'div', 'attrs': {'align': 'justify'}},
    ]

    # Tag selectors for elements to drop from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['space']}},
        {'name': 'div', 'attrs': {'id': ['title_desc']}},
    ]

    # Tag selectors marking where the trailing page content is cut off.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['space']}},
        {'name': 'span', 'attrs': {'class': ['date']}},
    ]

    # (section title, feed URL) pairs.
    feeds = [(u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml')]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
ele.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ele(BasicNewsRecipe):
    """Recipe definition for Ele (ele.ro).

    Everything is configured through class attributes; the single method
    hands each parsed page to ``adeify_images``.
    """

    # Publication metadata.
    title = u'Ele'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Dezv\u0103luie ceea ce e\u015fti'
    publisher = u'Ele'
    category = 'Ziare,Femei'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg'

    # Download settings.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above,
    # so it has to stay below them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Stylesheet injected into the generated book.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # Tag selectors for the parts of the article page to keep.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'article_title'}},
        {'name': 'div', 'attrs': {'class': 'article_text'}},
    ]

    # (section title, feed URL) pairs.
    feeds = [(u'Feeds', u'http://www.ele.ro/rss_must_read')]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
revistafelicia.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Felicia(BasicNewsRecipe):
    """Recipe definition for Revista Felicia (revistafelicia.ro).

    Everything is configured through class attributes; the single method
    hands each parsed page to ``adeify_images``.
    """

    # Publication metadata.
    title = u'Revista Felicia'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'O revist\u0103 pentru sufletul t\u0103u'
    publisher = u'Revista Felicia'
    category = 'Ziare,Reviste'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg'

    # Download settings.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above,
    # so it has to stay below them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors for the parts of the article page to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'header'}},
        {'name': 'div', 'attrs': {'id': 'contentArticol'}},
    ]

    # Tag selectors for elements to drop from the kept content.
    remove_tags = [
        {
            'name': 'img',
            'attrs': {'src': ['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']},
        },
        {'name': 'div', 'attrs': {'class': ['content']}},
    ]

    # (section title, feed URL) pairs.
    feeds = [(u'Feeds', u'http://www.revistafelicia.ro/rss')]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
financiarul.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Financiarul(BasicNewsRecipe):
    """Recipe definition for Financiarul (financiarul.com).

    Everything is configured through class attributes; the single method
    hands each parsed page to ``adeify_images``.
    """

    # Publication metadata.
    title = u'Financiarul'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'FIN.ro'
    publisher = u'Financiarul'
    category = 'Ziare,Stiri'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.financiarul.com/templates/default/images/logo.png'

    # Download settings.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above,
    # so it has to stay below them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors for the parts of the article page to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'col2ContentLeftL'}},
    ]

    # Tag selectors for elements to drop from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['infoArticol']}},
        {'name': 'ul', 'attrs': {'class': 'navSectiuni'}},
        {'name': 'div', 'attrs': {'class': 'separator separatorTop'}},
        {'name': 'div', 'attrs': {'class': 'infoArticol infoArticolBottom'}},
        {'name': 'ul', 'attrs': {'class': ['related']}},
        {'name': 'div', 'attrs': {'class': ['slot panel300 panelGri300 panelGri300s panelGri300sm']}},
    ]

    # Tag selectors marking where the trailing page content is cut off.
    remove_tags_after = [
        {'name': 'ul', 'attrs': {'class': ['related']}},
    ]

    # (section title, feed URL) pairs.
    feeds = [(u'Feeds', u'http://www.financiarul.com/rss')]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
imperatortravel.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Imperatortravel(BasicNewsRecipe):
    """Recipe definition for Imperator Travel (imperatortravel.ro).

    Everything is configured through class attributes; the single method
    hands each parsed page to ``adeify_images``.
    """

    # Publication metadata.
    title = u'Imperator Travel'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'C\u0103l\u0103torii'
    publisher = u'Imperator Travel'
    category = 'Ziare,Stiri,Turism,Calatorii'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.imperatortravel.ro/images/header-1.jpg'

    # Download settings.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above,
    # so it has to stay below them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Stylesheet injected into the generated book.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # Tag selectors for the parts of the article page to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article first_main_article'}},
    ]

    # Tag selectors for elements to drop from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['meta']}},
        {'name': 'body', 'attrs': {'class': ['transparent_widget ff3 win Locale_en_US']}},
        {'name': 'div', 'attrs': {'class': ['connect_widget']}},
        {'name': 'ul', 'attrs': {'class': ['similar-posts']}},
    ]

    # Tag selectors marking where the trailing page content is cut off.
    remove_tags_after = [
        {'name': 'ul', 'attrs': {'class': ['similar-posts']}},
    ]

    # (section title, feed URL) pairs.
    feeds = [(u'Feeds', u'http://feeds.feedburner.com/ImperatorTravels')]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
lanacion.com.ar
'''
@ -19,9 +19,10 @@ class Lanacion(BasicNewsRecipe):
language = 'es_AR'
publication_type = 'newspaper'
remove_empty_feeds = True
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
extra_css = """ h1{font-family: Georgia,serif}
h2{color: #626262}
masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif'
extra_css = """
h1{font-family: Georgia,serif}
h2{color: #626262; font-weight: normal; font-size: 1.1em}
body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.notaFecha{color: #808080}
@ -37,47 +38,78 @@ class Lanacion(BasicNewsRecipe):
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'class':['nota floatFix','topNota','nota','post']})]
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']})
,dict(name='div' , attrs={'class':['cajaHerramientas noprint','cajaHerramientas floatFix'] })
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
,dict(name='div' , attrs={'class':['titulosMultimedia','herramientas noprint','cajaHerramientas noprint','cajaHerramientas floatFix'] })
,dict(attrs={'class':['izquierda','espacio17','espacio10','espacio20','floatFix ultimasNoticias','relacionadas','titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
]
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
remove_attributes = ['height','width','visible','onclick','data-count','name']
feeds = [
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
,(u'Politica' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=30' )
,(u'Economia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=272' )
,(u'Deportes' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=131' )
,(u'Informacion General' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=21' )
,(u'Cultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1' )
,(u'Opinion' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=28' )
,(u'Espectaculos' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=120' )
,(u'Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7' )
,(u'Ciencia&Salud' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=498' )
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
,(u'Enfoques' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=421' )
,(u'Comercio Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=347' )
,(u'Tecnologia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=432' )
,(u'Arquitectura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=366' )
,(u'Turismo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=504' )
,(u'Al volante' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=371' )
,(u'El Campo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=337' )
,(u'Moda y Belleza' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1312' )
,(u'Inmuebles Comerciales', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1363' )
,(u'Countries' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1348' )
,(u'adnCultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6734' )
,(u'The Wall Street Journal Americas', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6373' )
,(u'Estilo de vida' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7353' )
,(u'Management' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7380' )
,(u'Bicentenario' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7276' )
(u'Ultimas Noticias' , u'http://servicios.lanacion.com.ar/herramientas/rss/origen=2' )
,(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' )
,(u'Cultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1' )
,(u'Opinion' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=28' )
,(u'Espectaculos' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=120' )
,(u'Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7' )
,(u'Ciencia&Salud' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=498' )
,(u'Revista' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=494' )
,(u'Enfoques' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=421' )
,(u'Comercio Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=347' )
,(u'Tecnologia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=432' )
,(u'Arquitectura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=366' )
,(u'Turismo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=504' )
,(u'Al volante' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=371' )
,(u'El Campo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=337' )
,(u'Moda y Belleza' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1312')
,(u'Inmuebles Comerciales', u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1363')
,(u'Countries' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1348')
,(u'adnCultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6734')
,(u'The WSJ Americas' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6373')
,(u'Comunidad' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1344')
,(u'Management' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7380')
,(u'Bicentenario' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7276')
]
def get_browser(self):
    """Return the ``BasicNewsRecipe`` browser with debug output enabled.

    Redirect, response and HTTP debugging are all switched on before the
    browser is handed back.
    """
    browser = BasicNewsRecipe.get_browser()
    # Turn on each debug channel in turn.
    browser.set_debug_redirects(True)
    browser.set_debug_responses(True)
    browser.set_debug_http(True)
    return browser
def get_article_url(self, article):
    """Return the article link, or None for blog links lacking a trailing slash.

    The link comes from ``BasicNewsRecipe.get_article_url``. A link that
    starts with ``http://blogs.lanacion`` and does not end in ``/`` is
    rejected by returning None; any other link is returned unchanged.
    """
    url = BasicNewsRecipe.get_article_url(self, article)
    is_blog_link = url.startswith('http://blogs.lanacion')
    if is_blog_link and not url.endswith('/'):
        return None
    return url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
monden.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Monden(BasicNewsRecipe):
    """Recipe definition for Monden (monden.info).

    Everything is configured through class attributes; the single method
    hands each parsed page to ``adeify_images``.
    """

    # Publication metadata.
    title = u'Monden'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102'
    publisher = u'Monden'
    category = 'Ziare,Stiri,Muzica'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg'

    # Download settings.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above,
    # so it has to stay below them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Stylesheet injected into the generated book.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # Tag selectors for the parts of the article page to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]

    # Tag selectors for elements to drop from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['postAuthor']}},
        {'name': 'div', 'attrs': {'class': ['postLike']}},
    ]

    # Tag selectors marking where the trailing page content is cut off.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['postLike']}},
    ]

    # (section title, feed URL) pairs.
    feeds = [(u'Feeds', u'http://www.monden.info/feed/')]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,23 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1299640653(BasicNewsRecipe):
    """Recipe definition for Oakland North (oaklandnorth.net).

    Purely declarative: every setting is a class attribute.
    """

    # Publication metadata.
    title = u'Oakland North'
    __author__ = 'noah'
    description = 'Oakland North'
    category = 'news'
    language = 'en'
    masthead_url = 'http://oaklandnorth.net/wp-content/themes/oaklandnorth/images/masthead.png'

    # Download settings.
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True

    # Keep <div> elements whose class contains the word "post" when it is
    # not immediately followed by a hyphen (case-insensitive).
    keep_only_tags = [
        {
            'name': 'div',
            'attrs': {'class': re.compile(r'\bpost\b(?!-)', re.IGNORECASE)},
        },
    ]

    # The post-postscript paragraph is both the cut-off marker and an
    # element to remove.
    remove_tags_after = [
        {'name': 'p', 'attrs': {'class': 'post-postscript'}},
    ]
    remove_tags = [
        {'name': 'p', 'attrs': {'class': 'post-postscript'}},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'All Headlines', u'http://oaklandnorth.net/feed/'),
    ]

View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
promotor.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Promotor(BasicNewsRecipe):
    """Recipe definition for Promotor (promotor.ro).

    Everything is configured through class attributes; the single method
    hands each parsed page to ``adeify_images``.
    """

    # Publication metadata.
    title = u'Promotor'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Auto-moto'
    publisher = u'Promotor'
    category = 'Ziare,Reviste,TV,Auto'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.promotor.ro/images/logo_promotor.gif'

    # Download settings.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above,
    # so it has to stay below them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Stylesheet injected into the generated book.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # Tag selectors for the parts of the article page to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'casetatitluarticol'}},
        {
            'name': 'div',
            'attrs': {'style': 'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'},
        },
        {'name': 'div', 'attrs': {'class': 'textb'}},
        {'name': 'div', 'attrs': {'class': 'contentarticol'}},
    ]

    # Tag selectors for elements to drop from the kept content.
    remove_tags = [
        {
            'name': 'td',
            'attrs': {'class': ['connect_widget_vertical_center connect_widget_button_cell']},
        },
        {'name': 'div', 'attrs': {'class': ['etichetagry']}},
        {'name': 'span', 'attrs': {'class': ['textb']}},
    ]

    # Tag selectors marking where the trailing page content is cut off.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['etichetagry']}},
        {'name': 'span', 'attrs': {'class': ['textb']}},
    ]

    # (section title, feed URL) pairs.
    feeds = [(u'Feeds', u'http://www.promotor.ro/rss')]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
timesnewroman.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TimesNewRoman(BasicNewsRecipe):
    """Recipe definition for Times New Roman (timesnewroman.ro).

    Everything is configured through class attributes; the single method
    hands each parsed page to ``adeify_images``.
    """

    # Publication metadata.
    title = u'Times New Roman'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Cotidian independent de umor voluntar'
    publisher = u'Times New Roman'
    category = 'Ziare,Reviste,Fun'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif'

    # Download settings.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above,
    # so it has to stay below them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors for the parts of the article page to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'page'}},
    ]

    # Tag selectors for elements to drop from the kept content.
    remove_tags = [
        {'name': 'p', 'attrs': {'class': ['articleinfo']}},
        {'name': 'div', 'attrs': {'class': ['vergefacebooklike']}},
        {'name': 'div', 'attrs': {'class': 'cleared'}},
    ]

    # Tag selectors marking where the trailing page content is cut off.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'cleared'}},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed

View File

@ -30,7 +30,6 @@ def strftime(epoch, zone=time.gmtime):
def get_connected_device():
from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner
import uuid
dev = None
scanner = DeviceScanner()
scanner.scan()
@ -48,7 +47,7 @@ def get_connected_device():
for d in connected_devices:
try:
d.open(str(uuid.uuid4()))
d.open()
except:
continue
else:

View File

@ -6,7 +6,7 @@ Provides a command-line and optional graphical interface to the SONY Reader PRS-
For usage information run the script.
"""
import StringIO, sys, time, os, uuid
import StringIO, sys, time, os
from optparse import OptionParser
from calibre import __version__, __appname__
@ -213,7 +213,7 @@ def main():
for d in connected_devices:
try:
d.open(str(uuid.uuid4()))
d.open(None)
except:
continue
else:

View File

@ -25,7 +25,7 @@ class DRMError(ValueError):
class ParserError(ValueError):
pass
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'htm', 'xhtm',
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb']

View File

@ -13,6 +13,7 @@ from urlparse import urlparse
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
from calibre.ebooks import ConversionError
from calibre.utils.ordered_dict import OrderedDict
def XPath(x):
try:
@ -95,10 +96,8 @@ class DetectStructure(object):
self.log.exception('Failed to mark chapter')
def create_level_based_toc(self):
if self.opts.level1_toc is None:
return
for item in self.oeb.spine:
self.add_leveled_toc_items(item)
if self.opts.level1_toc is not None:
self.add_leveled_toc_items()
def create_toc_from_chapters(self):
counter = self.oeb.toc.next_play_order()
@ -145,49 +144,57 @@ class DetectStructure(object):
return text, href
def add_leveled_toc_items(self, item):
level1 = XPath(self.opts.level1_toc)(item.data)
level1_order = []
document = item
def add_leveled_toc_items(self):
added = OrderedDict()
added2 = OrderedDict()
counter = 1
if level1:
added = {}
for elem in level1:
for document in self.oeb.spine:
previous_level1 = list(added.itervalues())[-1] if added else None
previous_level2 = list(added2.itervalues())[-1] if added2 else None
for elem in XPath(self.opts.level1_toc)(document.data):
text, _href = self.elem_to_link(document, elem, counter)
counter += 1
if text:
node = self.oeb.toc.add(text, _href,
play_order=self.oeb.toc.next_play_order())
level1_order.append(node)
added[elem] = node
#node.add(_('Top'), _href)
if self.opts.level2_toc is not None:
added2 = {}
level2 = list(XPath(self.opts.level2_toc)(document.data))
for elem in level2:
if self.opts.level2_toc is not None and added:
for elem in XPath(self.opts.level2_toc)(document.data):
level1 = None
for item in document.data.iterdescendants():
if item in added.keys():
if item in added:
level1 = added[item]
elif item == elem and level1 is not None:
elif item == elem:
if level1 is None:
if previous_level1 is None:
break
level1 = previous_level1
text, _href = self.elem_to_link(document, elem, counter)
counter += 1
if text:
added2[elem] = level1.add(text, _href,
play_order=self.oeb.toc.next_play_order())
if self.opts.level3_toc is not None:
level3 = list(XPath(self.opts.level3_toc)(document.data))
for elem in level3:
break
if self.opts.level3_toc is not None and added2:
for elem in XPath(self.opts.level3_toc)(document.data):
level2 = None
for item in document.data.iterdescendants():
if item in added2.keys():
if item in added2:
level2 = added2[item]
elif item == elem and level2 is not None:
elif item == elem:
if level2 is None:
if previous_level2 is None:
break
level2 = previous_level2
text, _href = \
self.elem_to_link(document, elem, counter)
counter += 1
if text:
level2.add(text, _href,
play_order=self.oeb.toc.next_play_order())
break

View File

@ -46,7 +46,8 @@ class Tokenize:
def __remove_uc_chars(self, startchar, token):
for i in xrange(startchar, len(token)):
if token[i] == " ":
#handle the case of an uc char with a terminating blank before ansi char
if token[i] == " " and self.__uc_char:
continue
elif self.__uc_char:
self.__uc_char -= 1

View File

@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin):
name = 'TXT Input'
author = 'John Schember'
description = 'Convert TXT files to HTML'
file_types = set(['txt', 'txtz'])
file_types = set(['txt', 'txtz', 'text'])
options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto',

View File

@ -0,0 +1,61 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__license__ = 'GPL v3'
from PyQt4.Qt import QDialog, QVBoxLayout, QLabel, QDialogButtonBox, \
QListWidget, QAbstractItemView
from PyQt4 import QtGui
class ChoosePluginToolbarsDialog(QDialog):
    """Dialog with a multi-selection list of locations a plugin can be added to.

    The dialog stacks, top to bottom: a header label, the list of location
    texts, a footer hint and an Ok/Cancel button box.
    """

    def __init__(self, parent, plugin, locations):
        """Build the dialog.

        :param parent: parent widget handed to ``QDialog``.
        :param plugin: object whose ``name`` is interpolated into the window
                       title and the header label.
        :param locations: indexable sequence of ``(key, text)`` pairs; one
                          list row showing ``text`` is created per pair.
        """
        QDialog.__init__(self, parent)
        self.locations = locations

        title = _('Add "%s" to toolbars or menus')%plugin.name
        self.setWindowTitle(title)

        self._layout = QVBoxLayout(self)
        self.setLayout(self._layout)

        header_text = _('Select the toolbars and/or menus to add <b>%s</b> to:') % \
                plugin.name
        self._header_label = QLabel(header_text)
        self._layout.addWidget(self._header_label)

        self._locations_list = QListWidget(self)
        self._locations_list.setSelectionMode(QAbstractItemView.MultiSelection)
        policy = QtGui.QSizePolicy(QtGui.QSizePolicy.Preferred,
                QtGui.QSizePolicy.Minimum)
        policy.setHorizontalStretch(0)
        policy.setVerticalStretch(0)
        self._locations_list.setSizePolicy(policy)
        # Rows are added in the order of ``locations`` so that a row index
        # can be used to look the pair up again in selected_locations().
        for _key, label in locations:
            self._locations_list.addItem(label)
        self._layout.addWidget(self._locations_list)

        footer_text = _('You can also customise the plugin locations '
                'using <b>Preferences -> Customise the toolbar</b>')
        self._footer_label = QLabel(footer_text)
        self._layout.addWidget(self._footer_label)

        buttons = QDialogButtonBox(QDialogButtonBox.Ok |
                QDialogButtonBox.Cancel)
        buttons.accepted.connect(self.accept)
        buttons.rejected.connect(self.reject)
        self._layout.addWidget(buttons)

        self.resize(self.sizeHint())

    def selected_locations(self):
        """Return the ``locations`` entries whose rows are currently selected."""
        chosen_rows = self._locations_list.selectionModel().selectedRows()
        return [self.locations[index.row()] for index in chosen_rows]

View File

@ -16,9 +16,10 @@ from calibre.customize.ui import initialized_plugins, is_disabled, enable_plugin
disable_plugin, plugin_customization, add_plugin, \
remove_plugin
from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \
question_dialog
question_dialog, gprefs
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.icu import lower
from calibre.utils.ordered_dict import OrderedDict
class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{
@ -281,6 +282,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self._plugin_model.populate()
self._plugin_model.reset()
self.changed_signal.emit()
self.check_for_add_to_toolbars(plugin)
info_dialog(self, _('Success'),
_('Plugin <b>{0}</b> successfully installed under <b>'
' {1} plugins</b>. You may have to restart calibre '
@ -342,6 +344,37 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
plugin.name + _(' cannot be removed. It is a '
'builtin plugin. Try disabling it instead.')).exec_()
def check_for_add_to_toolbars(self, plugin):
    """Offer to add a plugin's action to toolbars/menus.

    Returns without doing anything when ``plugin`` is not an
    ``InterfaceActionBase``, when its action name already appears in one of
    the stored ``action-layout-*`` preferences, or when every known location
    is listed in the action's ``dont_add_to``. Otherwise a
    ``ChoosePluginToolbarsDialog`` is shown and, if accepted, the action
    name is appended to the stored layout of each selected location.
    """
    from calibre.gui2.preferences.toolbar import ConfigWidget
    from calibre.customize import InterfaceActionBase

    if not isinstance(plugin, InterfaceActionBase):
        return

    all_locations = OrderedDict(ConfigWidget.LOCATIONS)
    plugin_action = plugin.load_actual_plugin(self.gui)

    # Snapshot the stored layout of every location before checking any.
    layouts = OrderedDict()
    for key in all_locations:
        layouts[key] = list(gprefs.get('action-layout-'+key, []))

    # If already installed in a GUI container, do nothing
    if any(plugin_action.name in names for names in layouts.itervalues()):
        return

    allowed_locations = []
    for key, text in all_locations.iteritems():
        if key not in plugin_action.dont_add_to:
            allowed_locations.append((key, text))
    if not allowed_locations:
        return # This plugin doesn't want to live in the GUI

    from calibre.gui2.dialogs.choose_plugin_toolbars import ChoosePluginToolbarsDialog
    dialog = ChoosePluginToolbarsDialog(self, plugin_action, allowed_locations)
    if dialog.exec_() != dialog.Accepted:
        return

    for key, _label in dialog.selected_locations():
        pref_name = 'action-layout-'+key
        # Re-read the preference so the stored value is extended, then
        # write it back as a tuple.
        current = list(gprefs.get(pref_name, []))
        current.append(plugin_action.name)
        gprefs[pref_name] = tuple(current)
if __name__ == '__main__':
from PyQt4.Qt import QApplication