mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
54e7ba109d
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
|
|||||||
title = '180.com.uy'
|
title = '180.com.uy'
|
||||||
__author__ = 'Gustavo Azambuja'
|
__author__ = 'Gustavo Azambuja'
|
||||||
description = 'Noticias de Uruguay'
|
description = 'Noticias de Uruguay'
|
||||||
language = 'es'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 5
|
recursion = 5
|
||||||
|
@ -20,7 +20,7 @@ class SieteDias(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
@ -58,4 +58,4 @@ class Ambito(BasicNewsRecipe):
|
|||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
@ -12,7 +12,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
|||||||
masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
|
masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'es'
|
language = 'es_MX'
|
||||||
|
|
||||||
#feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]
|
#feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ class Axxon_news(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
publication_type = 'magazine'
|
publication_type = 'magazine'
|
||||||
INDEX = 'http://axxon.com.ar/rev/'
|
INDEX = 'http://axxon.com.ar/rev/'
|
||||||
|
@ -18,7 +18,7 @@ class Axxon_news(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
|
|||||||
title = 'bitacora.com.uy'
|
title = 'bitacora.com.uy'
|
||||||
__author__ = 'Gustavo Azambuja'
|
__author__ = 'Gustavo Azambuja'
|
||||||
description = 'Noticias de Uruguay'
|
description = 'Noticias de Uruguay'
|
||||||
language = 'es'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 5
|
recursion = 5
|
||||||
|
@ -20,7 +20,7 @@ class BsAsEconomico(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
INDEX = 'http://www.clarin.com'
|
INDEX = 'http://www.clarin.com'
|
||||||
masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
|
masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
|
||||||
|
@ -14,7 +14,7 @@ class CriticaDigital(BasicNewsRecipe):
|
|||||||
description = 'Noticias de Argentina'
|
description = 'Noticias de Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
@ -11,7 +11,7 @@ class CubaDebate(BasicNewsRecipe):
|
|||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Contra el Terorismo Mediatico'
|
description = 'Contra el Terorismo Mediatico'
|
||||||
oldest_article = 15
|
oldest_article = 15
|
||||||
language = 'es'
|
language = 'es_CU'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
@ -20,8 +20,8 @@ class CubaDebate(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
|
masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
#BlogTitle{font-size: xx-large; font-weight: bold}
|
#BlogTitle{font-size: xx-large; font-weight: bold}
|
||||||
body{font-family: Verdana, Arial, Tahoma, sans-serif}
|
body{font-family: Verdana, Arial, Tahoma, sans-serif}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -41,7 +41,7 @@ class CubaDebate(BasicNewsRecipe):
|
|||||||
|
|
||||||
feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
|
feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
|
||||||
remove_attributes=['width','height','lang']
|
remove_attributes=['width','height','lang']
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + 'print/'
|
return url + 'print/'
|
||||||
|
|
||||||
@ -50,5 +50,5 @@ class CubaDebate(BasicNewsRecipe):
|
|||||||
del item['style']
|
del item['style']
|
||||||
for item in soup.findAll('img'):
|
for item in soup.findAll('img'):
|
||||||
if not item.has_key('alt'):
|
if not item.has_key('alt'):
|
||||||
item['alt'] = 'image'
|
item['alt'] = 'image'
|
||||||
return soup
|
return soup
|
||||||
|
@ -16,7 +16,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'es'
|
language = 'de_ES'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
|
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
|
||||||
|
@ -20,7 +20,7 @@ class Diagonales(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
@ -20,8 +20,8 @@ class ElMercurio(BasicNewsRecipe):
|
|||||||
masthead_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
|
masthead_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_CL'
|
||||||
|
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
@ -33,7 +33,7 @@ class ElMercurio(BasicNewsRecipe):
|
|||||||
keep_only_tags = [dict(name='div', attrs={'id':['cont_iz_titulobajada','cont_iz_creditos_1_a','cont_iz_cuerpo']})]
|
keep_only_tags = [dict(name='div', attrs={'id':['cont_iz_titulobajada','cont_iz_creditos_1_a','cont_iz_cuerpo']})]
|
||||||
remove_tags = [dict(name='div', attrs={'id':'cont_iz_cuerpo_relacionados'})]
|
remove_tags = [dict(name='div', attrs={'id':'cont_iz_cuerpo_relacionados'})]
|
||||||
remove_attributes = ['height','width']
|
remove_attributes = ['height','width']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Noticias de ultima hora', u'http://rss.emol.com/rss.asp?canal=0')
|
(u'Noticias de ultima hora', u'http://rss.emol.com/rss.asp?canal=0')
|
||||||
,(u'Nacional', u'http://rss.emol.com/rss.asp?canal=1')
|
,(u'Nacional', u'http://rss.emol.com/rss.asp?canal=1')
|
||||||
|
@ -13,7 +13,7 @@ class ObservaDigital(BasicNewsRecipe):
|
|||||||
title = 'Observa Digital'
|
title = 'Observa Digital'
|
||||||
__author__ = 'yrvn'
|
__author__ = 'yrvn'
|
||||||
description = 'Noticias de Uruguay'
|
description = 'Noticias de Uruguay'
|
||||||
language = 'es'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 5
|
recursion = 5
|
||||||
|
@ -14,7 +14,7 @@ class General(BasicNewsRecipe):
|
|||||||
description = 'Noticias de Uruguay y el resto del mundo'
|
description = 'Noticias de Uruguay y el resto del mundo'
|
||||||
publisher = 'EL PAIS S.A.'
|
publisher = 'EL PAIS S.A.'
|
||||||
category = 'news, politics, Uruguay'
|
category = 'news, politics, Uruguay'
|
||||||
language = 'es'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 2
|
recursion = 2
|
||||||
|
@ -20,7 +20,7 @@ class ElUniversal(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
language = 'es'
|
language = 'es_MX'
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
body{font-family:Arial,Helvetica,sans-serif}
|
body{font-family:Arial,Helvetica,sans-serif}
|
||||||
|
@ -12,7 +12,7 @@ class ElArgentino(BasicNewsRecipe):
|
|||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Informacion Libre las 24 horas'
|
||||||
publisher = 'ElArgentino.com'
|
publisher = 'ElArgentino.com'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
@ -20,7 +20,7 @@ class ElArgentino(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
|
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
@ -28,16 +28,16 @@ class ElArgentino(BasicNewsRecipe):
|
|||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'noprint' })
|
dict(name='div', attrs={'id':'noprint' })
|
||||||
,dict(name='div', attrs={'class':'encabezadoImprimir'})
|
,dict(name='div', attrs={'class':'encabezadoImprimir'})
|
||||||
,dict(name='a' , attrs={'target':'_blank' })
|
,dict(name='a' , attrs={'target':'_blank' })
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Portada' , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home' )
|
(u'Portada' , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home' )
|
||||||
,(u'Pais' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs' )
|
,(u'Pais' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs' )
|
||||||
,(u'Economia' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa' )
|
,(u'Economia' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa' )
|
||||||
@ -51,12 +51,12 @@ class ElArgentino(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main, sep, article_part = url.partition('/nota-')
|
main, sep, article_part = url.partition('/nota-')
|
||||||
article_id, rsep, rrest = article_part.partition('-')
|
article_id, rsep, rrest = article_part.partition('-')
|
||||||
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
@ -18,7 +18,7 @@ class ElComercio(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
language = 'es'
|
language = 'es_EC'
|
||||||
masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
|
masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
|
||||||
extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '
|
extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ class ElCronista(BasicNewsRecipe):
|
|||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Argentina'
|
description = 'Noticias de Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -25,14 +25,14 @@ class ElCronista(BasicNewsRecipe):
|
|||||||
, '--category' , 'news, Argentina'
|
, '--category' , 'news, Argentina'
|
||||||
, '--publisher' , title
|
, '--publisher' , title
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='table', attrs={'width':'100%' })
|
dict(name='table', attrs={'width':'100%' })
|
||||||
,dict(name='h1' , attrs={'class':'Arialgris16normal'})
|
,dict(name='h1' , attrs={'class':'Arialgris16normal'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
|
remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
|
(u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
|
||||||
,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
|
,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
|
||||||
@ -69,4 +69,4 @@ class ElCronista(BasicNewsRecipe):
|
|||||||
if link_item:
|
if link_item:
|
||||||
cover_url = index + link_item.img['src']
|
cover_url = index + link_item.img['src']
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ class ElTiempoHn(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es_HN'
|
||||||
|
|
||||||
lang = 'es-HN'
|
lang = 'es-HN'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
@ -18,7 +18,7 @@ class ElUniversal(BasicNewsRecipe):
|
|||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
publisher = 'El Universal'
|
publisher = 'El Universal'
|
||||||
category = 'news, Caracas, Venezuela, world'
|
category = 'news, Caracas, Venezuela, world'
|
||||||
language = 'es'
|
language = 'es_VE'
|
||||||
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
|
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
|
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class ElUniversalImpresaRecipe(BasicNewsRecipe):
|
class ElUniversalImpresaRecipe(BasicNewsRecipe):
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'kwetal'
|
__author__ = 'kwetal'
|
||||||
language = 'es'
|
language = 'es_MX'
|
||||||
version = 1
|
version = 1
|
||||||
|
|
||||||
title = u'El Universal (Edici\u00F3n Impresa)'
|
title = u'El Universal (Edici\u00F3n Impresa)'
|
||||||
|
@ -17,7 +17,7 @@ class ElUniverso_Ecuador(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_EC'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
masthead_url = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'
|
masthead_url = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'
|
||||||
|
54
resources/recipes/explosm.recipe
Normal file
54
resources/recipes/explosm.recipe
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class Explosm(BasicNewsRecipe):
    """Calibre recipe for the Explosm (Cyanide and Happiness) comics feed.

    The feed is filtered in ``parse_feeds`` so that only entries whose URL
    contains the explosm.net comics path are kept, and ``keep_only_tags``
    selects the comic image by its ``alt`` text.
    """

    # Alt text used by the site on the comic <img>; referenced both by
    # keep_only_tags and by skip_ad_pages, so it is defined once here.
    COMIC_IMG_ALT = 'Cyanide and Happiness, a daily webcomic'
    # Substring an article URL must contain to be treated as a comic page.
    COMIC_URL_PREFIX = 'http://www.explosm.net/comics'

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
    ]

    #match_regexps = [r'http://www.explosm.net/comics/.*']

    keep_only_tags = [dict(name='img', attrs={'alt': COMIC_IMG_ALT})]
    remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''

    def get_cover_url(self):
        """Return the fixed cover image URL used for this recipe."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Return the parsed feeds with every non-comic article removed.

        An article is kept only when its ``url`` contains
        ``COMIC_URL_PREFIX``. Articles without a URL are dropped instead of
        raising. Each feed's ``articles`` list is updated in place.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        prefix = self.COMIC_URL_PREFIX

        for curfeed in feeds:
            # Slice assignment keeps the same list object, matching the
            # original in-place removal, but does it in a single pass.
            curfeed.articles[:] = [
                article for article in curfeed.articles
                if article.url and prefix in article.url
            ]

        return feeds

    def skip_ad_pages(self, soup):
        """Decide whether the fetched page should be replaced.

        Returns ``None`` when the comic image is present in ``soup`` and
        returns ``soup`` itself when the image is missing.
        """
        # Skip ad pages served before actual article
        # NOTE(review): calibre documents this hook as returning the HTML of
        # the real page or None; returning the unchanged soup when the comic
        # image is absent is preserved from the original -- confirm intent.
        skip_tag = soup.find(name='img', attrs={'alt': self.COMIC_IMG_ALT})
        if skip_tag is None:
            return soup
        return None
|
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
|
|||||||
title = 'freeway.com.uy'
|
title = 'freeway.com.uy'
|
||||||
__author__ = 'Gustavo Azambuja'
|
__author__ = 'Gustavo Azambuja'
|
||||||
description = 'Revista Freeway, Montevideo, Uruguay'
|
description = 'Revista Freeway, Montevideo, Uruguay'
|
||||||
language = 'es'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 1
|
recursion = 1
|
||||||
|
@ -20,7 +20,7 @@ class Granma(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
|
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
|
||||||
language = 'es'
|
language = 'es_CU'
|
||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ class iEco(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
publisher = 'Grupo Clarin'
|
publisher = 'Grupo Clarin'
|
||||||
category = 'news, economia, mercados, bolsa de valores, finanzas, empresas, negocios, empleos, emprendedores, marketinguniversidades, tecnologia, agronegocios, noticias, informacion'
|
category = 'news, economia, mercados, bolsa de valores, finanzas, empresas, negocios, empleos, emprendedores, marketinguniversidades, tecnologia, agronegocios, noticias, informacion'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
cover_url = 'http://www.ieco.clarin.com/static2/images/Tapa-PDF.gif'
|
cover_url = 'http://www.ieco.clarin.com/static2/images/Tapa-PDF.gif'
|
||||||
extra_css = ' #bd{font-family: sans-serif} '
|
extra_css = ' #bd{font-family: sans-serif} '
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ class Infobae(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
|
masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
@ -25,7 +25,7 @@ class Infobae(BasicNewsRecipe):
|
|||||||
body{font-family:Arial,Helvetica,sans-serif;}
|
body{font-family:Arial,Helvetica,sans-serif;}
|
||||||
.popUpTitulo{color:#0D4261; font-size: xx-large}
|
.popUpTitulo{color:#0D4261; font-size: xx-large}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
@ -33,7 +33,7 @@ class Infobae(BasicNewsRecipe):
|
|||||||
, 'language' : language
|
, 'language' : language
|
||||||
, 'linearize_tables' : True
|
, 'linearize_tables' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
||||||
|
@ -20,7 +20,7 @@ class Juventudrebelde(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
language = 'es'
|
language = 'es_CU'
|
||||||
|
|
||||||
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
|
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
@ -50,4 +50,4 @@ class LaCuarta(BasicNewsRecipe):
|
|||||||
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
|
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
|
||||||
|
|
||||||
|
|
||||||
language = 'es'
|
language = 'es_CL'
|
||||||
|
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
|
|||||||
title = 'La Diaria'
|
title = 'La Diaria'
|
||||||
__author__ = 'Gustavo Azambuja'
|
__author__ = 'Gustavo Azambuja'
|
||||||
description = 'Noticias de Uruguay'
|
description = 'Noticias de Uruguay'
|
||||||
language = 'es'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 5
|
recursion = 5
|
||||||
|
@ -19,7 +19,7 @@ class LaJornada_mx(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_MX'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf")
|
cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf")
|
||||||
masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'
|
masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'
|
||||||
@ -34,8 +34,8 @@ class LaJornada_mx(BasicNewsRecipe):
|
|||||||
.credito{font-weight: bold; margin-left: 1em}
|
.credito{font-weight: bold; margin-left: 1em}
|
||||||
.credito-autor{font-variant: small-caps; font-weight: bold }
|
.credito-autor{font-variant: small-caps; font-weight: bold }
|
||||||
.credito-titulo{text-align: right}
|
.credito-titulo{text-align: right}
|
||||||
.hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em }
|
.hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em }
|
||||||
.loc{font-weight: bold}
|
.loc{font-weight: bold}
|
||||||
.carton{text-align: center}
|
.carton{text-align: center}
|
||||||
.credit{font-weight: bold}
|
.credit{font-weight: bold}
|
||||||
.sumario{font-weight: bold; text-align: center}
|
.sumario{font-weight: bold; text-align: center}
|
||||||
@ -56,7 +56,7 @@ class LaJornada_mx(BasicNewsRecipe):
|
|||||||
,re.DOTALL|re.IGNORECASE)
|
,re.DOTALL|re.IGNORECASE)
|
||||||
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
|
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','credito-articulo','text','carton']})
|
dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','credito-articulo','text','carton']})
|
||||||
,dict(name='div', attrs={'id':'renderComments'})
|
,dict(name='div', attrs={'id':'renderComments'})
|
||||||
@ -88,4 +88,4 @@ class LaJornada_mx(BasicNewsRecipe):
|
|||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
rurl = article.get('link', None)
|
rurl = article.get('link', None)
|
||||||
return rurl.rpartition('&partner=')[0]
|
return rurl.rpartition('&partner=')[0]
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ class LaRazon_Bol(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_BO'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
delay = 1
|
delay = 1
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class LaSegunda(BasicNewsRecipe):
|
class LaSegunda(BasicNewsRecipe):
|
||||||
title = 'La Segunda'
|
title = 'La Segunda'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'El sitio de noticias online de Chile'
|
description = 'El sitio de noticias online de Chile'
|
||||||
publisher = 'La Segunda'
|
publisher = 'La Segunda'
|
||||||
category = 'news, politics, Chile'
|
category = 'news, politics, Chile'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
@ -19,9 +19,9 @@ class LaSegunda(BasicNewsRecipe):
|
|||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
|
masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
language = 'es'
|
language = 'es_CL'
|
||||||
extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
|
extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
@ -29,13 +29,13 @@ class LaSegunda(BasicNewsRecipe):
|
|||||||
, 'language' : language
|
, 'language' : language
|
||||||
, 'linearize_tables' : True
|
, 'linearize_tables' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
remove_tags_before = dict(attrs={'class':'titulonegritastop'})
|
remove_tags_before = dict(attrs={'class':'titulonegritastop'})
|
||||||
remove_tags = [dict(name='img')]
|
remove_tags = [dict(name='img')]
|
||||||
remove_attributes = ['width','height']
|
remove_attributes = ['width','height']
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
|
(u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
|
||||||
,(u'Politica' , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
|
,(u'Politica' , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
|
||||||
,(u'Cronica' , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
|
,(u'Cronica' , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
|
||||||
@ -49,6 +49,6 @@ class LaSegunda(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
rest, sep, article_id = url.partition('index.asp?idnoticia=')
|
rest, sep, article_id = url.partition('index.asp?idnoticia=')
|
||||||
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
|
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
|
||||||
|
|
||||||
|
@ -11,15 +11,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class LaMujerDeMiVida(BasicNewsRecipe):
|
class LaMujerDeMiVida(BasicNewsRecipe):
|
||||||
title = 'La Mujer de mi Vida'
|
title = 'La Mujer de mi Vida'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Cultura de otra manera'
|
description = 'Cultura de otra manera'
|
||||||
oldest_article = 90
|
oldest_article = 90
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
publisher = 'La Mujer de mi Vida'
|
publisher = 'La Mujer de mi Vida'
|
||||||
category = 'literatura, critica, arte, ensayos'
|
category = 'literatura, critica, arte, ensayos'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
INDEX = 'http://www.lamujerdemivida.com.ar/'
|
INDEX = 'http://www.lamujerdemivida.com.ar/'
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
@ -28,8 +28,8 @@ class LaMujerDeMiVida(BasicNewsRecipe):
|
|||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='table', attrs={'width':'570'})]
|
keep_only_tags = [dict(name='table', attrs={'width':'570'})]
|
||||||
|
|
||||||
@ -51,7 +51,7 @@ class LaMujerDeMiVida(BasicNewsRecipe):
|
|||||||
if cover_item:
|
if cover_item:
|
||||||
cover_url = self.INDEX + cover_item['src']
|
cover_url = self.INDEX + cover_item['src']
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
totalfeeds = []
|
totalfeeds = []
|
||||||
lfeeds = self.get_feeds()
|
lfeeds = self.get_feeds()
|
||||||
@ -74,4 +74,4 @@ class LaMujerDeMiVida(BasicNewsRecipe):
|
|||||||
})
|
})
|
||||||
totalfeeds.append((feedtitle, articles))
|
totalfeeds.append((feedtitle, articles))
|
||||||
return totalfeeds
|
return totalfeeds
|
||||||
|
|
||||||
|
@ -16,17 +16,17 @@ class Lanacion(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
|
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
|
||||||
extra_css = """ h1{font-family: Georgia,serif}
|
extra_css = """ h1{font-family: Georgia,serif}
|
||||||
h2{color: #626262}
|
h2{color: #626262}
|
||||||
body{font-family: Arial,sans-serif}
|
body{font-family: Arial,sans-serif}
|
||||||
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
|
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
|
||||||
.notaFecha{color: #808080}
|
.notaFecha{color: #808080}
|
||||||
.notaEpigrafe{font-size: x-small}
|
.notaEpigrafe{font-size: x-small}
|
||||||
.topNota h1{font-family: Arial,sans-serif}
|
.topNota h1{font-family: Arial,sans-serif}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@ -45,7 +45,7 @@ class Lanacion(BasicNewsRecipe):
|
|||||||
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
|
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
|
||||||
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
|
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
|
||||||
]
|
]
|
||||||
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
|
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
|
||||||
remove_attributes = ['height','width','visible','onclick','data-count','name']
|
remove_attributes = ['height','width','visible','onclick','data-count','name']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
@ -51,4 +51,4 @@ class LaNacionChile(BasicNewsRecipe):
|
|||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = 'es'
|
language = 'es_CL'
|
||||||
|
@ -21,9 +21,9 @@ class LaPrensa(BasicNewsRecipe):
|
|||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
# cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
|
# cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
lang = 'es'
|
lang = 'es'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
@ -32,7 +32,7 @@ class LaPrensa(BasicNewsRecipe):
|
|||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
filter_regexps = [r'.*archive.aspx.*']
|
filter_regexps = [r'.*archive.aspx.*']
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='td', attrs={'class':["link-registro","link-buscador"]}),
|
dict(name='td', attrs={'class':["link-registro","link-buscador"]}),
|
||||||
dict(name='td', attrs={'id':["TDTabItem1","TDTabItem2","TDTabItem3","TDTabItem4"]}),
|
dict(name='td', attrs={'id':["TDTabItem1","TDTabItem2","TDTabItem3","TDTabItem4"]}),
|
||||||
@ -58,9 +58,9 @@ class LaPrensa(BasicNewsRecipe):
|
|||||||
dict(name='img', src = "/versions/1/imgs/separador-linea-azul.gif"),
|
dict(name='img', src = "/versions/1/imgs/separador-linea-azul.gif"),
|
||||||
dict(name='img', src = " /versions/1/imgs/separador-linea.gif"),
|
dict(name='img', src = " /versions/1/imgs/separador-linea.gif"),
|
||||||
dict(name='a',text ="Powered by Civinext Groupware - V. 2.0.3567.23706"),
|
dict(name='a',text ="Powered by Civinext Groupware - V. 2.0.3567.23706"),
|
||||||
dict(name='img', height ="0")
|
dict(name='img', height ="0")
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
.seccion{font-size:xx-small;}
|
.seccion{font-size:xx-small;}
|
||||||
body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
||||||
@ -69,7 +69,7 @@ class LaPrensa(BasicNewsRecipe):
|
|||||||
.fecha{font-size:xx-small;}
|
.fecha{font-size:xx-small;}
|
||||||
.volanta{font-size:xx-small;}
|
.volanta{font-size:xx-small;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Politica' , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=4' )
|
(u'Politica' , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=4' )
|
||||||
,(u'Economia' , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=5' )
|
,(u'Economia' , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=5' )
|
||||||
@ -80,14 +80,14 @@ class LaPrensa(BasicNewsRecipe):
|
|||||||
,(u'Espectaculos', u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx?Rss=10')
|
,(u'Espectaculos', u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx?Rss=10')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
|
||||||
for t in soup.findAll(['table','td','tr','span','tbody']):
|
for t in soup.findAll(['table','td','tr','span','tbody']):
|
||||||
t.name = 'div'
|
t.name = 'div'
|
||||||
for t in soup.findAll(['hr']):
|
for t in soup.findAll(['hr']):
|
||||||
t.extract()
|
t.extract()
|
||||||
|
|
||||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
@ -95,8 +95,8 @@ class LaPrensa(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(align = "center"):
|
for item in soup.findAll(align = "center"):
|
||||||
del item['align']
|
del item['align']
|
||||||
for item in soup.findAll(bgcolor="ffffff"):
|
for item in soup.findAll(bgcolor="ffffff"):
|
||||||
del item['bgcolor']
|
del item['bgcolor']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ class LaPrensaHn(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es_HN'
|
||||||
|
|
||||||
lang = 'es-HN'
|
lang = 'es-HN'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
@ -22,7 +22,7 @@ class LaPrensa_ni(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
language = 'es'
|
language = 'es_NI'
|
||||||
|
|
||||||
months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
|
months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
|
||||||
current_month = months_es[datetime.date.today().month - 1]
|
current_month = months_es[datetime.date.today().month - 1]
|
||||||
|
@ -1,73 +1,92 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
latimes.com
|
www.latimes.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class LATimes(BasicNewsRecipe):
|
class LATimes(BasicNewsRecipe):
|
||||||
title = u'The Los Angeles Times'
|
title = 'Los Angeles Times'
|
||||||
__author__ = u'Darko Miletic and Sujata Raman'
|
__author__ = 'Darko Miletic'
|
||||||
description = u'News from Los Angeles'
|
description = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California'
|
||||||
oldest_article = 7
|
publisher = 'Tribune Company'
|
||||||
max_articles_per_feed = 100
|
category = 'news, politics, USA, Los Angeles, world'
|
||||||
language = 'en'
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 200
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
language = 'en'
|
||||||
lang = 'en-US'
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
masthead_url = 'http://www.latimes.com/images/logo.png'
|
||||||
|
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Georgia,"Times New Roman",Times,serif }
|
||||||
|
img{margin-bottom: 0.4em; margin-top: 0.8em; display:block}
|
||||||
|
h2{font-size: 1.1em}
|
||||||
|
.deckhead{font-size: small; text-transform: uppercase}
|
||||||
|
.small{color: gray; font-size: small}
|
||||||
|
.date,.time,.copyright{font-size: x-small; color:gray; font-style:italic;}
|
||||||
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'language' : lang
|
, 'tags' : category
|
||||||
}
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
, 'linearize_tables' : 'Yes'
|
||||||
|
}
|
||||||
|
|
||||||
extra_css = '''
|
keep_only_tags = [
|
||||||
h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
|
dict(name='div', attrs={'class':'story'})
|
||||||
h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
|
,dict(attrs={'class':['entry-header','time','entry-content']})
|
||||||
.story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
|
]
|
||||||
.entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
|
remove_tags_after=dict(name='p', attrs={'class':'copyright'})
|
||||||
.entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
|
remove_tags = [
|
||||||
.credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
dict(name=['meta','link','iframe','object','embed'])
|
||||||
.small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
,dict(attrs={'class':['toolSet','articlerail','googleAd','entry-footer-left','entry-footer-right','entry-footer-social','google-ad-story-bottom','sphereTools']})
|
||||||
.byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
,dict(attrs={'id':['article-promo','googleads','moduleArticleToolsContainer','gallery-subcontent']})
|
||||||
.date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
|
]
|
||||||
.time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
|
remove_attributes=['lang','xmlns:fb','xmlns:og','border','xtags','i','article_body']
|
||||||
.copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
|
|
||||||
.subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
# recursions = 1
|
|
||||||
# match_regexps = [r'http://www.latimes.com/.*page=[2-9]']
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':["story" ,"entry"] })]
|
|
||||||
|
|
||||||
|
|
||||||
remove_tags = [ dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}),
|
feeds = [
|
||||||
dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}),
|
(u'Top News' , u'http://feeds.latimes.com/latimes/news' )
|
||||||
dict(name='p', attrs={'class':["entry-footer",]}),
|
,(u'Local News' , u'http://feeds.latimes.com/latimes/news/local' )
|
||||||
dict(name='ul', attrs={'class':"article-nav clearfix"}),
|
,(u'National' , u'http://feeds.latimes.com/latimes/news/nationworld/nation' )
|
||||||
dict(name=['iframe'])
|
,(u'National Politics' , u'http://feeds.latimes.com/latimes/news/politics/' )
|
||||||
]
|
,(u'Business' , u'http://feeds.latimes.com/latimes/business' )
|
||||||
|
,(u'Education' , u'http://feeds.latimes.com/latimes/news/education' )
|
||||||
|
,(u'Environment' , u'http://feeds.latimes.com/latimes/news/science/environment' )
|
||||||
feeds = [(u'News', u'http://feeds.latimes.com/latimes/news')
|
,(u'Religion' , u'http://feeds.latimes.com/latimes/features/religion' )
|
||||||
,(u'Local','http://feeds.latimes.com/latimes/news/local')
|
,(u'Science' , u'http://feeds.latimes.com/latimes/news/science' )
|
||||||
,(u'MostEmailed','http://feeds.latimes.com/MostEmailed')
|
,(u'Technology' , u'http://feeds.latimes.com/latimes/technology' )
|
||||||
,(u'Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/')
|
,(u'Africa' , u'http://feeds.latimes.com/latimes/africa' )
|
||||||
,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/')
|
,(u'Asia' , u'http://feeds.latimes.com/latimes/asia' )
|
||||||
,('National','http://feeds.latimes.com/latimes/news/nationworld/nation')
|
,(u'Europe' , u'http://feeds.latimes.com/latimes/europe' )
|
||||||
,('Politics','http://feeds.latimes.com/latimes/news/politics/')
|
,(u'Latin America' , u'http://feeds.latimes.com/latimes/latinamerica' )
|
||||||
,('Business','http://feeds.latimes.com/latimes/business')
|
,(u'Middle East' , u'http://feeds.latimes.com/latimes/middleeast' )
|
||||||
,('Sports','http://feeds.latimes.com/latimes/sports/')
|
,(u'Arts&Culture' , u'http://feeds.feedburner.com/latimes/entertainment/news/arts' )
|
||||||
,('Entertainment','http://feeds.latimes.com/latimes/entertainment/')
|
,(u'Entertainment News' , u'http://feeds.feedburner.com/latimes/entertainment/news/' )
|
||||||
]
|
,(u'Movie News' , u'http://feeds.feedburner.com/latimes/entertainment/news/movies/' )
|
||||||
|
,(u'Movie Reviews' , u'http://feeds.feedburner.com/movies/reviews/' )
|
||||||
|
,(u'Music News' , u'http://feeds.feedburner.com/latimes/entertainment/news/music/' )
|
||||||
|
,(u'Pop Album Reviews' , u'http://feeds.feedburner.com/latimes/pop-album-reviews' )
|
||||||
|
,(u'Restaurant Reviews' , u'http://feeds.feedburner.com/latimes/restaurant/reviews' )
|
||||||
|
,(u'Theatar and Dance' , u'http://feeds.feedburner.com/latimes/theaterdance' )
|
||||||
|
,(u'Autos' , u'http://feeds.latimes.com/latimes/classified/automotive/highway1/')
|
||||||
|
,(u'Books' , u'http://feeds.latimes.com/features/books' )
|
||||||
|
,(u'Food' , u'http://feeds.latimes.com/latimes/features/food/' )
|
||||||
|
,(u'Health' , u'http://feeds.latimes.com/latimes/features/health/' )
|
||||||
|
,(u'Real Estate' , u'http://feeds.latimes.com/latimes/classified/realestate/' )
|
||||||
|
,(u'Commentary' , u'http://feeds2.feedburner.com/latimes/news/opinion/commentary/' )
|
||||||
|
,(u'Sports' , u'http://feeds.latimes.com/latimes/sports/' )
|
||||||
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
ans = article.get('feedburner_origlink').rpartition('?')[0]
|
ans = BasicNewsRecipe.get_article_url(self, article).rpartition('?')[0]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.log('Looking for full story link in', ans)
|
self.log('Looking for full story link in', ans)
|
||||||
@ -83,4 +102,22 @@ class LATimes(BasicNewsRecipe):
|
|||||||
pass
|
pass
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if not item.has_key('alt'):
|
||||||
|
item['alt'] = 'image'
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
limg = item.find('img')
|
||||||
|
if item.string is not None:
|
||||||
|
str = item.string
|
||||||
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
if limg:
|
||||||
|
item.name ='div'
|
||||||
|
item.attrs =[]
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
return soup
|
||||||
|
@ -21,7 +21,7 @@ class LaTribuna(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es_HN'
|
||||||
|
|
||||||
lang = 'es-HN'
|
lang = 'es-HN'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
@ -18,7 +18,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_BO'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
delay = 1
|
delay = 1
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
@ -12,7 +12,7 @@ import datetime
|
|||||||
class Milenio(BasicNewsRecipe):
|
class Milenio(BasicNewsRecipe):
|
||||||
title = u'Milenio-diario'
|
title = u'Milenio-diario'
|
||||||
__author__ = 'Bmsleight'
|
__author__ = 'Bmsleight'
|
||||||
language = 'es'
|
language = 'es_MX'
|
||||||
description = 'Milenio-diario'
|
description = 'Milenio-diario'
|
||||||
oldest_article = 10
|
oldest_article = 10
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
@ -20,7 +20,7 @@ class MiradasAlSur(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
|
|||||||
title = 'Montevideo COMM'
|
title = 'Montevideo COMM'
|
||||||
__author__ = 'Gustavo Azambuja'
|
__author__ = 'Gustavo Azambuja'
|
||||||
description = 'Noticias de Uruguay'
|
description = 'Noticias de Uruguay'
|
||||||
language = 'es'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 5
|
recursion = 5
|
||||||
|
@ -20,7 +20,7 @@ class Newsweek_Argentina(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
|
|||||||
title = 'Observa Digital'
|
title = 'Observa Digital'
|
||||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||||
description = 'Noticias desde Uruguay'
|
description = 'Noticias desde Uruguay'
|
||||||
language = 'es'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 5
|
recursion = 5
|
||||||
|
@ -19,15 +19,15 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: Arial,Helvetica,sans-serif }
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
img{margin-bottom: 0.4em; display:block}
|
img{margin-bottom: 0.4em; display:block}
|
||||||
#autor{font-weight: bold}
|
#autor{font-weight: bold}
|
||||||
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
|
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
|
||||||
#imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
|
#imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
|
||||||
.fgprincipal{font-size: large; font-weight: bold}
|
.fgprincipal{font-size: large; font-weight: bold}
|
||||||
"""
|
"""
|
||||||
@ -83,7 +83,7 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
del it['href']
|
del it['href']
|
||||||
del it['title']
|
del it['title']
|
||||||
for item in soup.findAll('p'):
|
for item in soup.findAll('p'):
|
||||||
it = item.find('h3')
|
it = item.find('h3')
|
||||||
if it:
|
if it:
|
||||||
it.name='span'
|
it.name='span'
|
||||||
return soup
|
return soup
|
||||||
|
@ -17,7 +17,7 @@ class Perfil(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
masthead_url = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif'
|
masthead_url = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
|
@ -13,7 +13,7 @@ class Reptantes(BasicNewsRecipe):
|
|||||||
description = u"cada vez que te haces acupuntura, tu muñeco vudú sufre en algún lado"
|
description = u"cada vez que te haces acupuntura, tu muñeco vudú sufre en algún lado"
|
||||||
oldest_article = 130
|
oldest_article = 130
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
|
|||||||
title = 'Revista Bla'
|
title = 'Revista Bla'
|
||||||
__author__ = 'Gustavo Azambuja'
|
__author__ = 'Gustavo Azambuja'
|
||||||
description = 'Moda | Uruguay'
|
description = 'Moda | Uruguay'
|
||||||
language = 'es'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 5
|
recursion = 5
|
||||||
|
@ -20,7 +20,7 @@ class Veintitres(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es_AR'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
@ -360,6 +360,9 @@ class LinuxFreeze(Command):
|
|||||||
def main():
|
def main():
|
||||||
try:
|
try:
|
||||||
sys.argv[0] = sys.calibre_basename
|
sys.argv[0] = sys.calibre_basename
|
||||||
|
dfv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
|
||||||
|
if dfv and os.path.exists(dfv):
|
||||||
|
sys.path.insert(0, os.path.abspath(dfv))
|
||||||
set_default_encoding()
|
set_default_encoding()
|
||||||
set_helper()
|
set_helper()
|
||||||
set_qt_plugin_path()
|
set_qt_plugin_path()
|
||||||
|
@ -139,6 +139,13 @@ class CHMReader(CHMFile):
|
|||||||
if self.hhc_path not in files and files:
|
if self.hhc_path not in files and files:
|
||||||
self.hhc_path = files[0]
|
self.hhc_path = files[0]
|
||||||
|
|
||||||
|
if self.hhc_path == '.hhc' and self.hhc_path not in files:
|
||||||
|
from calibre import walk
|
||||||
|
for x in walk(output_dir):
|
||||||
|
if os.path.basename(x).lower() in ('index.htm', 'index.html'):
|
||||||
|
self.hhc_path = os.path.relpath(x, output_dir)
|
||||||
|
break
|
||||||
|
|
||||||
def _reformat(self, data, htmlpath):
|
def _reformat(self, data, htmlpath):
|
||||||
try:
|
try:
|
||||||
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||||
|
@ -53,7 +53,7 @@ def find_pages(dir, sort_on_mtime=False, verbose=False):
|
|||||||
prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
|
prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
|
||||||
return pages
|
return pages
|
||||||
|
|
||||||
class PageProcessor(list):
|
class PageProcessor(list): # {{{
|
||||||
'''
|
'''
|
||||||
Contains the actual image rendering logic. See :method:`render` and
|
Contains the actual image rendering logic. See :method:`render` and
|
||||||
:method:`process_pages`.
|
:method:`process_pages`.
|
||||||
@ -111,6 +111,13 @@ class PageProcessor(list):
|
|||||||
|
|
||||||
SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
|
SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
|
||||||
|
|
||||||
|
try:
|
||||||
|
if self.opts.comic_image_size:
|
||||||
|
SCRWIDTH, SCRHEIGHT = map(int, [x.strip() for x in
|
||||||
|
self.opts.comic_image_size.split('x')])
|
||||||
|
except:
|
||||||
|
pass # Ignore
|
||||||
|
|
||||||
if self.opts.keep_aspect_ratio:
|
if self.opts.keep_aspect_ratio:
|
||||||
# Preserve the aspect ratio by adding border
|
# Preserve the aspect ratio by adding border
|
||||||
aspect = float(sizex) / float(sizey)
|
aspect = float(sizex) / float(sizey)
|
||||||
@ -170,6 +177,7 @@ class PageProcessor(list):
|
|||||||
dest = dest[:-1]
|
dest = dest[:-1]
|
||||||
os.rename(dest+'8', dest)
|
os.rename(dest+'8', dest)
|
||||||
self.append(dest)
|
self.append(dest)
|
||||||
|
# }}}
|
||||||
|
|
||||||
def render_pages(tasks, dest, opts, notification=lambda x, y: x):
|
def render_pages(tasks, dest, opts, notification=lambda x, y: x):
|
||||||
'''
|
'''
|
||||||
@ -291,7 +299,11 @@ class ComicInput(InputFormatPlugin):
|
|||||||
OptionRecommendation(name='no_process', recommended_value=False,
|
OptionRecommendation(name='no_process', recommended_value=False,
|
||||||
help=_("Apply no processing to the image")),
|
help=_("Apply no processing to the image")),
|
||||||
OptionRecommendation(name='dont_grayscale', recommended_value=False,
|
OptionRecommendation(name='dont_grayscale', recommended_value=False,
|
||||||
help=_('Do not convert the image to grayscale (black and white)'))
|
help=_('Do not convert the image to grayscale (black and white)')),
|
||||||
|
OptionRecommendation(name='comic_image_size', recommended_value=None,
|
||||||
|
help=_('Specify the image size as widthxheight pixels. Normally,'
|
||||||
|
' an image size is automatically calculated from the output '
|
||||||
|
'profile, this option overrides it.')),
|
||||||
])
|
])
|
||||||
|
|
||||||
recommendations = set([
|
recommendations = set([
|
||||||
|
@ -24,10 +24,11 @@ class HeuristicProcessor(object):
|
|||||||
self.chapters_no_title = 0
|
self.chapters_no_title = 0
|
||||||
self.chapters_with_title = 0
|
self.chapters_with_title = 0
|
||||||
self.blanks_deleted = False
|
self.blanks_deleted = False
|
||||||
|
self.blanks_between_paragraphs = False
|
||||||
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
|
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
|
||||||
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|spacer)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
||||||
self.softbreak = re.compile(r'\s*(?P<openline><p(?=\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
||||||
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
|
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
|
||||||
|
|
||||||
def is_pdftohtml(self, src):
|
def is_pdftohtml(self, src):
|
||||||
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
||||||
@ -42,8 +43,10 @@ class HeuristicProcessor(object):
|
|||||||
" chapters. - " + unicode(chap))
|
" chapters. - " + unicode(chap))
|
||||||
return '<h2>'+chap+'</h2>\n'
|
return '<h2>'+chap+'</h2>\n'
|
||||||
else:
|
else:
|
||||||
txt_chap = html2text(chap)
|
delete_whitespace = re.compile('^\s*(?P<c>.*?)\s*$')
|
||||||
txt_title = html2text(title)
|
delete_quotes = re.compile('\'\"')
|
||||||
|
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(chap)))
|
||||||
|
txt_title = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(title)))
|
||||||
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
||||||
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
||||||
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
|
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
|
||||||
@ -375,9 +378,9 @@ class HeuristicProcessor(object):
|
|||||||
html = re.sub('<p\s?/>', '', html)
|
html = re.sub('<p\s?/>', '', html)
|
||||||
# Get rid of empty span, bold, font, em, & italics tags
|
# Get rid of empty span, bold, font, em, & italics tags
|
||||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||||
html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
|
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
||||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||||
html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
|
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
||||||
self.deleted_nbsps = True
|
self.deleted_nbsps = True
|
||||||
return html
|
return html
|
||||||
|
|
||||||
@ -416,6 +419,28 @@ class HeuristicProcessor(object):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def detect_blank_formatting(self, html):
|
||||||
|
blanks_before_headings = re.compile(r'(\s*<p[^>]*>\s*</p>){1,}(?=\s*<h\d)', re.IGNORECASE)
|
||||||
|
blanks_after_headings = re.compile(r'(?<=</h\d>)(\s*<p[^>]*>\s*</p>){1,}', re.IGNORECASE)
|
||||||
|
|
||||||
|
def markup_spacers(match):
|
||||||
|
blanks = match.group(0)
|
||||||
|
blanks = self.blankreg.sub('\n<p class="spacer"> </p>', blanks)
|
||||||
|
return blanks
|
||||||
|
html = blanks_before_headings.sub(markup_spacers, html)
|
||||||
|
html = blanks_after_headings.sub(markup_spacers, html)
|
||||||
|
if self.html_preprocess_sections > self.min_chapters:
|
||||||
|
html = re.sub('(?si)^.*?(?=<h\d)', markup_spacers, html)
|
||||||
|
return html
|
||||||
|
|
||||||
|
def detect_soft_breaks(self, html):
|
||||||
|
if not self.blanks_deleted and self.blanks_between_paragraphs:
|
||||||
|
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
|
||||||
|
else:
|
||||||
|
html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
|
||||||
|
return html
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, html):
|
def __call__(self, html):
|
||||||
self.log.debug("********* Heuristic processing HTML *********")
|
self.log.debug("********* Heuristic processing HTML *********")
|
||||||
@ -457,23 +482,23 @@ class HeuristicProcessor(object):
|
|||||||
#html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
|
#html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
|
||||||
|
|
||||||
# Determine whether the document uses interleaved blank lines
|
# Determine whether the document uses interleaved blank lines
|
||||||
blanks_between_paragraphs = self.analyze_blanks(html)
|
self.blanks_between_paragraphs = self.analyze_blanks(html)
|
||||||
|
|
||||||
#self.dump(html, 'before_chapter_markup')
|
#self.dump(html, 'before_chapter_markup')
|
||||||
# detect chapters/sections to match xpath or splitting logic
|
# detect chapters/sections to match xpath or splitting logic
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'markup_chapter_headings', False):
|
if getattr(self.extra_opts, 'markup_chapter_headings', False):
|
||||||
html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
|
html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'italicize_common_cases', False):
|
if getattr(self.extra_opts, 'italicize_common_cases', False):
|
||||||
html = self.markup_italicis(html)
|
html = self.markup_italicis(html)
|
||||||
|
|
||||||
# If more than 40% of the lines are empty paragraphs and the user has enabled delete
|
# If more than 40% of the lines are empty paragraphs and the user has enabled delete
|
||||||
# blank paragraphs then delete blank lines to clean up spacing
|
# blank paragraphs then delete blank lines to clean up spacing
|
||||||
if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
|
if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
|
||||||
self.log.debug("deleting blank lines")
|
self.log.debug("deleting blank lines")
|
||||||
self.blanks_deleted = True
|
self.blanks_deleted = True
|
||||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
|
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
|
||||||
html = self.blankreg.sub('', html)
|
html = self.blankreg.sub('', html)
|
||||||
|
|
||||||
# Determine line ending type
|
# Determine line ending type
|
||||||
@ -525,14 +550,13 @@ class HeuristicProcessor(object):
|
|||||||
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'format_scene_breaks', False):
|
if getattr(self.extra_opts, 'format_scene_breaks', False):
|
||||||
|
html = self.detect_blank_formatting(html)
|
||||||
|
html = self.detect_soft_breaks(html)
|
||||||
# Center separator lines
|
# Center separator lines
|
||||||
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
|
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
|
||||||
if not self.blanks_deleted:
|
#html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
|
||||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
|
|
||||||
html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
|
|
||||||
|
|
||||||
if self.deleted_nbsps:
|
if self.deleted_nbsps:
|
||||||
# put back non-breaking spaces in empty paragraphs to preserve original formatting
|
# put back non-breaking spaces in empty paragraphs to preserve original formatting
|
||||||
html = self.blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
|
html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
|
||||||
html = self.softbreak.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
|
|
||||||
return html
|
return html
|
||||||
|
@ -175,6 +175,19 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
'EPUB files with DTBook markup are not supported')
|
'EPUB files with DTBook markup are not supported')
|
||||||
|
|
||||||
|
for x in list(opf.iterspine()):
|
||||||
|
ref = x.get('idref', None)
|
||||||
|
if ref is None:
|
||||||
|
x.getparent().remove(x)
|
||||||
|
continue
|
||||||
|
for y in opf.itermanifest():
|
||||||
|
if y.get('id', None) == ref and y.get('media-type', None) in \
|
||||||
|
('application/vnd.adobe-page-template+xml',):
|
||||||
|
p = x.getparent()
|
||||||
|
if p is not None:
|
||||||
|
p.remove(x)
|
||||||
|
break
|
||||||
|
|
||||||
with open('content.opf', 'wb') as nopf:
|
with open('content.opf', 'wb') as nopf:
|
||||||
nopf.write(opf.render())
|
nopf.write(opf.render())
|
||||||
|
|
||||||
|
@ -83,6 +83,7 @@ class RTFInput(InputFormatPlugin):
|
|||||||
os.mkdir(debug_dir)
|
os.mkdir(debug_dir)
|
||||||
debug_dir = 'rtfdebug'
|
debug_dir = 'rtfdebug'
|
||||||
run_lev = 4
|
run_lev = 4
|
||||||
|
self.log('Running RTFParser in debug mode')
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
parser = ParseRtf(
|
parser = ParseRtf(
|
||||||
@ -230,22 +231,6 @@ class RTFInput(InputFormatPlugin):
|
|||||||
with open('styles.css', 'ab') as f:
|
with open('styles.css', 'ab') as f:
|
||||||
f.write(css)
|
f.write(css)
|
||||||
|
|
||||||
# def preprocess(self, fname):
|
|
||||||
# self.log('\tPreprocessing to convert unicode characters')
|
|
||||||
# try:
|
|
||||||
# data = open(fname, 'rb').read()
|
|
||||||
# from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
|
|
||||||
# tokenizer = RtfTokenizer(data)
|
|
||||||
# tokens = RtfTokenParser(tokenizer.tokens)
|
|
||||||
# data = tokens.toRTF()
|
|
||||||
# fname = 'preprocessed.rtf'
|
|
||||||
# with open(fname, 'wb') as f:
|
|
||||||
# f.write(data)
|
|
||||||
# except:
|
|
||||||
# self.log.exception(
|
|
||||||
# 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
|
|
||||||
# return fname
|
|
||||||
|
|
||||||
def convert_borders(self, doc):
|
def convert_borders(self, doc):
|
||||||
border_styles = []
|
border_styles = []
|
||||||
style_map = {}
|
style_map = {}
|
||||||
@ -280,8 +265,6 @@ class RTFInput(InputFormatPlugin):
|
|||||||
self.opts = options
|
self.opts = options
|
||||||
self.log = log
|
self.log = log
|
||||||
self.log('Converting RTF to XML...')
|
self.log('Converting RTF to XML...')
|
||||||
#Name of the preprocesssed RTF file
|
|
||||||
# fname = self.preprocess(stream.name)
|
|
||||||
try:
|
try:
|
||||||
xml = self.generate_xml(stream.name)
|
xml = self.generate_xml(stream.name)
|
||||||
except RtfInvalidCodeException, e:
|
except RtfInvalidCodeException, e:
|
||||||
@ -335,3 +318,4 @@ class RTFInput(InputFormatPlugin):
|
|||||||
opf.render(open('metadata.opf', 'wb'))
|
opf.render(open('metadata.opf', 'wb'))
|
||||||
return os.path.abspath('metadata.opf')
|
return os.path.abspath('metadata.opf')
|
||||||
|
|
||||||
|
|
||||||
|
@ -238,6 +238,8 @@ class ParseRtf:
|
|||||||
bug_handler = RtfInvalidCodeException,
|
bug_handler = RtfInvalidCodeException,
|
||||||
)
|
)
|
||||||
enc = 'cp' + encode_obj.get_codepage()
|
enc = 'cp' + encode_obj.get_codepage()
|
||||||
|
if enc == 'cp10000':
|
||||||
|
enc = 'mac_roman'
|
||||||
msg = 'Exception in token processing'
|
msg = 'Exception in token processing'
|
||||||
if check_encoding_obj.check_encoding(self.__file, enc):
|
if check_encoding_obj.check_encoding(self.__file, enc):
|
||||||
file_name = self.__file if isinstance(self.__file, str) \
|
file_name = self.__file if isinstance(self.__file, str) \
|
||||||
|
@ -15,8 +15,10 @@
|
|||||||
# #
|
# #
|
||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, tempfile, re
|
import sys, os, tempfile, re
|
||||||
|
|
||||||
from calibre.ebooks.rtf2xml import copy
|
from calibre.ebooks.rtf2xml import copy
|
||||||
|
|
||||||
class Colors:
|
class Colors:
|
||||||
"""
|
"""
|
||||||
Change lines with color info from color numbers to the actual color names.
|
Change lines with color info from color numbers to the actual color names.
|
||||||
@ -40,8 +42,10 @@ class Colors:
|
|||||||
self.__file = in_file
|
self.__file = in_file
|
||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
self.__bug_handler = bug_handler
|
self.__bug_handler = bug_handler
|
||||||
|
self.__line = 0
|
||||||
self.__write_to = tempfile.mktemp()
|
self.__write_to = tempfile.mktemp()
|
||||||
self.__run_level = run_level
|
self.__run_level = run_level
|
||||||
|
|
||||||
def __initiate_values(self):
|
def __initiate_values(self):
|
||||||
"""
|
"""
|
||||||
Initiate all values.
|
Initiate all values.
|
||||||
@ -61,6 +65,7 @@ class Colors:
|
|||||||
self.__color_num = 1
|
self.__color_num = 1
|
||||||
self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
|
self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
|
||||||
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
|
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
|
||||||
|
|
||||||
def __before_color_func(self, line):
|
def __before_color_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -76,6 +81,7 @@ class Colors:
|
|||||||
if self.__token_info == 'mi<mk<clrtbl-beg':
|
if self.__token_info == 'mi<mk<clrtbl-beg':
|
||||||
self.__state = 'in_color_table'
|
self.__state = 'in_color_table'
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __default_color_func(self, line):
|
def __default_color_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -87,6 +93,7 @@ class Colors:
|
|||||||
"""
|
"""
|
||||||
hex_num = line[-3:-1]
|
hex_num = line[-3:-1]
|
||||||
self.__color_string += hex_num
|
self.__color_string += hex_num
|
||||||
|
|
||||||
def __blue_func(self, line):
|
def __blue_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -109,6 +116,7 @@ class Colors:
|
|||||||
)
|
)
|
||||||
self.__color_num += 1
|
self.__color_num += 1
|
||||||
self.__color_string = '#'
|
self.__color_string = '#'
|
||||||
|
|
||||||
def __in_color_func(self, line):
|
def __in_color_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -127,12 +135,13 @@ class Colors:
|
|||||||
self.__state = 'after_color_table'
|
self.__state = 'after_color_table'
|
||||||
else:
|
else:
|
||||||
action = self.__state_dict.get(self.__token_info)
|
action = self.__state_dict.get(self.__token_info)
|
||||||
if action == None:
|
if action is None:
|
||||||
sys.stderr.write('in module colors.py\n'
|
sys.stderr.write('in module colors.py\n'
|
||||||
'function is self.__in_color_func\n'
|
'function is self.__in_color_func\n'
|
||||||
'no action for %s' % self.__token_info
|
'no action for %s' % self.__token_info
|
||||||
)
|
)
|
||||||
action(line)
|
action(line)
|
||||||
|
|
||||||
def __after_color_func(self, line):
|
def __after_color_func(self, line):
|
||||||
"""
|
"""
|
||||||
Check the to see if it contains color info. If it does, extract the
|
Check the to see if it contains color info. If it does, extract the
|
||||||
@ -180,6 +189,7 @@ class Colors:
|
|||||||
else:
|
else:
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
|
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
|
||||||
|
|
||||||
def __sub_from_line_color(self, match_obj):
|
def __sub_from_line_color(self, match_obj):
|
||||||
num = match_obj.group(1)
|
num = match_obj.group(1)
|
||||||
try:
|
try:
|
||||||
@ -191,25 +201,27 @@ class Colors:
|
|||||||
else:
|
else:
|
||||||
return 'bdr-color_:no-value'
|
return 'bdr-color_:no-value'
|
||||||
hex_num = self.__figure_num(num)
|
hex_num = self.__figure_num(num)
|
||||||
return_value = 'bdr-color_:%s' % hex_num
|
return 'bdr-color_:%s' % hex_num
|
||||||
return return_value
|
|
||||||
def __figure_num(self, num):
|
def __figure_num(self, num):
|
||||||
if num == 0:
|
if num == 0:
|
||||||
hex_num = 'false'
|
hex_num = 'false'
|
||||||
else:
|
else:
|
||||||
hex_num = self.__color_dict.get(num)
|
hex_num = self.__color_dict.get(num)
|
||||||
if hex_num == None:
|
if hex_num is None:
|
||||||
if self.__run_level > 3:
|
|
||||||
msg = 'no value in self.__color_dict for key %s\n' % num
|
|
||||||
raise self.__bug_hanlder, msg
|
|
||||||
if hex_num == None:
|
|
||||||
hex_num = '0'
|
hex_num = '0'
|
||||||
|
if self.__run_level > 5:
|
||||||
|
msg = 'no value in self.__color_dict' \
|
||||||
|
'for key %s at line %d\n' % (num, self.__line)
|
||||||
|
raise self.__bug_handler, msg
|
||||||
return hex_num
|
return hex_num
|
||||||
|
|
||||||
def __do_nothing_func(self, line):
|
def __do_nothing_func(self, line):
|
||||||
"""
|
"""
|
||||||
Bad RTF will have text in the color table
|
Bad RTF will have text in the color table
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def convert_colors(self):
|
def convert_colors(self):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -226,20 +238,16 @@ class Colors:
|
|||||||
info, and substitute the number with the hex number.
|
info, and substitute the number with the hex number.
|
||||||
"""
|
"""
|
||||||
self.__initiate_values()
|
self.__initiate_values()
|
||||||
read_obj = open(self.__file, 'r')
|
with open(self.__file, 'r') as read_obj:
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
line_to_read = 1
|
for line in read_obj:
|
||||||
while line_to_read:
|
self.__line+=1
|
||||||
line_to_read = read_obj.readline()
|
self.__token_info = line[:16]
|
||||||
line = line_to_read
|
action = self.__state_dict.get(self.__state)
|
||||||
self.__token_info = line[:16]
|
if action is None:
|
||||||
action = self.__state_dict.get(self.__state)
|
sys.stderr.write('no matching state in module fonts.py\n')
|
||||||
if action == None:
|
sys.stderr.write(self.__state + '\n')
|
||||||
sys.stderr.write('no no matching state in module fonts.py\n')
|
action(line)
|
||||||
sys.stderr.write(self.__state + '\n')
|
|
||||||
action(line)
|
|
||||||
read_obj.close()
|
|
||||||
self.__write_obj.close()
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "color.data")
|
copy_obj.copy_file(self.__write_to, "color.data")
|
||||||
|
@ -33,13 +33,13 @@ class ConvertToTags:
|
|||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
self.__dtd_path = dtd_path
|
self.__dtd_path = dtd_path
|
||||||
self.__no_dtd = no_dtd
|
self.__no_dtd = no_dtd
|
||||||
if encoding != 'mac_roman':
|
self.__encoding = 'cp' + encoding
|
||||||
self.__encoding = 'cp' + encoding
|
if encoding == 'mac_roman':
|
||||||
else:
|
|
||||||
self.__encoding = 'mac_roman'
|
self.__encoding = 'mac_roman'
|
||||||
self.__indent = indent
|
self.__indent = indent
|
||||||
self.__run_level = run_level
|
self.__run_level = run_level
|
||||||
self.__write_to = tempfile.mktemp()
|
self.__write_to = tempfile.mktemp()
|
||||||
|
self.__convert_utf = False
|
||||||
|
|
||||||
def __initiate_values(self):
|
def __initiate_values(self):
|
||||||
"""
|
"""
|
||||||
@ -213,7 +213,8 @@ class ConvertToTags:
|
|||||||
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
|
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
|
||||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||||
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
|
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
|
||||||
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
|
self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
|
||||||
|
self.__convert_utf = True
|
||||||
else:
|
else:
|
||||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||||
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
|
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
|
||||||
@ -253,15 +254,28 @@ class ConvertToTags:
|
|||||||
an empty tag function.
|
an empty tag function.
|
||||||
"""
|
"""
|
||||||
self.__initiate_values()
|
self.__initiate_values()
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
self.__write_dec()
|
self.__write_dec()
|
||||||
with open(self.__file, 'r') as read_obj:
|
with open(self.__file, 'r') as read_obj:
|
||||||
for line in read_obj:
|
for line in read_obj:
|
||||||
self.__token_info = line[:16]
|
self.__token_info = line[:16]
|
||||||
action = self.__state_dict.get(self.__token_info)
|
action = self.__state_dict.get(self.__token_info)
|
||||||
if action is not None:
|
if action is not None:
|
||||||
action(line)
|
action(line)
|
||||||
self.__write_obj.close()
|
self.__write_obj.close()
|
||||||
|
#convert all encodings to UTF8 to avoid unsupported encodings in lxml
|
||||||
|
if self.__convert_utf:
|
||||||
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
|
copy_obj.rename(self.__write_to, self.__file)
|
||||||
|
with open(self.__file, 'r') as read_obj:
|
||||||
|
with open(self.__write_to, 'w') as write_obj:
|
||||||
|
file = read_obj.read()
|
||||||
|
try:
|
||||||
|
file = file.decode(self.__encoding)
|
||||||
|
write_obj.write(file.encode('utf-8'))
|
||||||
|
except:
|
||||||
|
sys.stderr.write('Conversion to UTF-8 is not possible,'
|
||||||
|
' encoding should be very carefully checked')
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
|
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
|
||||||
|
@ -75,12 +75,16 @@ class DefaultEncoding:
|
|||||||
self._encoding()
|
self._encoding()
|
||||||
self.__datafetched = True
|
self.__datafetched = True
|
||||||
code_page = 'ansicpg' + self.__code_page
|
code_page = 'ansicpg' + self.__code_page
|
||||||
|
if self.__code_page == '10000':
|
||||||
|
self.__code_page = 'mac_roman'
|
||||||
return self.__platform, code_page, self.__default_num
|
return self.__platform, code_page, self.__default_num
|
||||||
|
|
||||||
def get_codepage(self):
|
def get_codepage(self):
|
||||||
if not self.__datafetched:
|
if not self.__datafetched:
|
||||||
self._encoding()
|
self._encoding()
|
||||||
self.__datafetched = True
|
self.__datafetched = True
|
||||||
|
if self.__code_page == '10000':
|
||||||
|
self.__code_page = 'mac_roman'
|
||||||
return self.__code_page
|
return self.__code_page
|
||||||
|
|
||||||
def get_platform(self):
|
def get_platform(self):
|
||||||
|
@ -16,7 +16,9 @@
|
|||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, tempfile
|
import sys, os, tempfile
|
||||||
|
|
||||||
from calibre.ebooks.rtf2xml import copy
|
from calibre.ebooks.rtf2xml import copy
|
||||||
|
|
||||||
class Fonts:
|
class Fonts:
|
||||||
"""
|
"""
|
||||||
Change lines with font info from font numbers to the actual font names.
|
Change lines with font info from font numbers to the actual font names.
|
||||||
@ -45,6 +47,7 @@ class Fonts:
|
|||||||
self.__default_font_num = default_font_num
|
self.__default_font_num = default_font_num
|
||||||
self.__write_to = tempfile.mktemp()
|
self.__write_to = tempfile.mktemp()
|
||||||
self.__run_level = run_level
|
self.__run_level = run_level
|
||||||
|
|
||||||
def __initiate_values(self):
|
def __initiate_values(self):
|
||||||
"""
|
"""
|
||||||
Initiate all values.
|
Initiate all values.
|
||||||
@ -67,6 +70,7 @@ class Fonts:
|
|||||||
self.__font_table = {}
|
self.__font_table = {}
|
||||||
# individual font written
|
# individual font written
|
||||||
self.__wrote_ind_font = 0
|
self.__wrote_ind_font = 0
|
||||||
|
|
||||||
def __default_func(self, line):
|
def __default_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -79,6 +83,7 @@ class Fonts:
|
|||||||
if self.__token_info == 'mi<mk<fonttb-beg':
|
if self.__token_info == 'mi<mk<fonttb-beg':
|
||||||
self.__state = 'font_table'
|
self.__state = 'font_table'
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __font_table_func(self, line):
|
def __font_table_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -101,6 +106,7 @@ class Fonts:
|
|||||||
self.__font_num = self.__default_font_num
|
self.__font_num = self.__default_font_num
|
||||||
self.__text_line = ''
|
self.__text_line = ''
|
||||||
##self.__write_obj.write(line)
|
##self.__write_obj.write(line)
|
||||||
|
|
||||||
def __font_in_table_func(self, line):
|
def __font_in_table_func(self, line):
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
@ -138,6 +144,7 @@ class Fonts:
|
|||||||
elif self.__token_info == 'mi<mk<fonttb-end':
|
elif self.__token_info == 'mi<mk<fonttb-end':
|
||||||
self.__found_end_font_table_func()
|
self.__found_end_font_table_func()
|
||||||
self.__state = 'after_font_table'
|
self.__state = 'after_font_table'
|
||||||
|
|
||||||
def __found_end_font_table_func(self):
|
def __found_end_font_table_func(self):
|
||||||
"""
|
"""
|
||||||
Required:
|
Required:
|
||||||
@ -150,7 +157,8 @@ class Fonts:
|
|||||||
if not self.__wrote_ind_font:
|
if not self.__wrote_ind_font:
|
||||||
self.__write_obj.write(
|
self.__write_obj.write(
|
||||||
'mi<tg<empty-att_'
|
'mi<tg<empty-att_'
|
||||||
'<font-in-table<name>Times<num>0\n' )
|
'<font-in-table<name>Times<num>0\n')
|
||||||
|
|
||||||
def __after_font_table_func(self, line):
|
def __after_font_table_func(self, line):
|
||||||
"""
|
"""
|
||||||
Required:
|
Required:
|
||||||
@ -169,7 +177,7 @@ class Fonts:
|
|||||||
if self.__token_info == 'cw<ci<font-style':
|
if self.__token_info == 'cw<ci<font-style':
|
||||||
font_num = line[20:-1]
|
font_num = line[20:-1]
|
||||||
font_name = self.__font_table.get(font_num)
|
font_name = self.__font_table.get(font_num)
|
||||||
if font_name == None:
|
if font_name is None:
|
||||||
if self.__run_level > 3:
|
if self.__run_level > 3:
|
||||||
msg = 'no value for %s in self.__font_table\n' % font_num
|
msg = 'no value for %s in self.__font_table\n' % font_num
|
||||||
raise self.__bug_handler, msg
|
raise self.__bug_handler, msg
|
||||||
@ -182,6 +190,7 @@ class Fonts:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def convert_fonts(self):
|
def convert_fonts(self):
|
||||||
"""
|
"""
|
||||||
Required:
|
Required:
|
||||||
@ -197,20 +206,15 @@ class Fonts:
|
|||||||
info. Substitute a font name for a font number.
|
info. Substitute a font name for a font number.
|
||||||
"""
|
"""
|
||||||
self.__initiate_values()
|
self.__initiate_values()
|
||||||
read_obj = open(self.__file, 'r')
|
with open(self.__file, 'r') as read_obj:
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
line_to_read = 1
|
for line in read_obj:
|
||||||
while line_to_read:
|
self.__token_info = line[:16]
|
||||||
line_to_read = read_obj.readline()
|
action = self.__state_dict.get(self.__state)
|
||||||
line = line_to_read
|
if action is None:
|
||||||
self.__token_info = line[:16]
|
sys.stderr.write('no matching state in module fonts.py\n' \
|
||||||
action = self.__state_dict.get(self.__state)
|
+ self.__state + '\n')
|
||||||
if action == None:
|
action(line)
|
||||||
sys.stderr.write('no no matching state in module fonts.py\n')
|
|
||||||
sys.stderr.write(self.__state + '\n')
|
|
||||||
action(line)
|
|
||||||
read_obj.close()
|
|
||||||
self.__write_obj.close()
|
|
||||||
default_font_name = self.__font_table.get(self.__default_font_num)
|
default_font_name = self.__font_table.get(self.__default_font_num)
|
||||||
if not default_font_name:
|
if not default_font_name:
|
||||||
default_font_name = 'Not Defined'
|
default_font_name = 'Not Defined'
|
||||||
|
@ -43,7 +43,7 @@ class GetCharMap:
|
|||||||
def get_char_map(self, map):
|
def get_char_map(self, map):
|
||||||
if map == 'ansicpg0':
|
if map == 'ansicpg0':
|
||||||
map = 'ansicpg1250'
|
map = 'ansicpg1250'
|
||||||
if map in ('ansicpg10000', '10000'):
|
if map == 'ansicpg10000':
|
||||||
map = 'mac_roman'
|
map = 'mac_roman'
|
||||||
found_map = False
|
found_map = False
|
||||||
map_dict = {}
|
map_dict = {}
|
||||||
|
@ -126,12 +126,6 @@ class Tokenize:
|
|||||||
tokens = re.split(self.__splitexp, input_file)
|
tokens = re.split(self.__splitexp, input_file)
|
||||||
#remove empty tokens and \n
|
#remove empty tokens and \n
|
||||||
return filter(lambda x: len(x) > 0 and x != '\n', tokens)
|
return filter(lambda x: len(x) > 0 and x != '\n', tokens)
|
||||||
#input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
|
|
||||||
# line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
|
|
||||||
# this is for older RTF
|
|
||||||
#line = re.sub(self.__par_exp, '\\par ', line)
|
|
||||||
#return filter(lambda x: len(x) > 0, \
|
|
||||||
#(self.__remove_line.sub('', x) for x in tokens))
|
|
||||||
|
|
||||||
def __compile_expressions(self):
|
def __compile_expressions(self):
|
||||||
SIMPLE_RPL = {
|
SIMPLE_RPL = {
|
||||||
@ -160,7 +154,7 @@ class Tokenize:
|
|||||||
}
|
}
|
||||||
self.__replace_spchar = MReplace(SIMPLE_RPL)
|
self.__replace_spchar = MReplace(SIMPLE_RPL)
|
||||||
#add ;? in case of char following \u
|
#add ;? in case of char following \u
|
||||||
self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
|
self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
|
||||||
self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
|
self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
|
||||||
self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
|
self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
|
||||||
#manage upr/ud situations
|
#manage upr/ud situations
|
||||||
@ -172,14 +166,21 @@ class Tokenize:
|
|||||||
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
||||||
#this is for old RTF
|
#this is for old RTF
|
||||||
self.__par_exp = re.compile(r'\\\n+')
|
self.__par_exp = re.compile(r'\\\n+')
|
||||||
# self.__par_exp = re.compile(r'\\$')
|
#handle cw using a digit as argument and without space as delimiter
|
||||||
|
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
|
||||||
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
|
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
|
||||||
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
|
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
|
||||||
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
|
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
|
||||||
#self.__remove_line = re.compile(r'\n+')
|
#self.__remove_line = re.compile(r'\n+')
|
||||||
#self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
|
|
||||||
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
|
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
|
||||||
|
|
||||||
|
def __correct_spliting(self, token):
|
||||||
|
match_obj = re.search(self.__cwdigit_exp, token)
|
||||||
|
if match_obj is None:
|
||||||
|
return token
|
||||||
|
else:
|
||||||
|
return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
|
||||||
|
|
||||||
def tokenize(self):
|
def tokenize(self):
|
||||||
"""Main class for handling other methods. Reads the file \
|
"""Main class for handling other methods. Reads the file \
|
||||||
, uses method self.sub_reg to make basic substitutions,\
|
, uses method self.sub_reg to make basic substitutions,\
|
||||||
@ -187,7 +188,7 @@ class Tokenize:
|
|||||||
#read
|
#read
|
||||||
with open(self.__file, 'r') as read_obj:
|
with open(self.__file, 'r') as read_obj:
|
||||||
input_file = read_obj.read()
|
input_file = read_obj.read()
|
||||||
|
|
||||||
#process simple replacements and split giving us a correct list
|
#process simple replacements and split giving us a correct list
|
||||||
#remove '' and \n in the process
|
#remove '' and \n in the process
|
||||||
tokens = self.__sub_reg_split(input_file)
|
tokens = self.__sub_reg_split(input_file)
|
||||||
@ -195,7 +196,9 @@ class Tokenize:
|
|||||||
tokens = map(self.__unicode_process, tokens)
|
tokens = map(self.__unicode_process, tokens)
|
||||||
#remove empty items created by removing \uc
|
#remove empty items created by removing \uc
|
||||||
tokens = filter(lambda x: len(x) > 0, tokens)
|
tokens = filter(lambda x: len(x) > 0, tokens)
|
||||||
|
#handles bothersome cases
|
||||||
|
tokens = map(self.__correct_spliting, tokens)
|
||||||
|
|
||||||
#write
|
#write
|
||||||
with open(self.__write_to, 'wb') as write_obj:
|
with open(self.__write_to, 'wb') as write_obj:
|
||||||
write_obj.write('\n'.join(tokens))
|
write_obj.write('\n'.join(tokens))
|
||||||
@ -203,11 +206,9 @@ class Tokenize:
|
|||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "tokenize.data")
|
copy_obj.copy_file(self.__write_to, "tokenize.data")
|
||||||
# if self.__out_file:
|
|
||||||
# self.__file = self.__out_file
|
|
||||||
copy_obj.rename(self.__write_to, self.__file)
|
copy_obj.rename(self.__write_to, self.__file)
|
||||||
os.remove(self.__write_to)
|
os.remove(self.__write_to)
|
||||||
|
|
||||||
#self.__special_tokens = [ '_', '~', "'", '{', '}' ]
|
#self.__special_tokens = [ '_', '~', "'", '{', '}' ]
|
||||||
|
|
||||||
# import sys
|
# import sys
|
||||||
@ -223,4 +224,4 @@ class Tokenize:
|
|||||||
|
|
||||||
|
|
||||||
# if __name__ == '__main__':
|
# if __name__ == '__main__':
|
||||||
# sys.exit(main())
|
# sys.exit(main())
|
||||||
|
@ -22,7 +22,7 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
|
['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
|
||||||
'despeckle', 'no_sort', 'no_process', 'landscape',
|
'despeckle', 'no_sort', 'no_process', 'landscape',
|
||||||
'dont_sharpen', 'disable_trim', 'wide', 'output_format',
|
'dont_sharpen', 'disable_trim', 'wide', 'output_format',
|
||||||
'dont_grayscale']
|
'dont_grayscale', 'comic_image_size']
|
||||||
)
|
)
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
for x in get_option('output_format').option.choices:
|
for x in get_option('output_format').option.choices:
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>599</width>
|
<width>599</width>
|
||||||
<height>345</height>
|
<height>398</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowTitle">
|
<property name="windowTitle">
|
||||||
@ -37,70 +37,70 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="0">
|
<item row="4" column="0">
|
||||||
<widget class="QCheckBox" name="opt_dont_normalize">
|
<widget class="QCheckBox" name="opt_dont_normalize">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Disable &normalize</string>
|
<string>Disable &normalize</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="4" column="0">
|
<item row="5" column="0">
|
||||||
<widget class="QCheckBox" name="opt_keep_aspect_ratio">
|
<widget class="QCheckBox" name="opt_keep_aspect_ratio">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Keep &aspect ratio</string>
|
<string>Keep &aspect ratio</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="5" column="0">
|
<item row="6" column="0">
|
||||||
<widget class="QCheckBox" name="opt_dont_sharpen">
|
<widget class="QCheckBox" name="opt_dont_sharpen">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Disable &Sharpening</string>
|
<string>Disable &Sharpening</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="6" column="0">
|
<item row="7" column="0">
|
||||||
<widget class="QCheckBox" name="opt_disable_trim">
|
<widget class="QCheckBox" name="opt_disable_trim">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Disable &Trimming</string>
|
<string>Disable &Trimming</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="7" column="0">
|
<item row="8" column="0">
|
||||||
<widget class="QCheckBox" name="opt_wide">
|
<widget class="QCheckBox" name="opt_wide">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Wide</string>
|
<string>&Wide</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="8" column="0">
|
<item row="9" column="0">
|
||||||
<widget class="QCheckBox" name="opt_landscape">
|
<widget class="QCheckBox" name="opt_landscape">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Landscape</string>
|
<string>&Landscape</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="9" column="0">
|
<item row="10" column="0">
|
||||||
<widget class="QCheckBox" name="opt_right2left">
|
<widget class="QCheckBox" name="opt_right2left">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Right to left</string>
|
<string>&Right to left</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="10" column="0">
|
<item row="11" column="0">
|
||||||
<widget class="QCheckBox" name="opt_no_sort">
|
<widget class="QCheckBox" name="opt_no_sort">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Don't so&rt</string>
|
<string>Don't so&rt</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="11" column="0">
|
<item row="12" column="0">
|
||||||
<widget class="QCheckBox" name="opt_despeckle">
|
<widget class="QCheckBox" name="opt_despeckle">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>De&speckle</string>
|
<string>De&speckle</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="13" column="0">
|
<item row="14" column="0">
|
||||||
<spacer name="verticalSpacer">
|
<spacer name="verticalSpacer">
|
||||||
<property name="orientation">
|
<property name="orientation">
|
||||||
<enum>Qt::Vertical</enum>
|
<enum>Qt::Vertical</enum>
|
||||||
@ -120,7 +120,7 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="12" column="0">
|
<item row="13" column="0">
|
||||||
<widget class="QLabel" name="label">
|
<widget class="QLabel" name="label">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Output format:</string>
|
<string>&Output format:</string>
|
||||||
@ -130,7 +130,7 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="12" column="1">
|
<item row="13" column="1">
|
||||||
<widget class="QComboBox" name="opt_output_format"/>
|
<widget class="QComboBox" name="opt_output_format"/>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="0">
|
<item row="1" column="0">
|
||||||
@ -140,6 +140,19 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="3" column="0">
|
||||||
|
<widget class="QLabel" name="label_2">
|
||||||
|
<property name="text">
|
||||||
|
<string>Override image &size:</string>
|
||||||
|
</property>
|
||||||
|
<property name="buddy">
|
||||||
|
<cstring>opt_comic_image_size</cstring>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="3" column="1">
|
||||||
|
<widget class="QLineEdit" name="opt_comic_image_size"/>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
<resources/>
|
<resources/>
|
||||||
|
@ -838,9 +838,9 @@ class DeviceMixin(object): # {{{
|
|||||||
format_count[f] = 1
|
format_count[f] = 1
|
||||||
for f in self.device_manager.device.settings().format_map:
|
for f in self.device_manager.device.settings().format_map:
|
||||||
if f in format_count.keys():
|
if f in format_count.keys():
|
||||||
formats.append((f, _('%i of %i Books' % (format_count[f], len(rows))), True if f in aval_out_formats else False))
|
formats.append((f, _('%i of %i Books') % (format_count[f], len(rows))), True if f in aval_out_formats else False)
|
||||||
elif f in aval_out_formats:
|
elif f in aval_out_formats:
|
||||||
formats.append((f, _('0 of %i Books' % len(rows)), True))
|
formats.append((f, _('0 of %i Books') % len(rows)), True)
|
||||||
d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats)
|
d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats)
|
||||||
if d.exec_() != QDialog.Accepted:
|
if d.exec_() != QDialog.Accepted:
|
||||||
return
|
return
|
||||||
|
@ -7,7 +7,7 @@ import os, shutil
|
|||||||
|
|
||||||
from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \
|
from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \
|
||||||
QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \
|
QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \
|
||||||
QLineEdit, Qt, QProgressBar, QSize, QTimer
|
QLineEdit, Qt, QProgressBar, QSize, QTimer, QIcon, QTextEdit
|
||||||
|
|
||||||
from calibre.gui2.dialogs.confirm_delete import confirm
|
from calibre.gui2.dialogs.confirm_delete import confirm
|
||||||
from calibre.library.check_library import CheckLibrary, CHECKS
|
from calibre.library.check_library import CheckLibrary, CHECKS
|
||||||
@ -16,7 +16,7 @@ from calibre import prints, as_unicode
|
|||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.library.sqlite import DBThread, OperationalError
|
from calibre.library.sqlite import DBThread, OperationalError
|
||||||
|
|
||||||
class DBCheck(QDialog):
|
class DBCheck(QDialog): # {{{
|
||||||
|
|
||||||
def __init__(self, parent, db):
|
def __init__(self, parent, db):
|
||||||
QDialog.__init__(self, parent)
|
QDialog.__init__(self, parent)
|
||||||
@ -134,7 +134,7 @@ class DBCheck(QDialog):
|
|||||||
def reject(self):
|
def reject(self):
|
||||||
self.rejected = True
|
self.rejected = True
|
||||||
QDialog.reject(self)
|
QDialog.reject(self)
|
||||||
|
# }}}
|
||||||
|
|
||||||
class Item(QTreeWidgetItem):
|
class Item(QTreeWidgetItem):
|
||||||
pass
|
pass
|
||||||
@ -146,9 +146,70 @@ class CheckLibraryDialog(QDialog):
|
|||||||
self.db = db
|
self.db = db
|
||||||
|
|
||||||
self.setWindowTitle(_('Check Library -- Problems Found'))
|
self.setWindowTitle(_('Check Library -- Problems Found'))
|
||||||
|
self.setWindowIcon(QIcon(I('debug.png')))
|
||||||
|
|
||||||
self._layout = QVBoxLayout(self)
|
self._tl = QHBoxLayout()
|
||||||
self.setLayout(self._layout)
|
self._layout = QVBoxLayout()
|
||||||
|
self.setLayout(self._tl)
|
||||||
|
self._tl.addLayout(self._layout)
|
||||||
|
self.helpw = QTextEdit(self)
|
||||||
|
self._tl.addWidget(self.helpw)
|
||||||
|
self.helpw.setReadOnly(True)
|
||||||
|
self.helpw.setText(_('''\
|
||||||
|
<h1>Help</h1>
|
||||||
|
|
||||||
|
<p>calibre stores the list of your books and their metadata in a
|
||||||
|
database. The actual book files and covers are stored as normal
|
||||||
|
files in the calibre library folder. The database contains a list of the files
|
||||||
|
and covers belonging to each book entry. This tool checks that the
|
||||||
|
actual files in the library folder on your computer match the
|
||||||
|
information in the database.</p>
|
||||||
|
|
||||||
|
<p>The result of each type of check is shown to the left. The various
|
||||||
|
checks are:
|
||||||
|
</p>
|
||||||
|
<ul>
|
||||||
|
<li><b>Invalid titles</b>: These are files and folders appearing
|
||||||
|
in the library where books titles should, but that do not have the
|
||||||
|
correct form to be a book title.</li>
|
||||||
|
<li><b>Extra titles</b>: These are extra files in your calibre
|
||||||
|
library that appear to be correctly-formed titles, but have no corresponding
|
||||||
|
entries in the database</li>
|
||||||
|
<li><b>Invalid authors</b>: These are files appearing
|
||||||
|
in the library where only author folders should be.</li>
|
||||||
|
<li><b>Extra authors</b>: These are folders in the
|
||||||
|
calibre library that appear to be authors but that do not have entries
|
||||||
|
in the database</li>
|
||||||
|
<li><b>Missing book formats</b>: These are book formats that are in
|
||||||
|
the database but have no corresponding format file in the book's folder.
|
||||||
|
<li><b>Extra book formats</b>: These are book format files found in
|
||||||
|
the book's folder but not in the database.
|
||||||
|
<li><b>Unknown files in books</b>: These are extra files in the
|
||||||
|
folder of each book that do not correspond to a known format or cover
|
||||||
|
file.</li>
|
||||||
|
<li><b>Missing cover files</b>: These represent books that are marked
|
||||||
|
in the database as having covers but the actual cover files are
|
||||||
|
missing.</li>
|
||||||
|
<li><b>Cover files not in database</b>: These are books that have
|
||||||
|
cover files but are marked as not having covers in the database.</li>
|
||||||
|
<li><b>Folder raising exception</b>: These represent folders in the
|
||||||
|
calibre library that could not be processed/understood by this
|
||||||
|
tool.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>There are two kinds of automatic fixes possible: <i>Delete
|
||||||
|
marked</i> and <i>Fix marked</i>.</p>
|
||||||
|
<p><i>Delete marked</i> is used to remove extra files/folders/covers that
|
||||||
|
have no entries in the database. Check the box next to the item you want
|
||||||
|
to delete. Use with caution.</p>
|
||||||
|
<p><i>Fix marked</i> is applicable only to covers (the two lines marked
|
||||||
|
'fixable'). In the case of missing cover files, checking the fixable
|
||||||
|
box and pushing this button will remove the cover mark from the
|
||||||
|
database for all the files in that category. In the case of extra
|
||||||
|
cover files, checking the fixable box and pushing this button will
|
||||||
|
add the cover mark to the database for all the files in that
|
||||||
|
category.</p>
|
||||||
|
'''))
|
||||||
|
|
||||||
self.log = QTreeWidget(self)
|
self.log = QTreeWidget(self)
|
||||||
self.log.itemChanged.connect(self.item_changed)
|
self.log.itemChanged.connect(self.item_changed)
|
||||||
@ -199,7 +260,7 @@ class CheckLibraryDialog(QDialog):
|
|||||||
self._layout.addLayout(h)
|
self._layout.addLayout(h)
|
||||||
|
|
||||||
self._layout.addWidget(self.bbox)
|
self._layout.addWidget(self.bbox)
|
||||||
self.resize(750, 500)
|
self.resize(950, 500)
|
||||||
self.bbox.setEnabled(True)
|
self.bbox.setEnabled(True)
|
||||||
|
|
||||||
def do_exec(self):
|
def do_exec(self):
|
||||||
@ -347,5 +408,6 @@ class CheckLibraryDialog(QDialog):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app = QApplication([])
|
app = QApplication([])
|
||||||
d = CheckLibraryDialog()
|
from calibre.library import db
|
||||||
|
d = CheckLibraryDialog(None, db())
|
||||||
d.exec_()
|
d.exec_()
|
||||||
|
@ -266,7 +266,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
|
|||||||
|
|
||||||
def add_plugin(self):
|
def add_plugin(self):
|
||||||
path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
|
path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
|
||||||
filters=[(_('Plugins'), ['zip'])], all_files=False,
|
filters=[(_('Plugins') + ' (*.zip)', ['zip'])], all_files=False,
|
||||||
select_only_single_file=True)
|
select_only_single_file=True)
|
||||||
if not path:
|
if not path:
|
||||||
return
|
return
|
||||||
|
@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
help = _('The fields to output when cataloging books in the '
|
help = _('The fields to output when cataloging books in the '
|
||||||
'database. Should be a comma-separated list of fields.\n'
|
'database. Should be a comma-separated list of fields.\n'
|
||||||
'Available fields: %s.\n'
|
'Available fields: %s.\n'
|
||||||
|
'plus user-created custom fields.\n'
|
||||||
'Example: %s=title,authors,tags\n'
|
'Example: %s=title,authors,tags\n'
|
||||||
"Default: '%%default'\n"
|
"Default: '%%default'\n"
|
||||||
"Applies to: BIBTEX output format")%(', '.join(FIELDS),
|
"Applies to: BIBTEX output format")%(', '.join(FIELDS),
|
||||||
@ -269,7 +270,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
dest = 'bib_cit',
|
dest = 'bib_cit',
|
||||||
action = None,
|
action = None,
|
||||||
help = _('The template for citation creation from database fields.\n'
|
help = _('The template for citation creation from database fields.\n'
|
||||||
' Should be a template with {} enclosed fields.\n'
|
'Should be a template with {} enclosed fields.\n'
|
||||||
'Available fields: %s.\n'
|
'Available fields: %s.\n'
|
||||||
"Default: '%%default'\n"
|
"Default: '%%default'\n"
|
||||||
"Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
|
"Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
|
||||||
@ -344,7 +345,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
if field == 'authors' :
|
if field == 'authors' :
|
||||||
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
|
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
|
||||||
|
|
||||||
elif field in ['title', 'publisher', 'cover', 'uuid',
|
elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
|
||||||
'author_sort', 'series'] :
|
'author_sort', 'series'] :
|
||||||
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
|
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
|
||||||
|
|
||||||
@ -378,7 +379,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
if calibre_files:
|
if calibre_files:
|
||||||
files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
|
files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
|
||||||
for format in item]
|
for format in item]
|
||||||
bibtex_entry.append(u'files = "%s"' % u', '.join(files))
|
bibtex_entry.append(u'file = "%s"' % u', '.join(files))
|
||||||
|
|
||||||
elif field == 'series_index' :
|
elif field == 'series_index' :
|
||||||
bibtex_entry.append(u'volume = "%s"' % int(item))
|
bibtex_entry.append(u'volume = "%s"' % int(item))
|
||||||
@ -474,6 +475,8 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
opts_dict = vars(opts)
|
opts_dict = vars(opts)
|
||||||
log("%s(): Generating %s" % (self.name,self.fmt))
|
log("%s(): Generating %s" % (self.name,self.fmt))
|
||||||
|
if opts.connected_device['is_device_connected']:
|
||||||
|
log(" connected_device: %s" % opts.connected_device['name'])
|
||||||
if opts_dict['search_text']:
|
if opts_dict['search_text']:
|
||||||
log(" --search='%s'" % opts_dict['search_text'])
|
log(" --search='%s'" % opts_dict['search_text'])
|
||||||
|
|
||||||
@ -548,6 +551,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
as outfile:
|
as outfile:
|
||||||
#File header
|
#File header
|
||||||
nb_entries = len(data)
|
nb_entries = len(data)
|
||||||
|
|
||||||
#check in book strict if all is ok else throw a warning into log
|
#check in book strict if all is ok else throw a warning into log
|
||||||
if bib_entry == 'book' :
|
if bib_entry == 'book' :
|
||||||
nb_books = len(filter(check_entry_book_valid, data))
|
nb_books = len(filter(check_entry_book_valid, data))
|
||||||
@ -555,6 +559,11 @@ class BIBTEX(CatalogPlugin): # {{{
|
|||||||
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
|
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
|
||||||
nb_entries = nb_books
|
nb_entries = nb_books
|
||||||
|
|
||||||
|
# If connected device, add 'On Device' values to data
|
||||||
|
if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
|
||||||
|
for entry in data:
|
||||||
|
entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
|
||||||
|
|
||||||
outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
|
outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
|
||||||
outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
|
outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
|
||||||
% (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
|
% (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -112,6 +112,16 @@ _extra_lang_codes = {
|
|||||||
'en_IE' : _('English (Ireland)'),
|
'en_IE' : _('English (Ireland)'),
|
||||||
'en_CN' : _('English (China)'),
|
'en_CN' : _('English (China)'),
|
||||||
'es_PY' : _('Spanish (Paraguay)'),
|
'es_PY' : _('Spanish (Paraguay)'),
|
||||||
|
'es_UY' : _('Spanish (Uruguay)'),
|
||||||
|
'es_AR' : _('Spanish (Argentina)'),
|
||||||
|
'es_MX' : _('Spanish (Mexico)'),
|
||||||
|
'es_CU' : _('Spanish (Cuba)'),
|
||||||
|
'es_CL' : _('Spanish (Chile)'),
|
||||||
|
'es_EC' : _('Spanish (Ecuador)'),
|
||||||
|
'es_HN' : _('Spanish (Honduras)'),
|
||||||
|
'es_VE' : _('Spanish (Venezuela)'),
|
||||||
|
'es_BO' : _('Spanish (Bolivia)'),
|
||||||
|
'es_NI' : _('Spanish (Nicaragua)'),
|
||||||
'de_AT' : _('German (AT)'),
|
'de_AT' : _('German (AT)'),
|
||||||
'fr_BE' : _('French (BE)'),
|
'fr_BE' : _('French (BE)'),
|
||||||
'nl' : _('Dutch (NL)'),
|
'nl' : _('Dutch (NL)'),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user