mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
54e7ba109d
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
|
||||
title = '180.com.uy'
|
||||
__author__ = 'Gustavo Azambuja'
|
||||
description = 'Noticias de Uruguay'
|
||||
language = 'es'
|
||||
language = 'es_UY'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 5
|
||||
|
@ -20,7 +20,7 @@ class SieteDias(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
lang = 'es-AR'
|
||||
direction = 'ltr'
|
||||
|
@ -58,4 +58,4 @@ class Ambito(BasicNewsRecipe):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
@ -12,7 +12,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
language = 'es'
|
||||
language = 'es_MX'
|
||||
|
||||
#feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]
|
||||
|
||||
|
@ -17,7 +17,7 @@ class Axxon_news(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = False
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
encoding = 'utf-8'
|
||||
publication_type = 'magazine'
|
||||
INDEX = 'http://axxon.com.ar/rev/'
|
||||
|
@ -18,7 +18,7 @@ class Axxon_news(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = False
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
lang = 'es-AR'
|
||||
|
||||
|
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
|
||||
title = 'bitacora.com.uy'
|
||||
__author__ = 'Gustavo Azambuja'
|
||||
description = 'Noticias de Uruguay'
|
||||
language = 'es'
|
||||
language = 'es_UY'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 5
|
||||
|
@ -20,7 +20,7 @@ class BsAsEconomico(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
lang = 'es-AR'
|
||||
direction = 'ltr'
|
||||
|
@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
publication_type = 'newspaper'
|
||||
INDEX = 'http://www.clarin.com'
|
||||
masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
|
||||
|
@ -14,7 +14,7 @@ class CriticaDigital(BasicNewsRecipe):
|
||||
description = 'Noticias de Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
|
@ -11,7 +11,7 @@ class CubaDebate(BasicNewsRecipe):
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Contra el Terorismo Mediatico'
|
||||
oldest_article = 15
|
||||
language = 'es'
|
||||
language = 'es_CU'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
@ -20,8 +20,8 @@ class CubaDebate(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
|
||||
publication_type = 'newsportal'
|
||||
extra_css = """
|
||||
#BlogTitle{font-size: xx-large; font-weight: bold}
|
||||
extra_css = """
|
||||
#BlogTitle{font-size: xx-large; font-weight: bold}
|
||||
body{font-family: Verdana, Arial, Tahoma, sans-serif}
|
||||
"""
|
||||
|
||||
@ -41,7 +41,7 @@ class CubaDebate(BasicNewsRecipe):
|
||||
|
||||
feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
|
||||
remove_attributes=['width','height','lang']
|
||||
|
||||
|
||||
def print_version(self, url):
    '''
    Build the address of the print view for the article at ``url``.

    The print view is addressed by appending ``print/`` to the article
    URL. A trailing slash is added first when it is missing, so the suffix
    always forms its own path segment instead of being glued onto the last
    one (``.../article`` used to become ``.../articleprint/``).

    :param url: article URL taken from the feed
    :return: ``url`` followed by ``print/``
    '''
    if not url.endswith('/'):
        url += '/'
    return url + 'print/'
|
||||
|
||||
@ -50,5 +50,5 @@ class CubaDebate(BasicNewsRecipe):
|
||||
del item['style']
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
@ -16,7 +16,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'es'
|
||||
language = 'de_ES'
|
||||
publication_type = 'newsportal'
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
|
||||
|
@ -20,7 +20,7 @@ class Diagonales(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
lang = 'es-AR'
|
||||
direction = 'ltr'
|
||||
|
@ -20,8 +20,8 @@ class ElMercurio(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
|
||||
language = 'es_CL'
|
||||
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -33,7 +33,7 @@ class ElMercurio(BasicNewsRecipe):
|
||||
keep_only_tags = [dict(name='div', attrs={'id':['cont_iz_titulobajada','cont_iz_creditos_1_a','cont_iz_cuerpo']})]
|
||||
remove_tags = [dict(name='div', attrs={'id':'cont_iz_cuerpo_relacionados'})]
|
||||
remove_attributes = ['height','width']
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Noticias de ultima hora', u'http://rss.emol.com/rss.asp?canal=0')
|
||||
,(u'Nacional', u'http://rss.emol.com/rss.asp?canal=1')
|
||||
|
@ -13,7 +13,7 @@ class ObservaDigital(BasicNewsRecipe):
|
||||
title = 'Observa Digital'
|
||||
__author__ = 'yrvn'
|
||||
description = 'Noticias de Uruguay'
|
||||
language = 'es'
|
||||
language = 'es_UY'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 5
|
||||
|
@ -14,7 +14,7 @@ class General(BasicNewsRecipe):
|
||||
description = 'Noticias de Uruguay y el resto del mundo'
|
||||
publisher = 'EL PAIS S.A.'
|
||||
category = 'news, politics, Uruguay'
|
||||
language = 'es'
|
||||
language = 'es_UY'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 2
|
||||
|
@ -20,7 +20,7 @@ class ElUniversal(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
language = 'es'
|
||||
language = 'es_MX'
|
||||
|
||||
extra_css = '''
|
||||
body{font-family:Arial,Helvetica,sans-serif}
|
||||
|
@ -12,7 +12,7 @@ class ElArgentino(BasicNewsRecipe):
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Informacion Libre las 24 horas'
|
||||
publisher = 'ElArgentino.com'
|
||||
category = 'news, politics, Argentina'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript = True
|
||||
@ -20,7 +20,7 @@ class ElArgentino(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
|
||||
html2lrf_options = [
|
||||
@ -28,16 +28,16 @@ class ElArgentino(BasicNewsRecipe):
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'noprint' })
|
||||
,dict(name='div', attrs={'class':'encabezadoImprimir'})
|
||||
,dict(name='a' , attrs={'target':'_blank' })
|
||||
]
|
||||
|
||||
feeds = [
|
||||
|
||||
feeds = [
|
||||
(u'Portada' , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home' )
|
||||
,(u'Pais' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs' )
|
||||
,(u'Economia' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa' )
|
||||
@ -51,12 +51,12 @@ class ElArgentino(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
    '''
    Map an article URL to the site's ``Impresion.aspx`` page.

    The article id is the text between ``/nota-`` and the next ``-`` in
    ``url``.

    :param url: article URL taken from the feed
    :return: the ``Impresion.aspx?Id=<id>`` URL, or ``url`` unchanged when
             it contains no ``/nota-`` marker
    '''
    sep, article_part = url.partition('/nota-')[1:]
    if not sep:
        # No marker means no id to extract; returning the original URL is
        # more useful than a print URL with an empty Id.
        return url
    article_id = article_part.partition('-')[0]
    return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Add charset/language meta tags and drop inline styles.

    The meta markup is inserted as the first child of ``<head>`` and the
    ``style`` attribute is deleted from every element that carries one.

    :param soup: parsed article page
    :return: the same ``soup`` object, modified in place
    '''
    mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
    # NOTE(review): presumably soup.head is None for pages without a <head>
    # element; skip the insertion then instead of raising AttributeError.
    if soup.head is not None:
        soup.head.insert(0, mtag)
    for item in soup.findAll(style=True):
        del item['style']
    return soup
|
||||
|
@ -18,7 +18,7 @@ class ElComercio(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = True
|
||||
language = 'es'
|
||||
language = 'es_EC'
|
||||
masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
|
||||
extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '
|
||||
|
||||
|
@ -13,7 +13,7 @@ class ElCronista(BasicNewsRecipe):
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Noticias de Argentina'
|
||||
oldest_article = 2
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
@ -25,14 +25,14 @@ class ElCronista(BasicNewsRecipe):
|
||||
, '--category' , 'news, Argentina'
|
||||
, '--publisher' , title
|
||||
]
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='table', attrs={'width':'100%' })
|
||||
,dict(name='h1' , attrs={'class':'Arialgris16normal'})
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
|
||||
,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
|
||||
@ -69,4 +69,4 @@ class ElCronista(BasicNewsRecipe):
|
||||
if link_item:
|
||||
cover_url = index + link_item.img['src']
|
||||
return cover_url
|
||||
|
||||
|
||||
|
@ -21,7 +21,7 @@ class ElTiempoHn(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
language = 'es'
|
||||
language = 'es_HN'
|
||||
|
||||
lang = 'es-HN'
|
||||
direction = 'ltr'
|
||||
|
@ -18,7 +18,7 @@ class ElUniversal(BasicNewsRecipe):
|
||||
encoding = 'cp1252'
|
||||
publisher = 'El Universal'
|
||||
category = 'news, Caracas, Venezuela, world'
|
||||
language = 'es'
|
||||
language = 'es_VE'
|
||||
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
|
||||
|
||||
conversion_options = {
|
||||
|
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class ElUniversalImpresaRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
language = 'es'
|
||||
language = 'es_MX'
|
||||
version = 1
|
||||
|
||||
title = u'El Universal (Edici\u00F3n Impresa)'
|
||||
|
@ -17,7 +17,7 @@ class ElUniverso_Ecuador(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
language = 'es_EC'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'
|
||||
|
54
resources/recipes/explosm.recipe
Normal file
54
resources/recipes/explosm.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class Explosm(BasicNewsRecipe):
    '''
    Recipe for the Explosm feed (Cyanide and Happiness webcomic).

    Feed entries whose URL does not point at an explosm.net comic page are
    dropped in parse_feeds(), and the tag filters below select the comic
    image by its ``alt`` text.
    '''

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
    ]

    #match_regexps = [r'http://www.explosm.net/comics/.*']

    # The comic image is identified by its alt text; skip_ad_pages() looks
    # for the same image.
    keep_only_tags = [dict(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})]
    remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]

    # Built from explicit pieces so the stylesheet text does not depend on
    # the indentation of this class body.
    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'
    )

    def get_cover_url(self):
        '''Return the fixed URL of the cover image.'''
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        '''
        Parse the feeds and keep only articles that link to a comic page.

        Delegates to BasicNewsRecipe.parse_feeds() and then removes, from
        each feed's ``articles`` list, every article whose ``url`` does not
        contain ``http://www.explosm.net/comics``.

        :return: the feeds returned by the base class, filtered in place
        '''
        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            # Slice assignment keeps the same list object, so any other
            # reference to curfeed.articles sees the filtered content.
            curfeed.articles[:] = [
                curarticle for curarticle in curfeed.articles
                if re.search(r'http://www.explosm.net/comics', curarticle.url) is not None
            ]

        return feeds

    def skip_ad_pages(self, soup):
        '''
        Decide whether ``soup`` is a page without the comic image.

        :param soup: parsed page as downloaded
        :return: ``soup`` when the comic image is not found, otherwise None

        NOTE(review): calibre documents this hook as returning the HTML of
        the real page for an ad page and None otherwise; returning the soup
        itself is kept from the original and should be confirmed.
        '''
        # Skip ad pages served before actual article
        skip_tag = soup.find(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})
        if skip_tag is None:
            return soup
        return None
|
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
|
||||
title = 'freeway.com.uy'
|
||||
__author__ = 'Gustavo Azambuja'
|
||||
description = 'Revista Freeway, Montevideo, Uruguay'
|
||||
language = 'es'
|
||||
language = 'es_UY'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 1
|
||||
|
@ -20,7 +20,7 @@ class Granma(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
|
||||
language = 'es'
|
||||
language = 'es_CU'
|
||||
|
||||
remove_javascript = True
|
||||
|
||||
|
@ -18,7 +18,7 @@ class iEco(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
publisher = 'Grupo Clarin'
|
||||
category = 'news, economia, mercados, bolsa de valores, finanzas, empresas, negocios, empleos, emprendedores, marketinguniversidades, tecnologia, agronegocios, noticias, informacion'
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
cover_url = 'http://www.ieco.clarin.com/static2/images/Tapa-PDF.gif'
|
||||
extra_css = ' #bd{font-family: sans-serif} '
|
||||
|
||||
|
@ -16,7 +16,7 @@ class Infobae(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
encoding = 'cp1252'
|
||||
masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
|
||||
remove_javascript = True
|
||||
@ -25,7 +25,7 @@ class Infobae(BasicNewsRecipe):
|
||||
body{font-family:Arial,Helvetica,sans-serif;}
|
||||
.popUpTitulo{color:#0D4261; font-size: xx-large}
|
||||
'''
|
||||
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
@ -33,7 +33,7 @@ class Infobae(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
}
|
||||
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
||||
|
@ -20,7 +20,7 @@ class Juventudrebelde(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
language = 'es'
|
||||
language = 'es_CU'
|
||||
|
||||
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
|
||||
remove_javascript = True
|
||||
|
@ -50,4 +50,4 @@ class LaCuarta(BasicNewsRecipe):
|
||||
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
|
||||
|
||||
|
||||
language = 'es'
|
||||
language = 'es_CL'
|
||||
|
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
|
||||
title = 'La Diaria'
|
||||
__author__ = 'Gustavo Azambuja'
|
||||
description = 'Noticias de Uruguay'
|
||||
language = 'es'
|
||||
language = 'es_UY'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 5
|
||||
|
@ -19,7 +19,7 @@ class LaJornada_mx(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
language = 'es_MX'
|
||||
remove_empty_feeds = True
|
||||
cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf")
|
||||
masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'
|
||||
@ -34,8 +34,8 @@ class LaJornada_mx(BasicNewsRecipe):
|
||||
.credito{font-weight: bold; margin-left: 1em}
|
||||
.credito-autor{font-variant: small-caps; font-weight: bold }
|
||||
.credito-titulo{text-align: right}
|
||||
.hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em }
|
||||
.loc{font-weight: bold}
|
||||
.hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em }
|
||||
.loc{font-weight: bold}
|
||||
.carton{text-align: center}
|
||||
.credit{font-weight: bold}
|
||||
.sumario{font-weight: bold; text-align: center}
|
||||
@ -56,7 +56,7 @@ class LaJornada_mx(BasicNewsRecipe):
|
||||
,re.DOTALL|re.IGNORECASE)
|
||||
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
|
||||
]
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','credito-articulo','text','carton']})
|
||||
,dict(name='div', attrs={'id':'renderComments'})
|
||||
@ -88,4 +88,4 @@ class LaJornada_mx(BasicNewsRecipe):
|
||||
def get_article_url(self, article):
    '''
    Return the article link without its ``&partner=...`` tail.

    Everything from the last ``&partner=`` onward is removed. A link that
    does not contain the marker is returned unchanged; the previous
    ``rpartition(...)[0]`` produced an empty string in that case. A missing
    or empty link is returned as is instead of raising AttributeError.

    :param article: feed entry offering a dict-style ``get``
    :return: the cleaned link, or None when the entry has no ``link``
    '''
    rurl = article.get('link', None)
    if not rurl:
        return rurl
    head, sep, _tail = rurl.rpartition('&partner=')
    # rpartition puts the whole string in the last slot when the separator
    # is absent, so only trust `head` when the separator was found.
    return head if sep else rurl
|
||||
|
||||
|
||||
|
@ -18,7 +18,7 @@ class LaRazon_Bol(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
language = 'es_BO'
|
||||
publication_type = 'newspaper'
|
||||
delay = 1
|
||||
remove_empty_feeds = True
|
||||
|
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class LaSegunda(BasicNewsRecipe):
|
||||
title = 'La Segunda'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'El sitio de noticias online de Chile'
|
||||
description = 'El sitio de noticias online de Chile'
|
||||
publisher = 'La Segunda'
|
||||
category = 'news, politics, Chile'
|
||||
oldest_article = 2
|
||||
@ -19,9 +19,9 @@ class LaSegunda(BasicNewsRecipe):
|
||||
encoding = 'cp1252'
|
||||
masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
|
||||
remove_empty_feeds = True
|
||||
language = 'es'
|
||||
extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
|
||||
|
||||
language = 'es_CL'
|
||||
extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
@ -29,13 +29,13 @@ class LaSegunda(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
}
|
||||
|
||||
|
||||
remove_tags_before = dict(attrs={'class':'titulonegritastop'})
|
||||
remove_tags = [dict(name='img')]
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
|
||||
feeds = [
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
|
||||
,(u'Politica' , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
|
||||
,(u'Cronica' , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
|
||||
@ -49,6 +49,6 @@ class LaSegunda(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def print_version(self, url):
    '''
    Map an article URL to the site's ``_detalle_impresion.asp`` page.

    The article id is everything that follows ``index.asp?idnoticia=`` in
    ``url``.

    :param url: article URL taken from the feed
    :return: the print page URL for that id, or ``url`` unchanged when the
             marker is not present
    '''
    sep, article_id = url.partition('index.asp?idnoticia=')[1:]
    if not sep:
        # No marker means no id to extract; returning the original URL is
        # more useful than a print URL with an empty id.
        return url
    return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
|
||||
|
||||
|
||||
|
@ -11,15 +11,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class LaMujerDeMiVida(BasicNewsRecipe):
|
||||
title = 'La Mujer de mi Vida'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Cultura de otra manera'
|
||||
description = 'Cultura de otra manera'
|
||||
oldest_article = 90
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
publisher = 'La Mujer de mi Vida'
|
||||
category = 'literatura, critica, arte, ensayos'
|
||||
language = 'es'
|
||||
category = 'literatura, critica, arte, ensayos'
|
||||
language = 'es_AR'
|
||||
|
||||
INDEX = 'http://www.lamujerdemivida.com.ar/'
|
||||
html2lrf_options = [
|
||||
@ -28,8 +28,8 @@ class LaMujerDeMiVida(BasicNewsRecipe):
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
keep_only_tags = [dict(name='table', attrs={'width':'570'})]
|
||||
|
||||
@ -51,7 +51,7 @@ class LaMujerDeMiVida(BasicNewsRecipe):
|
||||
if cover_item:
|
||||
cover_url = self.INDEX + cover_item['src']
|
||||
return cover_url
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
totalfeeds = []
|
||||
lfeeds = self.get_feeds()
|
||||
@ -74,4 +74,4 @@ class LaMujerDeMiVida(BasicNewsRecipe):
|
||||
})
|
||||
totalfeeds.append((feedtitle, articles))
|
||||
return totalfeeds
|
||||
|
||||
|
||||
|
@ -16,17 +16,17 @@ class Lanacion(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
publication_type = 'newspaper'
|
||||
remove_empty_feeds = True
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
|
||||
extra_css = """ h1{font-family: Georgia,serif}
|
||||
h2{color: #626262}
|
||||
body{font-family: Arial,sans-serif}
|
||||
h2{color: #626262}
|
||||
body{font-family: Arial,sans-serif}
|
||||
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
|
||||
.notaFecha{color: #808080}
|
||||
.notaEpigrafe{font-size: x-small}
|
||||
.topNota h1{font-family: Arial,sans-serif}
|
||||
.notaFecha{color: #808080}
|
||||
.notaEpigrafe{font-size: x-small}
|
||||
.topNota h1{font-family: Arial,sans-serif}
|
||||
"""
|
||||
|
||||
|
||||
@ -45,7 +45,7 @@ class Lanacion(BasicNewsRecipe):
|
||||
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
|
||||
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
|
||||
]
|
||||
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
|
||||
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
|
||||
remove_attributes = ['height','width','visible','onclick','data-count','name']
|
||||
|
||||
feeds = [
|
||||
|
@ -51,4 +51,4 @@ class LaNacionChile(BasicNewsRecipe):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = 'es'
|
||||
language = 'es_CL'
|
||||
|
@ -21,9 +21,9 @@ class LaPrensa(BasicNewsRecipe):
|
||||
encoding = 'cp1252'
|
||||
# cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
|
||||
remove_javascript = True
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
lang = 'es'
|
||||
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
@ -32,7 +32,7 @@ class LaPrensa(BasicNewsRecipe):
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
filter_regexps = [r'.*archive.aspx.*']
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='td', attrs={'class':["link-registro","link-buscador"]}),
|
||||
dict(name='td', attrs={'id':["TDTabItem1","TDTabItem2","TDTabItem3","TDTabItem4"]}),
|
||||
@ -58,9 +58,9 @@ class LaPrensa(BasicNewsRecipe):
|
||||
dict(name='img', src = "/versions/1/imgs/separador-linea-azul.gif"),
|
||||
dict(name='img', src = " /versions/1/imgs/separador-linea.gif"),
|
||||
dict(name='a',text ="Powered by Civinext Groupware - V. 2.0.3567.23706"),
|
||||
dict(name='img', height ="0")
|
||||
dict(name='img', height ="0")
|
||||
]
|
||||
|
||||
|
||||
extra_css = '''
|
||||
.seccion{font-size:xx-small;}
|
||||
body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
||||
@ -69,7 +69,7 @@ class LaPrensa(BasicNewsRecipe):
|
||||
.fecha{font-size:xx-small;}
|
||||
.volanta{font-size:xx-small;}
|
||||
'''
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Politica' , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=4' )
|
||||
,(u'Economia' , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=5' )
|
||||
@ -80,14 +80,14 @@ class LaPrensa(BasicNewsRecipe):
|
||||
,(u'Espectaculos', u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx?Rss=10')
|
||||
]
|
||||
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
||||
|
||||
for t in soup.findAll(['table','td','tr','span','tbody']):
|
||||
t.name = 'div'
|
||||
for t in soup.findAll(['hr']):
|
||||
t.extract()
|
||||
|
||||
|
||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
@ -95,8 +95,8 @@ class LaPrensa(BasicNewsRecipe):
|
||||
for item in soup.findAll(align = "center"):
|
||||
del item['align']
|
||||
for item in soup.findAll(bgcolor="ffffff"):
|
||||
del item['bgcolor']
|
||||
del item['bgcolor']
|
||||
return soup
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -21,7 +21,7 @@ class LaPrensaHn(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
language = 'es'
|
||||
language = 'es_HN'
|
||||
|
||||
lang = 'es-HN'
|
||||
direction = 'ltr'
|
||||
|
@ -22,7 +22,7 @@ class LaPrensa_ni(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
remove_javascript = True
|
||||
language = 'es'
|
||||
language = 'es_NI'
|
||||
|
||||
months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
|
||||
current_month = months_es[datetime.date.today().month - 1]
|
||||
|
@ -1,73 +1,92 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
latimes.com
|
||||
www.latimes.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LATimes(BasicNewsRecipe):
|
||||
title = u'The Los Angeles Times'
|
||||
__author__ = u'Darko Miletic and Sujata Raman'
|
||||
description = u'News from Los Angeles'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
language = 'en'
|
||||
title = 'Los Angeles Times'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California'
|
||||
publisher = 'Tribune Company'
|
||||
category = 'news, politics, USA, Los Angeles, world'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
lang = 'en-US'
|
||||
language = 'en'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.latimes.com/images/logo.png'
|
||||
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
|
||||
extra_css = """
|
||||
body{font-family: Georgia,"Times New Roman",Times,serif }
|
||||
img{margin-bottom: 0.4em; margin-top: 0.8em; display:block}
|
||||
h2{font-size: 1.1em}
|
||||
.deckhead{font-size: small; text-transform: uppercase}
|
||||
.small{color: gray; font-size: small}
|
||||
.date,.time,.copyright{font-size: x-small; color:gray; font-style:italic;}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'language' : lang
|
||||
}
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : 'Yes'
|
||||
}
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
|
||||
h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
|
||||
.story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
|
||||
.entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
|
||||
.entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
|
||||
.credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
||||
.small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
||||
.byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
||||
.date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
|
||||
.time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
|
||||
.copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
|
||||
.subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
|
||||
'''
|
||||
|
||||
# recursions = 1
|
||||
# match_regexps = [r'http://www.latimes.com/.*page=[2-9]']
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':["story" ,"entry"] })]
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'story'})
|
||||
,dict(attrs={'class':['entry-header','time','entry-content']})
|
||||
]
|
||||
remove_tags_after=dict(name='p', attrs={'class':'copyright'})
|
||||
remove_tags = [
|
||||
dict(name=['meta','link','iframe','object','embed'])
|
||||
,dict(attrs={'class':['toolSet','articlerail','googleAd','entry-footer-left','entry-footer-right','entry-footer-social','google-ad-story-bottom','sphereTools']})
|
||||
,dict(attrs={'id':['article-promo','googleads','moduleArticleToolsContainer','gallery-subcontent']})
|
||||
]
|
||||
remove_attributes=['lang','xmlns:fb','xmlns:og','border','xtags','i','article_body']
|
||||
|
||||
|
||||
remove_tags = [ dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}),
|
||||
dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}),
|
||||
dict(name='p', attrs={'class':["entry-footer",]}),
|
||||
dict(name='ul', attrs={'class':"article-nav clearfix"}),
|
||||
dict(name=['iframe'])
|
||||
]
|
||||
|
||||
|
||||
feeds = [(u'News', u'http://feeds.latimes.com/latimes/news')
|
||||
,(u'Local','http://feeds.latimes.com/latimes/news/local')
|
||||
,(u'MostEmailed','http://feeds.latimes.com/MostEmailed')
|
||||
,(u'Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/')
|
||||
,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/')
|
||||
,('National','http://feeds.latimes.com/latimes/news/nationworld/nation')
|
||||
,('Politics','http://feeds.latimes.com/latimes/news/politics/')
|
||||
,('Business','http://feeds.latimes.com/latimes/business')
|
||||
,('Sports','http://feeds.latimes.com/latimes/sports/')
|
||||
,('Entertainment','http://feeds.latimes.com/latimes/entertainment/')
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Top News' , u'http://feeds.latimes.com/latimes/news' )
|
||||
,(u'Local News' , u'http://feeds.latimes.com/latimes/news/local' )
|
||||
,(u'National' , u'http://feeds.latimes.com/latimes/news/nationworld/nation' )
|
||||
,(u'National Politics' , u'http://feeds.latimes.com/latimes/news/politics/' )
|
||||
,(u'Business' , u'http://feeds.latimes.com/latimes/business' )
|
||||
,(u'Education' , u'http://feeds.latimes.com/latimes/news/education' )
|
||||
,(u'Environment' , u'http://feeds.latimes.com/latimes/news/science/environment' )
|
||||
,(u'Religion' , u'http://feeds.latimes.com/latimes/features/religion' )
|
||||
,(u'Science' , u'http://feeds.latimes.com/latimes/news/science' )
|
||||
,(u'Technology' , u'http://feeds.latimes.com/latimes/technology' )
|
||||
,(u'Africa' , u'http://feeds.latimes.com/latimes/africa' )
|
||||
,(u'Asia' , u'http://feeds.latimes.com/latimes/asia' )
|
||||
,(u'Europe' , u'http://feeds.latimes.com/latimes/europe' )
|
||||
,(u'Latin America' , u'http://feeds.latimes.com/latimes/latinamerica' )
|
||||
,(u'Middle East' , u'http://feeds.latimes.com/latimes/middleeast' )
|
||||
,(u'Arts&Culture' , u'http://feeds.feedburner.com/latimes/entertainment/news/arts' )
|
||||
,(u'Entertainment News' , u'http://feeds.feedburner.com/latimes/entertainment/news/' )
|
||||
,(u'Movie News' , u'http://feeds.feedburner.com/latimes/entertainment/news/movies/' )
|
||||
,(u'Movie Reviews' , u'http://feeds.feedburner.com/movies/reviews/' )
|
||||
,(u'Music News' , u'http://feeds.feedburner.com/latimes/entertainment/news/music/' )
|
||||
,(u'Pop Album Reviews' , u'http://feeds.feedburner.com/latimes/pop-album-reviews' )
|
||||
,(u'Restaurant Reviews' , u'http://feeds.feedburner.com/latimes/restaurant/reviews' )
|
||||
,(u'Theatar and Dance' , u'http://feeds.feedburner.com/latimes/theaterdance' )
|
||||
,(u'Autos' , u'http://feeds.latimes.com/latimes/classified/automotive/highway1/')
|
||||
,(u'Books' , u'http://feeds.latimes.com/features/books' )
|
||||
,(u'Food' , u'http://feeds.latimes.com/latimes/features/food/' )
|
||||
,(u'Health' , u'http://feeds.latimes.com/latimes/features/health/' )
|
||||
,(u'Real Estate' , u'http://feeds.latimes.com/latimes/classified/realestate/' )
|
||||
,(u'Commentary' , u'http://feeds2.feedburner.com/latimes/news/opinion/commentary/' )
|
||||
,(u'Sports' , u'http://feeds.latimes.com/latimes/sports/' )
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
ans = article.get('feedburner_origlink').rpartition('?')[0]
|
||||
ans = BasicNewsRecipe.get_article_url(self, article).rpartition('?')[0]
|
||||
|
||||
try:
|
||||
self.log('Looking for full story link in', ans)
|
||||
@ -83,4 +102,22 @@ class LATimes(BasicNewsRecipe):
|
||||
pass
|
||||
return ans
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
for item in soup.findAll('a'):
|
||||
limg = item.find('img')
|
||||
if item.string is not None:
|
||||
str = item.string
|
||||
item.replaceWith(str)
|
||||
else:
|
||||
if limg:
|
||||
item.name ='div'
|
||||
item.attrs =[]
|
||||
else:
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
return soup
|
||||
|
@ -21,7 +21,7 @@ class LaTribuna(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
language = 'es'
|
||||
language = 'es_HN'
|
||||
|
||||
lang = 'es-HN'
|
||||
direction = 'ltr'
|
||||
|
@ -18,7 +18,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
language = 'es_BO'
|
||||
publication_type = 'newspaper'
|
||||
delay = 1
|
||||
remove_empty_feeds = True
|
||||
|
@ -12,7 +12,7 @@ import datetime
|
||||
class Milenio(BasicNewsRecipe):
|
||||
title = u'Milenio-diario'
|
||||
__author__ = 'Bmsleight'
|
||||
language = 'es'
|
||||
language = 'es_MX'
|
||||
description = 'Milenio-diario'
|
||||
oldest_article = 10
|
||||
max_articles_per_feed = 100
|
||||
|
@ -20,7 +20,7 @@ class MiradasAlSur(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
lang = 'es-AR'
|
||||
direction = 'ltr'
|
||||
|
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
|
||||
title = 'Montevideo COMM'
|
||||
__author__ = 'Gustavo Azambuja'
|
||||
description = 'Noticias de Uruguay'
|
||||
language = 'es'
|
||||
language = 'es_UY'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 5
|
||||
|
@ -20,7 +20,7 @@ class Newsweek_Argentina(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
lang = 'es-AR'
|
||||
direction = 'ltr'
|
||||
|
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
|
||||
title = 'Observa Digital'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
description = 'Noticias desde Uruguay'
|
||||
language = 'es'
|
||||
language = 'es_UY'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 5
|
||||
|
@ -19,15 +19,15 @@ class Pagina12(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
#autor{font-weight: bold}
|
||||
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
|
||||
#autor{font-weight: bold}
|
||||
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
|
||||
#imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
|
||||
.fgprincipal{font-size: large; font-weight: bold}
|
||||
"""
|
||||
@ -83,7 +83,7 @@ class Pagina12(BasicNewsRecipe):
|
||||
del it['href']
|
||||
del it['title']
|
||||
for item in soup.findAll('p'):
|
||||
it = item.find('h3')
|
||||
it = item.find('h3')
|
||||
if it:
|
||||
it.name='span'
|
||||
return soup
|
||||
return soup
|
||||
|
@ -17,7 +17,7 @@ class Perfil(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif'
|
||||
extra_css = """
|
||||
|
@ -13,7 +13,7 @@ class Reptantes(BasicNewsRecipe):
|
||||
description = u"cada vez que te haces acupuntura, tu muñeco vudú sufre en algún lado"
|
||||
oldest_article = 130
|
||||
max_articles_per_feed = 100
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
encoding = 'utf-8'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
|
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
|
||||
title = 'Revista Bla'
|
||||
__author__ = 'Gustavo Azambuja'
|
||||
description = 'Moda | Uruguay'
|
||||
language = 'es'
|
||||
language = 'es_UY'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 5
|
||||
|
@ -20,7 +20,7 @@ class Veintitres(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = 'es'
|
||||
language = 'es_AR'
|
||||
|
||||
lang = 'es-AR'
|
||||
direction = 'ltr'
|
||||
|
@ -360,6 +360,9 @@ class LinuxFreeze(Command):
|
||||
def main():
|
||||
try:
|
||||
sys.argv[0] = sys.calibre_basename
|
||||
dfv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
|
||||
if dfv and os.path.exists(dfv):
|
||||
sys.path.insert(0, os.path.abspath(dfv))
|
||||
set_default_encoding()
|
||||
set_helper()
|
||||
set_qt_plugin_path()
|
||||
|
@ -139,6 +139,13 @@ class CHMReader(CHMFile):
|
||||
if self.hhc_path not in files and files:
|
||||
self.hhc_path = files[0]
|
||||
|
||||
if self.hhc_path == '.hhc' and self.hhc_path not in files:
|
||||
from calibre import walk
|
||||
for x in walk(output_dir):
|
||||
if os.path.basename(x).lower() in ('index.htm', 'index.html'):
|
||||
self.hhc_path = os.path.relpath(x, output_dir)
|
||||
break
|
||||
|
||||
def _reformat(self, data, htmlpath):
|
||||
try:
|
||||
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||
|
@ -53,7 +53,7 @@ def find_pages(dir, sort_on_mtime=False, verbose=False):
|
||||
prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
|
||||
return pages
|
||||
|
||||
class PageProcessor(list):
|
||||
class PageProcessor(list): # {{{
|
||||
'''
|
||||
Contains the actual image rendering logic. See :method:`render` and
|
||||
:method:`process_pages`.
|
||||
@ -111,6 +111,13 @@ class PageProcessor(list):
|
||||
|
||||
SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
|
||||
|
||||
try:
|
||||
if self.opts.comic_image_size:
|
||||
SCRWIDTH, SCRHEIGHT = map(int, [x.strip() for x in
|
||||
self.opts.comic_image_size.split('x')])
|
||||
except:
|
||||
pass # Ignore
|
||||
|
||||
if self.opts.keep_aspect_ratio:
|
||||
# Preserve the aspect ratio by adding border
|
||||
aspect = float(sizex) / float(sizey)
|
||||
@ -170,6 +177,7 @@ class PageProcessor(list):
|
||||
dest = dest[:-1]
|
||||
os.rename(dest+'8', dest)
|
||||
self.append(dest)
|
||||
# }}}
|
||||
|
||||
def render_pages(tasks, dest, opts, notification=lambda x, y: x):
|
||||
'''
|
||||
@ -291,7 +299,11 @@ class ComicInput(InputFormatPlugin):
|
||||
OptionRecommendation(name='no_process', recommended_value=False,
|
||||
help=_("Apply no processing to the image")),
|
||||
OptionRecommendation(name='dont_grayscale', recommended_value=False,
|
||||
help=_('Do not convert the image to grayscale (black and white)'))
|
||||
help=_('Do not convert the image to grayscale (black and white)')),
|
||||
OptionRecommendation(name='comic_image_size', recommended_value=None,
|
||||
help=_('Specify the image size as widthxheight pixels. Normally,'
|
||||
' an image size is automatically calculated from the output '
|
||||
'profile, this option overrides it.')),
|
||||
])
|
||||
|
||||
recommendations = set([
|
||||
|
@ -24,10 +24,11 @@ class HeuristicProcessor(object):
|
||||
self.chapters_no_title = 0
|
||||
self.chapters_with_title = 0
|
||||
self.blanks_deleted = False
|
||||
self.blanks_between_paragraphs = False
|
||||
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
|
||||
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
||||
self.softbreak = re.compile(r'\s*(?P<openline><p(?=\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
||||
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
|
||||
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|spacer)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
||||
self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
||||
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
|
||||
|
||||
def is_pdftohtml(self, src):
|
||||
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
||||
@ -42,8 +43,10 @@ class HeuristicProcessor(object):
|
||||
" chapters. - " + unicode(chap))
|
||||
return '<h2>'+chap+'</h2>\n'
|
||||
else:
|
||||
txt_chap = html2text(chap)
|
||||
txt_title = html2text(title)
|
||||
delete_whitespace = re.compile('^\s*(?P<c>.*?)\s*$')
|
||||
delete_quotes = re.compile('\'\"')
|
||||
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(chap)))
|
||||
txt_title = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(title)))
|
||||
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
||||
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
||||
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
|
||||
@ -375,9 +378,9 @@ class HeuristicProcessor(object):
|
||||
html = re.sub('<p\s?/>', '', html)
|
||||
# Get rid of empty span, bold, font, em, & italics tags
|
||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||
html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
|
||||
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||
html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
|
||||
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
||||
self.deleted_nbsps = True
|
||||
return html
|
||||
|
||||
@ -416,6 +419,28 @@ class HeuristicProcessor(object):
|
||||
return True
|
||||
return False
|
||||
|
||||
def detect_blank_formatting(self, html):
|
||||
blanks_before_headings = re.compile(r'(\s*<p[^>]*>\s*</p>){1,}(?=\s*<h\d)', re.IGNORECASE)
|
||||
blanks_after_headings = re.compile(r'(?<=</h\d>)(\s*<p[^>]*>\s*</p>){1,}', re.IGNORECASE)
|
||||
|
||||
def markup_spacers(match):
|
||||
blanks = match.group(0)
|
||||
blanks = self.blankreg.sub('\n<p class="spacer"> </p>', blanks)
|
||||
return blanks
|
||||
html = blanks_before_headings.sub(markup_spacers, html)
|
||||
html = blanks_after_headings.sub(markup_spacers, html)
|
||||
if self.html_preprocess_sections > self.min_chapters:
|
||||
html = re.sub('(?si)^.*?(?=<h\d)', markup_spacers, html)
|
||||
return html
|
||||
|
||||
def detect_soft_breaks(self, html):
|
||||
if not self.blanks_deleted and self.blanks_between_paragraphs:
|
||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
|
||||
else:
|
||||
html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
|
||||
return html
|
||||
|
||||
|
||||
|
||||
def __call__(self, html):
|
||||
self.log.debug("********* Heuristic processing HTML *********")
|
||||
@ -457,23 +482,23 @@ class HeuristicProcessor(object):
|
||||
#html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
|
||||
|
||||
# Determine whether the document uses interleaved blank lines
|
||||
blanks_between_paragraphs = self.analyze_blanks(html)
|
||||
self.blanks_between_paragraphs = self.analyze_blanks(html)
|
||||
|
||||
#self.dump(html, 'before_chapter_markup')
|
||||
# detect chapters/sections to match xpath or splitting logic
|
||||
|
||||
if getattr(self.extra_opts, 'markup_chapter_headings', False):
|
||||
html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
|
||||
html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
|
||||
|
||||
if getattr(self.extra_opts, 'italicize_common_cases', False):
|
||||
html = self.markup_italicis(html)
|
||||
|
||||
# If more than 40% of the lines are empty paragraphs and the user has enabled delete
|
||||
# blank paragraphs then delete blank lines to clean up spacing
|
||||
if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
|
||||
if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
|
||||
self.log.debug("deleting blank lines")
|
||||
self.blanks_deleted = True
|
||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
|
||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
|
||||
html = self.blankreg.sub('', html)
|
||||
|
||||
# Determine line ending type
|
||||
@ -525,14 +550,13 @@ class HeuristicProcessor(object):
|
||||
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
||||
|
||||
if getattr(self.extra_opts, 'format_scene_breaks', False):
|
||||
html = self.detect_blank_formatting(html)
|
||||
html = self.detect_soft_breaks(html)
|
||||
# Center separator lines
|
||||
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
|
||||
if not self.blanks_deleted:
|
||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
|
||||
html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
|
||||
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
|
||||
#html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
|
||||
|
||||
if self.deleted_nbsps:
|
||||
# put back non-breaking spaces in empty paragraphs to preserve original formatting
|
||||
html = self.blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
|
||||
html = self.softbreak.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
|
||||
html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
|
||||
return html
|
||||
|
@ -175,6 +175,19 @@ class EPUBInput(InputFormatPlugin):
|
||||
raise ValueError(
|
||||
'EPUB files with DTBook markup are not supported')
|
||||
|
||||
for x in list(opf.iterspine()):
|
||||
ref = x.get('idref', None)
|
||||
if ref is None:
|
||||
x.getparent().remove(x)
|
||||
continue
|
||||
for y in opf.itermanifest():
|
||||
if y.get('id', None) == ref and y.get('media-type', None) in \
|
||||
('application/vnd.adobe-page-template+xml',):
|
||||
p = x.getparent()
|
||||
if p is not None:
|
||||
p.remove(x)
|
||||
break
|
||||
|
||||
with open('content.opf', 'wb') as nopf:
|
||||
nopf.write(opf.render())
|
||||
|
||||
|
@ -83,6 +83,7 @@ class RTFInput(InputFormatPlugin):
|
||||
os.mkdir(debug_dir)
|
||||
debug_dir = 'rtfdebug'
|
||||
run_lev = 4
|
||||
self.log('Running RTFParser in debug mode')
|
||||
except:
|
||||
pass
|
||||
parser = ParseRtf(
|
||||
@ -230,22 +231,6 @@ class RTFInput(InputFormatPlugin):
|
||||
with open('styles.css', 'ab') as f:
|
||||
f.write(css)
|
||||
|
||||
# def preprocess(self, fname):
|
||||
# self.log('\tPreprocessing to convert unicode characters')
|
||||
# try:
|
||||
# data = open(fname, 'rb').read()
|
||||
# from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
|
||||
# tokenizer = RtfTokenizer(data)
|
||||
# tokens = RtfTokenParser(tokenizer.tokens)
|
||||
# data = tokens.toRTF()
|
||||
# fname = 'preprocessed.rtf'
|
||||
# with open(fname, 'wb') as f:
|
||||
# f.write(data)
|
||||
# except:
|
||||
# self.log.exception(
|
||||
# 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
|
||||
# return fname
|
||||
|
||||
def convert_borders(self, doc):
|
||||
border_styles = []
|
||||
style_map = {}
|
||||
@ -280,8 +265,6 @@ class RTFInput(InputFormatPlugin):
|
||||
self.opts = options
|
||||
self.log = log
|
||||
self.log('Converting RTF to XML...')
|
||||
#Name of the preprocesssed RTF file
|
||||
# fname = self.preprocess(stream.name)
|
||||
try:
|
||||
xml = self.generate_xml(stream.name)
|
||||
except RtfInvalidCodeException, e:
|
||||
@ -335,3 +318,4 @@ class RTFInput(InputFormatPlugin):
|
||||
opf.render(open('metadata.opf', 'wb'))
|
||||
return os.path.abspath('metadata.opf')
|
||||
|
||||
|
||||
|
@ -238,6 +238,8 @@ class ParseRtf:
|
||||
bug_handler = RtfInvalidCodeException,
|
||||
)
|
||||
enc = 'cp' + encode_obj.get_codepage()
|
||||
if enc == 'cp10000':
|
||||
enc = 'mac_roman'
|
||||
msg = 'Exception in token processing'
|
||||
if check_encoding_obj.check_encoding(self.__file, enc):
|
||||
file_name = self.__file if isinstance(self.__file, str) \
|
||||
|
@ -15,8 +15,10 @@
|
||||
# #
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os, tempfile, re
|
||||
import sys, os, tempfile, re
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy
|
||||
|
||||
class Colors:
|
||||
"""
|
||||
Change lines with color info from color numbers to the actual color names.
|
||||
@ -40,8 +42,10 @@ class Colors:
|
||||
self.__file = in_file
|
||||
self.__copy = copy
|
||||
self.__bug_handler = bug_handler
|
||||
self.__line = 0
|
||||
self.__write_to = tempfile.mktemp()
|
||||
self.__run_level = run_level
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
Initiate all values.
|
||||
@ -61,6 +65,7 @@ class Colors:
|
||||
self.__color_num = 1
|
||||
self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
|
||||
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
|
||||
|
||||
def __before_color_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -76,6 +81,7 @@ class Colors:
|
||||
if self.__token_info == 'mi<mk<clrtbl-beg':
|
||||
self.__state = 'in_color_table'
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __default_color_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -87,6 +93,7 @@ class Colors:
|
||||
"""
|
||||
hex_num = line[-3:-1]
|
||||
self.__color_string += hex_num
|
||||
|
||||
def __blue_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -109,6 +116,7 @@ class Colors:
|
||||
)
|
||||
self.__color_num += 1
|
||||
self.__color_string = '#'
|
||||
|
||||
def __in_color_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -127,12 +135,13 @@ class Colors:
|
||||
self.__state = 'after_color_table'
|
||||
else:
|
||||
action = self.__state_dict.get(self.__token_info)
|
||||
if action == None:
|
||||
if action is None:
|
||||
sys.stderr.write('in module colors.py\n'
|
||||
'function is self.__in_color_func\n'
|
||||
'no action for %s' % self.__token_info
|
||||
)
|
||||
action(line)
|
||||
|
||||
def __after_color_func(self, line):
|
||||
"""
|
||||
Check the to see if it contains color info. If it does, extract the
|
||||
@ -180,6 +189,7 @@ class Colors:
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
|
||||
|
||||
def __sub_from_line_color(self, match_obj):
|
||||
num = match_obj.group(1)
|
||||
try:
|
||||
@ -191,25 +201,27 @@ class Colors:
|
||||
else:
|
||||
return 'bdr-color_:no-value'
|
||||
hex_num = self.__figure_num(num)
|
||||
return_value = 'bdr-color_:%s' % hex_num
|
||||
return return_value
|
||||
return 'bdr-color_:%s' % hex_num
|
||||
|
||||
def __figure_num(self, num):
|
||||
if num == 0:
|
||||
hex_num = 'false'
|
||||
else:
|
||||
hex_num = self.__color_dict.get(num)
|
||||
if hex_num == None:
|
||||
if self.__run_level > 3:
|
||||
msg = 'no value in self.__color_dict for key %s\n' % num
|
||||
raise self.__bug_hanlder, msg
|
||||
if hex_num == None:
|
||||
if hex_num is None:
|
||||
hex_num = '0'
|
||||
if self.__run_level > 5:
|
||||
msg = 'no value in self.__color_dict' \
|
||||
'for key %s at line %d\n' % (num, self.__line)
|
||||
raise self.__bug_handler, msg
|
||||
return hex_num
|
||||
|
||||
def __do_nothing_func(self, line):
|
||||
"""
|
||||
Bad RTF will have text in the color table
|
||||
"""
|
||||
pass
|
||||
|
||||
def convert_colors(self):
|
||||
"""
|
||||
Requires:
|
||||
@ -226,20 +238,16 @@ class Colors:
|
||||
info, and substitute the number with the hex number.
|
||||
"""
|
||||
self.__initiate_values()
|
||||
read_obj = open(self.__file, 'r')
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
line_to_read = 1
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action == None:
|
||||
sys.stderr.write('no no matching state in module fonts.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
action(line)
|
||||
read_obj.close()
|
||||
self.__write_obj.close()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
for line in read_obj:
|
||||
self.__line+=1
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action is None:
|
||||
sys.stderr.write('no matching state in module fonts.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
action(line)
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "color.data")
|
||||
|
@ -33,13 +33,13 @@ class ConvertToTags:
|
||||
self.__copy = copy
|
||||
self.__dtd_path = dtd_path
|
||||
self.__no_dtd = no_dtd
|
||||
if encoding != 'mac_roman':
|
||||
self.__encoding = 'cp' + encoding
|
||||
else:
|
||||
self.__encoding = 'cp' + encoding
|
||||
if encoding == 'mac_roman':
|
||||
self.__encoding = 'mac_roman'
|
||||
self.__indent = indent
|
||||
self.__run_level = run_level
|
||||
self.__write_to = tempfile.mktemp()
|
||||
self.__convert_utf = False
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
@ -213,7 +213,8 @@ class ConvertToTags:
|
||||
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
|
||||
self.__convert_utf = True
|
||||
else:
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
|
||||
@ -253,15 +254,28 @@ class ConvertToTags:
|
||||
an empty tag function.
|
||||
"""
|
||||
self.__initiate_values()
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
self.__write_dec()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__token_info)
|
||||
if action is not None:
|
||||
action(line)
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
self.__write_dec()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__token_info)
|
||||
if action is not None:
|
||||
action(line)
|
||||
self.__write_obj.close()
|
||||
#convert all encodings to UTF8 to avoid unsupported encodings in lxml
|
||||
if self.__convert_utf:
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
copy_obj.rename(self.__write_to, self.__file)
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as write_obj:
|
||||
file = read_obj.read()
|
||||
try:
|
||||
file = file.decode(self.__encoding)
|
||||
write_obj.write(file.encode('utf-8'))
|
||||
except:
|
||||
sys.stderr.write('Conversion to UTF-8 is not possible,'
|
||||
' encoding should be very carefully checked')
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
|
||||
|
@ -75,12 +75,16 @@ class DefaultEncoding:
|
||||
self._encoding()
|
||||
self.__datafetched = True
|
||||
code_page = 'ansicpg' + self.__code_page
|
||||
if self.__code_page == '10000':
|
||||
self.__code_page = 'mac_roman'
|
||||
return self.__platform, code_page, self.__default_num
|
||||
|
||||
def get_codepage(self):
|
||||
if not self.__datafetched:
|
||||
self._encoding()
|
||||
self.__datafetched = True
|
||||
if self.__code_page == '10000':
|
||||
self.__code_page = 'mac_roman'
|
||||
return self.__code_page
|
||||
|
||||
def get_platform(self):
|
||||
|
@ -16,7 +16,9 @@
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os, tempfile
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy
|
||||
|
||||
class Fonts:
|
||||
"""
|
||||
Change lines with font info from font numbers to the actual font names.
|
||||
@ -45,6 +47,7 @@ class Fonts:
|
||||
self.__default_font_num = default_font_num
|
||||
self.__write_to = tempfile.mktemp()
|
||||
self.__run_level = run_level
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
Initiate all values.
|
||||
@ -67,6 +70,7 @@ class Fonts:
|
||||
self.__font_table = {}
|
||||
# individual font written
|
||||
self.__wrote_ind_font = 0
|
||||
|
||||
def __default_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -79,6 +83,7 @@ class Fonts:
|
||||
if self.__token_info == 'mi<mk<fonttb-beg':
|
||||
self.__state = 'font_table'
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __font_table_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -101,6 +106,7 @@ class Fonts:
|
||||
self.__font_num = self.__default_font_num
|
||||
self.__text_line = ''
|
||||
##self.__write_obj.write(line)
|
||||
|
||||
def __font_in_table_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -138,6 +144,7 @@ class Fonts:
|
||||
elif self.__token_info == 'mi<mk<fonttb-end':
|
||||
self.__found_end_font_table_func()
|
||||
self.__state = 'after_font_table'
|
||||
|
||||
def __found_end_font_table_func(self):
|
||||
"""
|
||||
Required:
|
||||
@ -150,7 +157,8 @@ class Fonts:
|
||||
if not self.__wrote_ind_font:
|
||||
self.__write_obj.write(
|
||||
'mi<tg<empty-att_'
|
||||
'<font-in-table<name>Times<num>0\n' )
|
||||
'<font-in-table<name>Times<num>0\n')
|
||||
|
||||
def __after_font_table_func(self, line):
|
||||
"""
|
||||
Required:
|
||||
@ -169,7 +177,7 @@ class Fonts:
|
||||
if self.__token_info == 'cw<ci<font-style':
|
||||
font_num = line[20:-1]
|
||||
font_name = self.__font_table.get(font_num)
|
||||
if font_name == None:
|
||||
if font_name is None:
|
||||
if self.__run_level > 3:
|
||||
msg = 'no value for %s in self.__font_table\n' % font_num
|
||||
raise self.__bug_handler, msg
|
||||
@ -182,6 +190,7 @@ class Fonts:
|
||||
)
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def convert_fonts(self):
|
||||
"""
|
||||
Required:
|
||||
@ -197,20 +206,15 @@ class Fonts:
|
||||
info. Substitute a font name for a font number.
|
||||
"""
|
||||
self.__initiate_values()
|
||||
read_obj = open(self.__file, 'r')
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
line_to_read = 1
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action == None:
|
||||
sys.stderr.write('no no matching state in module fonts.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
action(line)
|
||||
read_obj.close()
|
||||
self.__write_obj.close()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action is None:
|
||||
sys.stderr.write('no matching state in module fonts.py\n' \
|
||||
+ self.__state + '\n')
|
||||
action(line)
|
||||
default_font_name = self.__font_table.get(self.__default_font_num)
|
||||
if not default_font_name:
|
||||
default_font_name = 'Not Defined'
|
||||
|
@ -43,7 +43,7 @@ class GetCharMap:
|
||||
def get_char_map(self, map):
|
||||
if map == 'ansicpg0':
|
||||
map = 'ansicpg1250'
|
||||
if map in ('ansicpg10000', '10000'):
|
||||
if map == 'ansicpg10000':
|
||||
map = 'mac_roman'
|
||||
found_map = False
|
||||
map_dict = {}
|
||||
|
@ -126,12 +126,6 @@ class Tokenize:
|
||||
tokens = re.split(self.__splitexp, input_file)
|
||||
#remove empty tokens and \n
|
||||
return filter(lambda x: len(x) > 0 and x != '\n', tokens)
|
||||
#input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
|
||||
# line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
|
||||
# this is for older RTF
|
||||
#line = re.sub(self.__par_exp, '\\par ', line)
|
||||
#return filter(lambda x: len(x) > 0, \
|
||||
#(self.__remove_line.sub('', x) for x in tokens))
|
||||
|
||||
def __compile_expressions(self):
|
||||
SIMPLE_RPL = {
|
||||
@ -160,7 +154,7 @@ class Tokenize:
|
||||
}
|
||||
self.__replace_spchar = MReplace(SIMPLE_RPL)
|
||||
#add ;? in case of char following \u
|
||||
self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
|
||||
self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
|
||||
self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
|
||||
self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
|
||||
#manage upr/ud situations
|
||||
@ -172,14 +166,21 @@ class Tokenize:
|
||||
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
|
||||
#this is for old RTF
|
||||
self.__par_exp = re.compile(r'\\\n+')
|
||||
# self.__par_exp = re.compile(r'\\$')
|
||||
#handle cw using a digit as argument and without space as delimiter
|
||||
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
|
||||
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
|
||||
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
|
||||
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
|
||||
#self.__remove_line = re.compile(r'\n+')
|
||||
#self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
|
||||
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
|
||||
|
||||
def __correct_spliting(self, token):
|
||||
match_obj = re.search(self.__cwdigit_exp, token)
|
||||
if match_obj is None:
|
||||
return token
|
||||
else:
|
||||
return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
|
||||
|
||||
def tokenize(self):
|
||||
"""Main class for handling other methods. Reads the file \
|
||||
, uses method self.sub_reg to make basic substitutions,\
|
||||
@ -187,7 +188,7 @@ class Tokenize:
|
||||
#read
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
input_file = read_obj.read()
|
||||
|
||||
|
||||
#process simple replacements and split giving us a correct list
|
||||
#remove '' and \n in the process
|
||||
tokens = self.__sub_reg_split(input_file)
|
||||
@ -195,7 +196,9 @@ class Tokenize:
|
||||
tokens = map(self.__unicode_process, tokens)
|
||||
#remove empty items created by removing \uc
|
||||
tokens = filter(lambda x: len(x) > 0, tokens)
|
||||
|
||||
#handles bothersome cases
|
||||
tokens = map(self.__correct_spliting, tokens)
|
||||
|
||||
#write
|
||||
with open(self.__write_to, 'wb') as write_obj:
|
||||
write_obj.write('\n'.join(tokens))
|
||||
@ -203,11 +206,9 @@ class Tokenize:
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "tokenize.data")
|
||||
# if self.__out_file:
|
||||
# self.__file = self.__out_file
|
||||
copy_obj.rename(self.__write_to, self.__file)
|
||||
os.remove(self.__write_to)
|
||||
|
||||
|
||||
#self.__special_tokens = [ '_', '~', "'", '{', '}' ]
|
||||
|
||||
# import sys
|
||||
@ -223,4 +224,4 @@ class Tokenize:
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# sys.exit(main())
|
||||
# sys.exit(main())
|
||||
|
@ -22,7 +22,7 @@ class PluginWidget(Widget, Ui_Form):
|
||||
['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
|
||||
'despeckle', 'no_sort', 'no_process', 'landscape',
|
||||
'dont_sharpen', 'disable_trim', 'wide', 'output_format',
|
||||
'dont_grayscale']
|
||||
'dont_grayscale', 'comic_image_size']
|
||||
)
|
||||
self.db, self.book_id = db, book_id
|
||||
for x in get_option('output_format').option.choices:
|
||||
|
@ -7,7 +7,7 @@
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>599</width>
|
||||
<height>345</height>
|
||||
<height>398</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle">
|
||||
@ -37,70 +37,70 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<item row="4" column="0">
|
||||
<widget class="QCheckBox" name="opt_dont_normalize">
|
||||
<property name="text">
|
||||
<string>Disable &normalize</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="4" column="0">
|
||||
<item row="5" column="0">
|
||||
<widget class="QCheckBox" name="opt_keep_aspect_ratio">
|
||||
<property name="text">
|
||||
<string>Keep &aspect ratio</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="0">
|
||||
<item row="6" column="0">
|
||||
<widget class="QCheckBox" name="opt_dont_sharpen">
|
||||
<property name="text">
|
||||
<string>Disable &Sharpening</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="6" column="0">
|
||||
<item row="7" column="0">
|
||||
<widget class="QCheckBox" name="opt_disable_trim">
|
||||
<property name="text">
|
||||
<string>Disable &Trimming</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="7" column="0">
|
||||
<item row="8" column="0">
|
||||
<widget class="QCheckBox" name="opt_wide">
|
||||
<property name="text">
|
||||
<string>&Wide</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="8" column="0">
|
||||
<item row="9" column="0">
|
||||
<widget class="QCheckBox" name="opt_landscape">
|
||||
<property name="text">
|
||||
<string>&Landscape</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="9" column="0">
|
||||
<item row="10" column="0">
|
||||
<widget class="QCheckBox" name="opt_right2left">
|
||||
<property name="text">
|
||||
<string>&Right to left</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="10" column="0">
|
||||
<item row="11" column="0">
|
||||
<widget class="QCheckBox" name="opt_no_sort">
|
||||
<property name="text">
|
||||
<string>Don't so&rt</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="11" column="0">
|
||||
<item row="12" column="0">
|
||||
<widget class="QCheckBox" name="opt_despeckle">
|
||||
<property name="text">
|
||||
<string>De&speckle</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="13" column="0">
|
||||
<item row="14" column="0">
|
||||
<spacer name="verticalSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
@ -120,7 +120,7 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="12" column="0">
|
||||
<item row="13" column="0">
|
||||
<widget class="QLabel" name="label">
|
||||
<property name="text">
|
||||
<string>&Output format:</string>
|
||||
@ -130,7 +130,7 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="12" column="1">
|
||||
<item row="13" column="1">
|
||||
<widget class="QComboBox" name="opt_output_format"/>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
@ -140,6 +140,19 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="text">
|
||||
<string>Override image &size:</string>
|
||||
</property>
|
||||
<property name="buddy">
|
||||
<cstring>opt_comic_image_size</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="1">
|
||||
<widget class="QLineEdit" name="opt_comic_image_size"/>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<resources/>
|
||||
|
@ -838,9 +838,9 @@ class DeviceMixin(object): # {{{
|
||||
format_count[f] = 1
|
||||
for f in self.device_manager.device.settings().format_map:
|
||||
if f in format_count.keys():
|
||||
formats.append((f, _('%i of %i Books' % (format_count[f], len(rows))), True if f in aval_out_formats else False))
|
||||
formats.append((f, _('%i of %i Books') % (format_count[f], len(rows))), True if f in aval_out_formats else False)
|
||||
elif f in aval_out_formats:
|
||||
formats.append((f, _('0 of %i Books' % len(rows)), True))
|
||||
formats.append((f, _('0 of %i Books') % len(rows)), True)
|
||||
d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats)
|
||||
if d.exec_() != QDialog.Accepted:
|
||||
return
|
||||
|
@ -7,7 +7,7 @@ import os, shutil
|
||||
|
||||
from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \
|
||||
QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \
|
||||
QLineEdit, Qt, QProgressBar, QSize, QTimer
|
||||
QLineEdit, Qt, QProgressBar, QSize, QTimer, QIcon, QTextEdit
|
||||
|
||||
from calibre.gui2.dialogs.confirm_delete import confirm
|
||||
from calibre.library.check_library import CheckLibrary, CHECKS
|
||||
@ -16,7 +16,7 @@ from calibre import prints, as_unicode
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.library.sqlite import DBThread, OperationalError
|
||||
|
||||
class DBCheck(QDialog):
|
||||
class DBCheck(QDialog): # {{{
|
||||
|
||||
def __init__(self, parent, db):
|
||||
QDialog.__init__(self, parent)
|
||||
@ -134,7 +134,7 @@ class DBCheck(QDialog):
|
||||
def reject(self):
|
||||
self.rejected = True
|
||||
QDialog.reject(self)
|
||||
|
||||
# }}}
|
||||
|
||||
class Item(QTreeWidgetItem):
|
||||
pass
|
||||
@ -146,9 +146,70 @@ class CheckLibraryDialog(QDialog):
|
||||
self.db = db
|
||||
|
||||
self.setWindowTitle(_('Check Library -- Problems Found'))
|
||||
self.setWindowIcon(QIcon(I('debug.png')))
|
||||
|
||||
self._layout = QVBoxLayout(self)
|
||||
self.setLayout(self._layout)
|
||||
self._tl = QHBoxLayout()
|
||||
self._layout = QVBoxLayout()
|
||||
self.setLayout(self._tl)
|
||||
self._tl.addLayout(self._layout)
|
||||
self.helpw = QTextEdit(self)
|
||||
self._tl.addWidget(self.helpw)
|
||||
self.helpw.setReadOnly(True)
|
||||
self.helpw.setText(_('''\
|
||||
<h1>Help</h1>
|
||||
|
||||
<p>calibre stores the list of your books and their metadata in a
|
||||
database. The actual book files and covers are stored as normal
|
||||
files in the calibre library folder. The database contains a list of the files
|
||||
and covers belonging to each book entry. This tool checks that the
|
||||
actual files in the library folder on your computer match the
|
||||
information in the database.</p>
|
||||
|
||||
<p>The result of each type of check is shown to the left. The various
|
||||
checks are:
|
||||
</p>
|
||||
<ul>
|
||||
<li><b>Invalid titles</b>: These are files and folders appearing
|
||||
in the library where books titles should, but that do not have the
|
||||
correct form to be a book title.</li>
|
||||
<li><b>Extra titles</b>: These are extra files in your calibre
|
||||
library that appear to be correctly-formed titles, but have no corresponding
|
||||
entries in the database</li>
|
||||
<li><b>Invalid authors</b>: These are files appearing
|
||||
in the library where only author folders should be.</li>
|
||||
<li><b>Extra authors</b>: These are folders in the
|
||||
calibre library that appear to be authors but that do not have entries
|
||||
in the database</li>
|
||||
<li><b>Missing book formats</b>: These are book formats that are in
|
||||
the database but have no corresponding format file in the book's folder.
|
||||
<li><b>Extra book formats</b>: These are book format files found in
|
||||
the book's folder but not in the database.
|
||||
<li><b>Unknown files in books</b>: These are extra files in the
|
||||
folder of each book that do not correspond to a known format or cover
|
||||
file.</li>
|
||||
<li><b>Missing cover files</b>: These represent books that are marked
|
||||
in the database as having covers but the actual cover files are
|
||||
missing.</li>
|
||||
<li><b>Cover files not in database</b>: These are books that have
|
||||
cover files but are marked as not having covers in the database.</li>
|
||||
<li><b>Folder raising exception</b>: These represent folders in the
|
||||
calibre library that could not be processed/understood by this
|
||||
tool.</li>
|
||||
</ul>
|
||||
|
||||
<p>There are two kinds of automatic fixes possible: <i>Delete
|
||||
marked</i> and <i>Fix marked</i>.</p>
|
||||
<p><i>Delete marked</i> is used to remove extra files/folders/covers that
|
||||
have no entries in the database. Check the box next to the item you want
|
||||
to delete. Use with caution.</p>
|
||||
<p><i>Fix marked</i> is applicable only to covers (the two lines marked
|
||||
'fixable'). In the case of missing cover files, checking the fixable
|
||||
box and pushing this button will remove the cover mark from the
|
||||
database for all the files in that category. In the case of extra
|
||||
cover files, checking the fixable box and pushing this button will
|
||||
add the cover mark to the database for all the files in that
|
||||
category.</p>
|
||||
'''))
|
||||
|
||||
self.log = QTreeWidget(self)
|
||||
self.log.itemChanged.connect(self.item_changed)
|
||||
@ -199,7 +260,7 @@ class CheckLibraryDialog(QDialog):
|
||||
self._layout.addLayout(h)
|
||||
|
||||
self._layout.addWidget(self.bbox)
|
||||
self.resize(750, 500)
|
||||
self.resize(950, 500)
|
||||
self.bbox.setEnabled(True)
|
||||
|
||||
def do_exec(self):
|
||||
@ -347,5 +408,6 @@ class CheckLibraryDialog(QDialog):
|
||||
|
||||
if __name__ == '__main__':
|
||||
app = QApplication([])
|
||||
d = CheckLibraryDialog()
|
||||
from calibre.library import db
|
||||
d = CheckLibraryDialog(None, db())
|
||||
d.exec_()
|
||||
|
@ -266,7 +266,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
|
||||
|
||||
def add_plugin(self):
|
||||
path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
|
||||
filters=[(_('Plugins'), ['zip'])], all_files=False,
|
||||
filters=[(_('Plugins') + ' (*.zip)', ['zip'])], all_files=False,
|
||||
select_only_single_file=True)
|
||||
if not path:
|
||||
return
|
||||
|
@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
help = _('The fields to output when cataloging books in the '
|
||||
'database. Should be a comma-separated list of fields.\n'
|
||||
'Available fields: %s.\n'
|
||||
'plus user-created custom fields.\n'
|
||||
'Example: %s=title,authors,tags\n'
|
||||
"Default: '%%default'\n"
|
||||
"Applies to: BIBTEX output format")%(', '.join(FIELDS),
|
||||
@ -269,7 +270,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
dest = 'bib_cit',
|
||||
action = None,
|
||||
help = _('The template for citation creation from database fields.\n'
|
||||
' Should be a template with {} enclosed fields.\n'
|
||||
'Should be a template with {} enclosed fields.\n'
|
||||
'Available fields: %s.\n'
|
||||
"Default: '%%default'\n"
|
||||
"Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
|
||||
@ -344,7 +345,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
if field == 'authors' :
|
||||
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
|
||||
|
||||
elif field in ['title', 'publisher', 'cover', 'uuid',
|
||||
elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
|
||||
'author_sort', 'series'] :
|
||||
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
|
||||
|
||||
@ -378,7 +379,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
if calibre_files:
|
||||
files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
|
||||
for format in item]
|
||||
bibtex_entry.append(u'files = "%s"' % u', '.join(files))
|
||||
bibtex_entry.append(u'file = "%s"' % u', '.join(files))
|
||||
|
||||
elif field == 'series_index' :
|
||||
bibtex_entry.append(u'volume = "%s"' % int(item))
|
||||
@ -474,6 +475,8 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
if opts.verbose:
|
||||
opts_dict = vars(opts)
|
||||
log("%s(): Generating %s" % (self.name,self.fmt))
|
||||
if opts.connected_device['is_device_connected']:
|
||||
log(" connected_device: %s" % opts.connected_device['name'])
|
||||
if opts_dict['search_text']:
|
||||
log(" --search='%s'" % opts_dict['search_text'])
|
||||
|
||||
@ -548,6 +551,7 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
as outfile:
|
||||
#File header
|
||||
nb_entries = len(data)
|
||||
|
||||
#check in book strict if all is ok else throw a warning into log
|
||||
if bib_entry == 'book' :
|
||||
nb_books = len(filter(check_entry_book_valid, data))
|
||||
@ -555,6 +559,11 @@ class BIBTEX(CatalogPlugin): # {{{
|
||||
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
|
||||
nb_entries = nb_books
|
||||
|
||||
# If connected device, add 'On Device' values to data
|
||||
if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
|
||||
for entry in data:
|
||||
entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
|
||||
|
||||
outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
|
||||
outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
|
||||
% (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -112,6 +112,16 @@ _extra_lang_codes = {
|
||||
'en_IE' : _('English (Ireland)'),
|
||||
'en_CN' : _('English (China)'),
|
||||
'es_PY' : _('Spanish (Paraguay)'),
|
||||
'es_UY' : _('Spanish (Uruguay)'),
|
||||
'es_AR' : _('Spanish (Argentina)'),
|
||||
'es_MX' : _('Spanish (Mexico)'),
|
||||
'es_CU' : _('Spanish (Cuba)'),
|
||||
'es_CL' : _('Spanish (Chile)'),
|
||||
'es_EC' : _('Spanish (Ecuador)'),
|
||||
'es_HN' : _('Spanish (Honduras)'),
|
||||
'es_VE' : _('Spanish (Venezuela)'),
|
||||
'es_BO' : _('Spanish (Bolivia)'),
|
||||
'es_NI' : _('Spanish (Nicaragua)'),
|
||||
'de_AT' : _('German (AT)'),
|
||||
'fr_BE' : _('French (BE)'),
|
||||
'nl' : _('Dutch (NL)'),
|
||||
|
Loading…
x
Reference in New Issue
Block a user