Sync to trunk.

This commit is contained in:
John Schember 2011-01-31 18:56:29 -05:00
commit 54e7ba109d
75 changed files with 973 additions and 556 deletions

View File

@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
title = '180.com.uy'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
language = 'es_UY'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5

View File

@ -20,7 +20,7 @@ class SieteDias(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
language = 'es'
language = 'es_AR'
lang = 'es-AR'
direction = 'ltr'

View File

@ -58,4 +58,4 @@ class Ambito(BasicNewsRecipe):
del item['style']
return soup
language = 'es'
language = 'es_AR'

View File

@ -12,7 +12,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
oldest_article = 1
max_articles_per_feed = 100
language = 'es'
language = 'es_MX'
#feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]

View File

@ -17,7 +17,7 @@ class Axxon_news(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = False
use_embedded_content = False
language = 'es'
language = 'es_AR'
encoding = 'utf-8'
publication_type = 'magazine'
INDEX = 'http://axxon.com.ar/rev/'

View File

@ -18,7 +18,7 @@ class Axxon_news(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = False
use_embedded_content = False
language = 'es'
language = 'es_AR'
lang = 'es-AR'

View File

@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
title = 'bitacora.com.uy'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
language = 'es_UY'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5

View File

@ -20,7 +20,7 @@ class BsAsEconomico(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
language = 'es'
language = 'es_AR'
lang = 'es-AR'
direction = 'ltr'

View File

@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe):
use_embedded_content = False
no_stylesheets = True
encoding = 'utf8'
language = 'es'
language = 'es_AR'
publication_type = 'newspaper'
INDEX = 'http://www.clarin.com'
masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'

View File

@ -14,7 +14,7 @@ class CriticaDigital(BasicNewsRecipe):
description = 'Noticias de Argentina'
oldest_article = 2
max_articles_per_feed = 100
language = 'es'
language = 'es_AR'
no_stylesheets = True
use_embedded_content = False

View File

@ -11,7 +11,7 @@ class CubaDebate(BasicNewsRecipe):
__author__ = 'Darko Miletic'
description = 'Contra el Terorismo Mediatico'
oldest_article = 15
language = 'es'
language = 'es_CU'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
@ -20,8 +20,8 @@ class CubaDebate(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
publication_type = 'newsportal'
extra_css = """
#BlogTitle{font-size: xx-large; font-weight: bold}
extra_css = """
#BlogTitle{font-size: xx-large; font-weight: bold}
body{font-family: Verdana, Arial, Tahoma, sans-serif}
"""
@ -41,7 +41,7 @@ class CubaDebate(BasicNewsRecipe):
feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
remove_attributes=['width','height','lang']
def print_version(self, url):
return url + 'print/'
@ -50,5 +50,5 @@ class CubaDebate(BasicNewsRecipe):
del item['style']
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
item['alt'] = 'image'
return soup

View File

@ -16,7 +16,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
language = 'es'
language = 'de_ES'
publication_type = 'newsportal'
remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'

View File

@ -20,7 +20,7 @@ class Diagonales(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
language = 'es'
language = 'es_AR'
lang = 'es-AR'
direction = 'ltr'

View File

@ -20,8 +20,8 @@ class ElMercurio(BasicNewsRecipe):
masthead_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
remove_javascript = True
use_embedded_content = False
language = 'es'
language = 'es_CL'
conversion_options = {
'comment' : description
@ -33,7 +33,7 @@ class ElMercurio(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':['cont_iz_titulobajada','cont_iz_creditos_1_a','cont_iz_cuerpo']})]
remove_tags = [dict(name='div', attrs={'id':'cont_iz_cuerpo_relacionados'})]
remove_attributes = ['height','width']
feeds = [
(u'Noticias de ultima hora', u'http://rss.emol.com/rss.asp?canal=0')
,(u'Nacional', u'http://rss.emol.com/rss.asp?canal=1')

View File

@ -13,7 +13,7 @@ class ObservaDigital(BasicNewsRecipe):
title = 'Observa Digital'
__author__ = 'yrvn'
description = 'Noticias de Uruguay'
language = 'es'
language = 'es_UY'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5

View File

@ -14,7 +14,7 @@ class General(BasicNewsRecipe):
description = 'Noticias de Uruguay y el resto del mundo'
publisher = 'EL PAIS S.A.'
category = 'news, politics, Uruguay'
language = 'es'
language = 'es_UY'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 2

View File

@ -20,7 +20,7 @@ class ElUniversal(BasicNewsRecipe):
remove_javascript = True
remove_empty_feeds = True
publication_type = 'newspaper'
language = 'es'
language = 'es_MX'
extra_css = '''
body{font-family:Arial,Helvetica,sans-serif}

View File

@ -12,7 +12,7 @@ class ElArgentino(BasicNewsRecipe):
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
publisher = 'ElArgentino.com'
category = 'news, politics, Argentina'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
@ -20,7 +20,7 @@ class ElArgentino(BasicNewsRecipe):
use_embedded_content = False
encoding = 'utf8'
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
language = 'es'
language = 'es_AR'
html2lrf_options = [
@ -28,16 +28,16 @@ class ElArgentino(BasicNewsRecipe):
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [
dict(name='div', attrs={'id':'noprint' })
,dict(name='div', attrs={'class':'encabezadoImprimir'})
,dict(name='a' , attrs={'target':'_blank' })
]
feeds = [
feeds = [
(u'Portada' , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home' )
,(u'Pais' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs' )
,(u'Economia' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa' )
@ -51,12 +51,12 @@ class ElArgentino(BasicNewsRecipe):
def print_version(self, url):
main, sep, article_part = url.partition('/nota-')
article_id, rsep, rrest = article_part.partition('-')
article_id, rsep, rrest = article_part.partition('-')
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
del item['style']
return soup

View File

@ -18,7 +18,7 @@ class ElComercio(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = True
language = 'es'
language = 'es_EC'
masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '

View File

@ -13,7 +13,7 @@ class ElCronista(BasicNewsRecipe):
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina'
oldest_article = 2
language = 'es'
language = 'es_AR'
max_articles_per_feed = 100
no_stylesheets = True
@ -25,14 +25,14 @@ class ElCronista(BasicNewsRecipe):
, '--category' , 'news, Argentina'
, '--publisher' , title
]
keep_only_tags = [
dict(name='table', attrs={'width':'100%' })
,dict(name='h1' , attrs={'class':'Arialgris16normal'})
]
remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
feeds = [
(u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
@ -69,4 +69,4 @@ class ElCronista(BasicNewsRecipe):
if link_item:
cover_url = index + link_item.img['src']
return cover_url

View File

@ -21,7 +21,7 @@ class ElTiempoHn(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
language = 'es'
language = 'es_HN'
lang = 'es-HN'
direction = 'ltr'

View File

@ -18,7 +18,7 @@ class ElUniversal(BasicNewsRecipe):
encoding = 'cp1252'
publisher = 'El Universal'
category = 'news, Caracas, Venezuela, world'
language = 'es'
language = 'es_VE'
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
conversion_options = {

View File

@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ElUniversalImpresaRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'es'
language = 'es_MX'
version = 1
title = u'El Universal (Edici\u00F3n Impresa)'

View File

@ -17,7 +17,7 @@ class ElUniverso_Ecuador(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'es'
language = 'es_EC'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'

View File

@ -0,0 +1,54 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Explosm(BasicNewsRecipe):
title = u'Explosm Rotated'
__author__ = 'Andromeda Rabbit'
description = 'Explosm'
language = 'en'
use_embedded_content = False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
remove_empty_feeds = True
max_articles_per_feed = 10
feeds = [
(u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
]
#match_regexps = [r'http://www.explosm.net/comics/.*']
keep_only_tags = [dict(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})]
remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''
def get_cover_url(self):
return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'http://www.explosm.net/comics', curarticle.url) == None:
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def skip_ad_pages(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})
if skip_tag is None:
return soup
return None

View File

@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
title = 'freeway.com.uy'
__author__ = 'Gustavo Azambuja'
description = 'Revista Freeway, Montevideo, Uruguay'
language = 'es'
language = 'es_UY'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 1

View File

@ -20,7 +20,7 @@ class Granma(BasicNewsRecipe):
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
language = 'es'
language = 'es_CU'
remove_javascript = True

View File

@ -18,7 +18,7 @@ class iEco(BasicNewsRecipe):
encoding = 'utf-8'
publisher = 'Grupo Clarin'
category = 'news, economia, mercados, bolsa de valores, finanzas, empresas, negocios, empleos, emprendedores, marketinguniversidades, tecnologia, agronegocios, noticias, informacion'
language = 'es'
language = 'es_AR'
cover_url = 'http://www.ieco.clarin.com/static2/images/Tapa-PDF.gif'
extra_css = ' #bd{font-family: sans-serif} '

View File

@ -16,7 +16,7 @@ class Infobae(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'es'
language = 'es_AR'
encoding = 'cp1252'
masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True
@ -25,7 +25,7 @@ class Infobae(BasicNewsRecipe):
body{font-family:Arial,Helvetica,sans-serif;}
.popUpTitulo{color:#0D4261; font-size: xx-large}
'''
conversion_options = {
'comment' : description
, 'tags' : category
@ -33,7 +33,7 @@ class Infobae(BasicNewsRecipe):
, 'language' : language
, 'linearize_tables' : True
}
feeds = [
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )

View File

@ -20,7 +20,7 @@ class Juventudrebelde(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
language = 'es'
language = 'es_CU'
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
remove_javascript = True

View File

@ -50,4 +50,4 @@ class LaCuarta(BasicNewsRecipe):
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
language = 'es'
language = 'es_CL'

View File

@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
title = 'La Diaria'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
language = 'es_UY'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5

View File

@ -19,7 +19,7 @@ class LaJornada_mx(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'es'
language = 'es_MX'
remove_empty_feeds = True
cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf")
masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'
@ -34,8 +34,8 @@ class LaJornada_mx(BasicNewsRecipe):
.credito{font-weight: bold; margin-left: 1em}
.credito-autor{font-variant: small-caps; font-weight: bold }
.credito-titulo{text-align: right}
.hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em }
.loc{font-weight: bold}
.hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em }
.loc{font-weight: bold}
.carton{text-align: center}
.credit{font-weight: bold}
.sumario{font-weight: bold; text-align: center}
@ -56,7 +56,7 @@ class LaJornada_mx(BasicNewsRecipe):
,re.DOTALL|re.IGNORECASE)
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
]
keep_only_tags = [
dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','credito-articulo','text','carton']})
,dict(name='div', attrs={'id':'renderComments'})
@ -88,4 +88,4 @@ class LaJornada_mx(BasicNewsRecipe):
def get_article_url(self, article):
rurl = article.get('link', None)
return rurl.rpartition('&partner=')[0]

View File

@ -18,7 +18,7 @@ class LaRazon_Bol(BasicNewsRecipe):
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
language = 'es_BO'
publication_type = 'newspaper'
delay = 1
remove_empty_feeds = True

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LaSegunda(BasicNewsRecipe):
title = 'La Segunda'
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
description = 'El sitio de noticias online de Chile'
publisher = 'La Segunda'
category = 'news, politics, Chile'
oldest_article = 2
@ -19,9 +19,9 @@ class LaSegunda(BasicNewsRecipe):
encoding = 'cp1252'
masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_empty_feeds = True
language = 'es'
extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
language = 'es_CL'
extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
conversion_options = {
'comment' : description
, 'tags' : category
@ -29,13 +29,13 @@ class LaSegunda(BasicNewsRecipe):
, 'language' : language
, 'linearize_tables' : True
}
remove_tags_before = dict(attrs={'class':'titulonegritastop'})
remove_tags = [dict(name='img')]
remove_attributes = ['width','height']
feeds = [
feeds = [
(u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
,(u'Politica' , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
,(u'Cronica' , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
@ -49,6 +49,6 @@ class LaSegunda(BasicNewsRecipe):
]
def print_version(self, url):
rest, sep, article_id = url.partition('index.asp?idnoticia=')
rest, sep, article_id = url.partition('index.asp?idnoticia=')
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id

View File

@ -11,15 +11,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LaMujerDeMiVida(BasicNewsRecipe):
title = 'La Mujer de mi Vida'
__author__ = 'Darko Miletic'
description = 'Cultura de otra manera'
description = 'Cultura de otra manera'
oldest_article = 90
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
publisher = 'La Mujer de mi Vida'
category = 'literatura, critica, arte, ensayos'
language = 'es'
category = 'literatura, critica, arte, ensayos'
language = 'es_AR'
INDEX = 'http://www.lamujerdemivida.com.ar/'
html2lrf_options = [
@ -28,8 +28,8 @@ class LaMujerDeMiVida(BasicNewsRecipe):
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [dict(name='table', attrs={'width':'570'})]
@ -51,7 +51,7 @@ class LaMujerDeMiVida(BasicNewsRecipe):
if cover_item:
cover_url = self.INDEX + cover_item['src']
return cover_url
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
@ -74,4 +74,4 @@ class LaMujerDeMiVida(BasicNewsRecipe):
})
totalfeeds.append((feedtitle, articles))
return totalfeeds

View File

@ -16,17 +16,17 @@ class Lanacion(BasicNewsRecipe):
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
language = 'es'
language = 'es_AR'
publication_type = 'newspaper'
remove_empty_feeds = True
remove_empty_feeds = True
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
extra_css = """ h1{font-family: Georgia,serif}
h2{color: #626262}
body{font-family: Arial,sans-serif}
h2{color: #626262}
body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.notaFecha{color: #808080}
.notaEpigrafe{font-size: x-small}
.topNota h1{font-family: Arial,sans-serif}
.notaFecha{color: #808080}
.notaEpigrafe{font-size: x-small}
.topNota h1{font-family: Arial,sans-serif}
"""
@ -45,7 +45,7 @@ class Lanacion(BasicNewsRecipe):
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
]
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
remove_attributes = ['height','width','visible','onclick','data-count','name']
feeds = [

View File

@ -51,4 +51,4 @@ class LaNacionChile(BasicNewsRecipe):
del item['style']
return soup
language = 'es'
language = 'es_CL'

View File

@ -21,9 +21,9 @@ class LaPrensa(BasicNewsRecipe):
encoding = 'cp1252'
# cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
remove_javascript = True
language = 'es'
language = 'es_AR'
lang = 'es'
html2lrf_options = [
'--comment', description
, '--category', category
@ -32,7 +32,7 @@ class LaPrensa(BasicNewsRecipe):
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
filter_regexps = [r'.*archive.aspx.*']
remove_tags = [
dict(name='td', attrs={'class':["link-registro","link-buscador"]}),
dict(name='td', attrs={'id':["TDTabItem1","TDTabItem2","TDTabItem3","TDTabItem4"]}),
@ -58,9 +58,9 @@ class LaPrensa(BasicNewsRecipe):
dict(name='img', src = "/versions/1/imgs/separador-linea-azul.gif"),
dict(name='img', src = " /versions/1/imgs/separador-linea.gif"),
dict(name='a',text ="Powered by Civinext Groupware - V. 2.0.3567.23706"),
dict(name='img', height ="0")
dict(name='img', height ="0")
]
extra_css = '''
.seccion{font-size:xx-small;}
body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
@ -69,7 +69,7 @@ class LaPrensa(BasicNewsRecipe):
.fecha{font-size:xx-small;}
.volanta{font-size:xx-small;}
'''
feeds = [
(u'Politica' , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=4' )
,(u'Economia' , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=5' )
@ -80,14 +80,14 @@ class LaPrensa(BasicNewsRecipe):
,(u'Espectaculos', u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx?Rss=10')
]
def preprocess_html(self, soup):
for t in soup.findAll(['table','td','tr','span','tbody']):
t.name = 'div'
for t in soup.findAll(['hr']):
t.extract()
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
@ -95,8 +95,8 @@ class LaPrensa(BasicNewsRecipe):
for item in soup.findAll(align = "center"):
del item['align']
for item in soup.findAll(bgcolor="ffffff"):
del item['bgcolor']
del item['bgcolor']
return soup

View File

@ -21,7 +21,7 @@ class LaPrensaHn(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
language = 'es'
language = 'es_HN'
lang = 'es-HN'
direction = 'ltr'

View File

@ -22,7 +22,7 @@ class LaPrensa_ni(BasicNewsRecipe):
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
language = 'es'
language = 'es_NI'
months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
current_month = months_es[datetime.date.today().month - 1]

View File

@ -1,73 +1,92 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
latimes.com
www.latimes.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LATimes(BasicNewsRecipe):
title = u'The Los Angeles Times'
__author__ = u'Darko Miletic and Sujata Raman'
description = u'News from Los Angeles'
oldest_article = 7
max_articles_per_feed = 100
language = 'en'
title = 'Los Angeles Times'
__author__ = 'Darko Miletic'
description = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California'
publisher = 'Tribune Company'
category = 'news, politics, USA, Los Angeles, world'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
encoding = 'utf-8'
lang = 'en-US'
language = 'en'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.latimes.com/images/logo.png'
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
extra_css = """
body{font-family: Georgia,"Times New Roman",Times,serif }
img{margin-bottom: 0.4em; margin-top: 0.8em; display:block}
h2{font-size: 1.1em}
.deckhead{font-size: small; text-transform: uppercase}
.small{color: gray; font-size: small}
.date,.time,.copyright{font-size: x-small; color:gray; font-style:italic;}
"""
conversion_options = {
'comment' : description
, 'language' : lang
}
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : 'Yes'
}
extra_css = '''
h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
.story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
.entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
.entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
.credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
.small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
.byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
.date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
.time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
.copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
.subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
'''
# recursions = 1
# match_regexps = [r'http://www.latimes.com/.*page=[2-9]']
keep_only_tags = [dict(name='div', attrs={'class':["story" ,"entry"] })]
keep_only_tags = [
dict(name='div', attrs={'class':'story'})
,dict(attrs={'class':['entry-header','time','entry-content']})
]
remove_tags_after=dict(name='p', attrs={'class':'copyright'})
remove_tags = [
dict(name=['meta','link','iframe','object','embed'])
,dict(attrs={'class':['toolSet','articlerail','googleAd','entry-footer-left','entry-footer-right','entry-footer-social','google-ad-story-bottom','sphereTools']})
,dict(attrs={'id':['article-promo','googleads','moduleArticleToolsContainer','gallery-subcontent']})
]
remove_attributes=['lang','xmlns:fb','xmlns:og','border','xtags','i','article_body']
remove_tags = [ dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}),
dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}),
dict(name='p', attrs={'class':["entry-footer",]}),
dict(name='ul', attrs={'class':"article-nav clearfix"}),
dict(name=['iframe'])
]
feeds = [(u'News', u'http://feeds.latimes.com/latimes/news')
,(u'Local','http://feeds.latimes.com/latimes/news/local')
,(u'MostEmailed','http://feeds.latimes.com/MostEmailed')
,(u'Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/')
,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/')
,('National','http://feeds.latimes.com/latimes/news/nationworld/nation')
,('Politics','http://feeds.latimes.com/latimes/news/politics/')
,('Business','http://feeds.latimes.com/latimes/business')
,('Sports','http://feeds.latimes.com/latimes/sports/')
,('Entertainment','http://feeds.latimes.com/latimes/entertainment/')
]
feeds = [
(u'Top News' , u'http://feeds.latimes.com/latimes/news' )
,(u'Local News' , u'http://feeds.latimes.com/latimes/news/local' )
,(u'National' , u'http://feeds.latimes.com/latimes/news/nationworld/nation' )
,(u'National Politics' , u'http://feeds.latimes.com/latimes/news/politics/' )
,(u'Business' , u'http://feeds.latimes.com/latimes/business' )
,(u'Education' , u'http://feeds.latimes.com/latimes/news/education' )
,(u'Environment' , u'http://feeds.latimes.com/latimes/news/science/environment' )
,(u'Religion' , u'http://feeds.latimes.com/latimes/features/religion' )
,(u'Science' , u'http://feeds.latimes.com/latimes/news/science' )
,(u'Technology' , u'http://feeds.latimes.com/latimes/technology' )
,(u'Africa' , u'http://feeds.latimes.com/latimes/africa' )
,(u'Asia' , u'http://feeds.latimes.com/latimes/asia' )
,(u'Europe' , u'http://feeds.latimes.com/latimes/europe' )
,(u'Latin America' , u'http://feeds.latimes.com/latimes/latinamerica' )
,(u'Middle East' , u'http://feeds.latimes.com/latimes/middleeast' )
,(u'Arts&Culture' , u'http://feeds.feedburner.com/latimes/entertainment/news/arts' )
,(u'Entertainment News' , u'http://feeds.feedburner.com/latimes/entertainment/news/' )
,(u'Movie News' , u'http://feeds.feedburner.com/latimes/entertainment/news/movies/' )
,(u'Movie Reviews' , u'http://feeds.feedburner.com/movies/reviews/' )
,(u'Music News' , u'http://feeds.feedburner.com/latimes/entertainment/news/music/' )
,(u'Pop Album Reviews' , u'http://feeds.feedburner.com/latimes/pop-album-reviews' )
,(u'Restaurant Reviews' , u'http://feeds.feedburner.com/latimes/restaurant/reviews' )
,(u'Theatar and Dance' , u'http://feeds.feedburner.com/latimes/theaterdance' )
,(u'Autos' , u'http://feeds.latimes.com/latimes/classified/automotive/highway1/')
,(u'Books' , u'http://feeds.latimes.com/features/books' )
,(u'Food' , u'http://feeds.latimes.com/latimes/features/food/' )
,(u'Health' , u'http://feeds.latimes.com/latimes/features/health/' )
,(u'Real Estate' , u'http://feeds.latimes.com/latimes/classified/realestate/' )
,(u'Commentary' , u'http://feeds2.feedburner.com/latimes/news/opinion/commentary/' )
,(u'Sports' , u'http://feeds.latimes.com/latimes/sports/' )
]
def get_article_url(self, article):
ans = article.get('feedburner_origlink').rpartition('?')[0]
ans = BasicNewsRecipe.get_article_url(self, article).rpartition('?')[0]
try:
self.log('Looking for full story link in', ans)
@ -83,4 +102,22 @@ class LATimes(BasicNewsRecipe):
pass
return ans
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name ='div'
item.attrs =[]
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

View File

@ -21,7 +21,7 @@ class LaTribuna(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
language = 'es'
language = 'es_HN'
lang = 'es-HN'
direction = 'ltr'

View File

@ -18,7 +18,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
language = 'es_BO'
publication_type = 'newspaper'
delay = 1
remove_empty_feeds = True

View File

@ -12,7 +12,7 @@ import datetime
class Milenio(BasicNewsRecipe):
title = u'Milenio-diario'
__author__ = 'Bmsleight'
language = 'es'
language = 'es_MX'
description = 'Milenio-diario'
oldest_article = 10
max_articles_per_feed = 100

View File

@ -20,7 +20,7 @@ class MiradasAlSur(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
language = 'es'
language = 'es_AR'
lang = 'es-AR'
direction = 'ltr'

View File

@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
title = 'Montevideo COMM'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
language = 'es_UY'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5

View File

@ -20,7 +20,7 @@ class Newsweek_Argentina(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
language = 'es'
language = 'es_AR'
lang = 'es-AR'
direction = 'ltr'

View File

@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
title = 'Observa Digital'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
description = 'Noticias desde Uruguay'
language = 'es'
language = 'es_UY'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5

View File

@ -19,15 +19,15 @@ class Pagina12(BasicNewsRecipe):
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
language = 'es_AR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
#autor{font-weight: bold}
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
#autor{font-weight: bold}
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
#imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
.fgprincipal{font-size: large; font-weight: bold}
"""
@ -83,7 +83,7 @@ class Pagina12(BasicNewsRecipe):
del it['href']
del it['title']
for item in soup.findAll('p'):
it = item.find('h3')
it = item.find('h3')
if it:
it.name='span'
return soup
return soup

View File

@ -17,7 +17,7 @@ class Perfil(BasicNewsRecipe):
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
language = 'es_AR'
remove_empty_feeds = True
masthead_url = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif'
extra_css = """

View File

@ -13,7 +13,7 @@ class Reptantes(BasicNewsRecipe):
description = u"cada vez que te haces acupuntura, tu muñeco vudú sufre en algún lado"
oldest_article = 130
max_articles_per_feed = 100
language = 'es'
language = 'es_AR'
encoding = 'utf-8'
no_stylesheets = True
use_embedded_content = False

View File

@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
title = 'Revista Bla'
__author__ = 'Gustavo Azambuja'
description = 'Moda | Uruguay'
language = 'es'
language = 'es_UY'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5

View File

@ -20,7 +20,7 @@ class Veintitres(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
language = 'es'
language = 'es_AR'
lang = 'es-AR'
direction = 'ltr'

View File

@ -360,6 +360,9 @@ class LinuxFreeze(Command):
def main():
try:
sys.argv[0] = sys.calibre_basename
dfv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
if dfv and os.path.exists(dfv):
sys.path.insert(0, os.path.abspath(dfv))
set_default_encoding()
set_helper()
set_qt_plugin_path()

View File

@ -139,6 +139,13 @@ class CHMReader(CHMFile):
if self.hhc_path not in files and files:
self.hhc_path = files[0]
if self.hhc_path == '.hhc' and self.hhc_path not in files:
from calibre import walk
for x in walk(output_dir):
if os.path.basename(x).lower() in ('index.htm', 'index.html'):
self.hhc_path = os.path.relpath(x, output_dir)
break
def _reformat(self, data, htmlpath):
try:
data = xml_to_unicode(data, strip_encoding_pats=True)[0]

View File

@ -53,7 +53,7 @@ def find_pages(dir, sort_on_mtime=False, verbose=False):
prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
return pages
class PageProcessor(list):
class PageProcessor(list): # {{{
'''
Contains the actual image rendering logic. See :method:`render` and
:method:`process_pages`.
@ -111,6 +111,13 @@ class PageProcessor(list):
SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
try:
if self.opts.comic_image_size:
SCRWIDTH, SCRHEIGHT = map(int, [x.strip() for x in
self.opts.comic_image_size.split('x')])
except:
pass # Ignore
if self.opts.keep_aspect_ratio:
# Preserve the aspect ratio by adding border
aspect = float(sizex) / float(sizey)
@ -170,6 +177,7 @@ class PageProcessor(list):
dest = dest[:-1]
os.rename(dest+'8', dest)
self.append(dest)
# }}}
def render_pages(tasks, dest, opts, notification=lambda x, y: x):
'''
@ -291,7 +299,11 @@ class ComicInput(InputFormatPlugin):
OptionRecommendation(name='no_process', recommended_value=False,
help=_("Apply no processing to the image")),
OptionRecommendation(name='dont_grayscale', recommended_value=False,
help=_('Do not convert the image to grayscale (black and white)'))
help=_('Do not convert the image to grayscale (black and white)')),
OptionRecommendation(name='comic_image_size', recommended_value=None,
help=_('Specify the image size as widthxheight pixels. Normally,'
' an image size is automatically calculated from the output '
'profile, this option overrides it.')),
])
recommendations = set([

View File

@ -24,10 +24,11 @@ class HeuristicProcessor(object):
self.chapters_no_title = 0
self.chapters_with_title = 0
self.blanks_deleted = False
self.blanks_between_paragraphs = False
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.softbreak = re.compile(r'\s*(?P<openline><p(?=\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|spacer)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
def is_pdftohtml(self, src):
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
@ -42,8 +43,10 @@ class HeuristicProcessor(object):
" chapters. - " + unicode(chap))
return '<h2>'+chap+'</h2>\n'
else:
txt_chap = html2text(chap)
txt_title = html2text(title)
delete_whitespace = re.compile('^\s*(?P<c>.*?)\s*$')
delete_quotes = re.compile('\'\"')
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(chap)))
txt_title = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(title)))
self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
@ -375,9 +378,9 @@ class HeuristicProcessor(object):
html = re.sub('<p\s?/>', '', html)
# Get rid of empty span, bold, font, em, & italics tags
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
self.deleted_nbsps = True
return html
@ -416,6 +419,28 @@ class HeuristicProcessor(object):
return True
return False
def detect_blank_formatting(self, html):
blanks_before_headings = re.compile(r'(\s*<p[^>]*>\s*</p>){1,}(?=\s*<h\d)', re.IGNORECASE)
blanks_after_headings = re.compile(r'(?<=</h\d>)(\s*<p[^>]*>\s*</p>){1,}', re.IGNORECASE)
def markup_spacers(match):
blanks = match.group(0)
blanks = self.blankreg.sub('\n<p class="spacer"> </p>', blanks)
return blanks
html = blanks_before_headings.sub(markup_spacers, html)
html = blanks_after_headings.sub(markup_spacers, html)
if self.html_preprocess_sections > self.min_chapters:
html = re.sub('(?si)^.*?(?=<h\d)', markup_spacers, html)
return html
def detect_soft_breaks(self, html):
if not self.blanks_deleted and self.blanks_between_paragraphs:
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
else:
html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
return html
def __call__(self, html):
self.log.debug("********* Heuristic processing HTML *********")
@ -457,23 +482,23 @@ class HeuristicProcessor(object):
#html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
# Determine whether the document uses interleaved blank lines
blanks_between_paragraphs = self.analyze_blanks(html)
self.blanks_between_paragraphs = self.analyze_blanks(html)
#self.dump(html, 'before_chapter_markup')
# detect chapters/sections to match xpath or splitting logic
if getattr(self.extra_opts, 'markup_chapter_headings', False):
html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
if getattr(self.extra_opts, 'italicize_common_cases', False):
html = self.markup_italicis(html)
# If more than 40% of the lines are empty paragraphs and the user has enabled delete
# blank paragraphs then delete blank lines to clean up spacing
if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
self.log.debug("deleting blank lines")
self.blanks_deleted = True
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
html = self.blankreg.sub('', html)
# Determine line ending type
@ -525,14 +550,13 @@ class HeuristicProcessor(object):
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
if getattr(self.extra_opts, 'format_scene_breaks', False):
html = self.detect_blank_formatting(html)
html = self.detect_soft_breaks(html)
# Center separator lines
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
if not self.blanks_deleted:
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
#html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
if self.deleted_nbsps:
# put back non-breaking spaces in empty paragraphs to preserve original formatting
html = self.blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
html = self.softbreak.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
return html

View File

@ -175,6 +175,19 @@ class EPUBInput(InputFormatPlugin):
raise ValueError(
'EPUB files with DTBook markup are not supported')
for x in list(opf.iterspine()):
ref = x.get('idref', None)
if ref is None:
x.getparent().remove(x)
continue
for y in opf.itermanifest():
if y.get('id', None) == ref and y.get('media-type', None) in \
('application/vnd.adobe-page-template+xml',):
p = x.getparent()
if p is not None:
p.remove(x)
break
with open('content.opf', 'wb') as nopf:
nopf.write(opf.render())

View File

@ -83,6 +83,7 @@ class RTFInput(InputFormatPlugin):
os.mkdir(debug_dir)
debug_dir = 'rtfdebug'
run_lev = 4
self.log('Running RTFParser in debug mode')
except:
pass
parser = ParseRtf(
@ -230,22 +231,6 @@ class RTFInput(InputFormatPlugin):
with open('styles.css', 'ab') as f:
f.write(css)
# def preprocess(self, fname):
# self.log('\tPreprocessing to convert unicode characters')
# try:
# data = open(fname, 'rb').read()
# from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
# tokenizer = RtfTokenizer(data)
# tokens = RtfTokenParser(tokenizer.tokens)
# data = tokens.toRTF()
# fname = 'preprocessed.rtf'
# with open(fname, 'wb') as f:
# f.write(data)
# except:
# self.log.exception(
# 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
# return fname
def convert_borders(self, doc):
border_styles = []
style_map = {}
@ -280,8 +265,6 @@ class RTFInput(InputFormatPlugin):
self.opts = options
self.log = log
self.log('Converting RTF to XML...')
#Name of the preprocesssed RTF file
# fname = self.preprocess(stream.name)
try:
xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e:
@ -335,3 +318,4 @@ class RTFInput(InputFormatPlugin):
opf.render(open('metadata.opf', 'wb'))
return os.path.abspath('metadata.opf')

View File

@ -238,6 +238,8 @@ class ParseRtf:
bug_handler = RtfInvalidCodeException,
)
enc = 'cp' + encode_obj.get_codepage()
if enc == 'cp10000':
enc = 'mac_roman'
msg = 'Exception in token processing'
if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \

View File

@ -15,8 +15,10 @@
# #
# #
#########################################################################
import sys, os, tempfile, re
import sys, os, tempfile, re
from calibre.ebooks.rtf2xml import copy
class Colors:
"""
Change lines with color info from color numbers to the actual color names.
@ -40,8 +42,10 @@ class Colors:
self.__file = in_file
self.__copy = copy
self.__bug_handler = bug_handler
self.__line = 0
self.__write_to = tempfile.mktemp()
self.__run_level = run_level
def __initiate_values(self):
"""
Initiate all values.
@ -61,6 +65,7 @@ class Colors:
self.__color_num = 1
self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
def __before_color_func(self, line):
"""
Requires:
@ -76,6 +81,7 @@ class Colors:
if self.__token_info == 'mi<mk<clrtbl-beg':
self.__state = 'in_color_table'
self.__write_obj.write(line)
def __default_color_func(self, line):
"""
Requires:
@ -87,6 +93,7 @@ class Colors:
"""
hex_num = line[-3:-1]
self.__color_string += hex_num
def __blue_func(self, line):
"""
Requires:
@ -109,6 +116,7 @@ class Colors:
)
self.__color_num += 1
self.__color_string = '#'
def __in_color_func(self, line):
"""
Requires:
@ -127,12 +135,13 @@ class Colors:
self.__state = 'after_color_table'
else:
action = self.__state_dict.get(self.__token_info)
if action == None:
if action is None:
sys.stderr.write('in module colors.py\n'
'function is self.__in_color_func\n'
'no action for %s' % self.__token_info
)
action(line)
def __after_color_func(self, line):
"""
Check the to see if it contains color info. If it does, extract the
@ -180,6 +189,7 @@ class Colors:
else:
self.__write_obj.write(line)
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
def __sub_from_line_color(self, match_obj):
num = match_obj.group(1)
try:
@ -191,25 +201,27 @@ class Colors:
else:
return 'bdr-color_:no-value'
hex_num = self.__figure_num(num)
return_value = 'bdr-color_:%s' % hex_num
return return_value
return 'bdr-color_:%s' % hex_num
def __figure_num(self, num):
if num == 0:
hex_num = 'false'
else:
hex_num = self.__color_dict.get(num)
if hex_num == None:
if self.__run_level > 3:
msg = 'no value in self.__color_dict for key %s\n' % num
raise self.__bug_hanlder, msg
if hex_num == None:
if hex_num is None:
hex_num = '0'
if self.__run_level > 5:
msg = 'no value in self.__color_dict' \
'for key %s at line %d\n' % (num, self.__line)
raise self.__bug_handler, msg
return hex_num
def __do_nothing_func(self, line):
"""
Bad RTF will have text in the color table
"""
pass
def convert_colors(self):
"""
Requires:
@ -226,20 +238,16 @@ class Colors:
info, and substitute the number with the hex number.
"""
self.__initiate_values()
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action == None:
sys.stderr.write('no no matching state in module fonts.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
self.__line+=1
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write('no matching state in module fonts.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "color.data")

View File

@ -33,13 +33,13 @@ class ConvertToTags:
self.__copy = copy
self.__dtd_path = dtd_path
self.__no_dtd = no_dtd
if encoding != 'mac_roman':
self.__encoding = 'cp' + encoding
else:
self.__encoding = 'cp' + encoding
if encoding == 'mac_roman':
self.__encoding = 'mac_roman'
self.__indent = indent
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
self.__convert_utf = False
def __initiate_values(self):
"""
@ -213,7 +213,8 @@ class ConvertToTags:
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
self.__convert_utf = True
else:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
@ -253,15 +254,28 @@ class ConvertToTags:
an empty tag function.
"""
self.__initiate_values()
self.__write_obj = open(self.__write_to, 'w')
self.__write_dec()
with open(self.__file, 'r') as read_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__state_dict.get(self.__token_info)
if action is not None:
action(line)
with open(self.__write_to, 'w') as self.__write_obj:
self.__write_dec()
with open(self.__file, 'r') as read_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__state_dict.get(self.__token_info)
if action is not None:
action(line)
self.__write_obj.close()
#convert all encodings to UTF8 to avoid unsupported encodings in lxml
if self.__convert_utf:
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
copy_obj.rename(self.__write_to, self.__file)
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj:
file = read_obj.read()
try:
file = file.decode(self.__encoding)
write_obj.write(file.encode('utf-8'))
except:
sys.stderr.write('Conversion to UTF-8 is not possible,'
' encoding should be very carefully checked')
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")

View File

@ -75,12 +75,16 @@ class DefaultEncoding:
self._encoding()
self.__datafetched = True
code_page = 'ansicpg' + self.__code_page
if self.__code_page == '10000':
self.__code_page = 'mac_roman'
return self.__platform, code_page, self.__default_num
def get_codepage(self):
if not self.__datafetched:
self._encoding()
self.__datafetched = True
if self.__code_page == '10000':
self.__code_page = 'mac_roman'
return self.__code_page
def get_platform(self):

View File

@ -16,7 +16,9 @@
# #
#########################################################################
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
class Fonts:
"""
Change lines with font info from font numbers to the actual font names.
@ -45,6 +47,7 @@ class Fonts:
self.__default_font_num = default_font_num
self.__write_to = tempfile.mktemp()
self.__run_level = run_level
def __initiate_values(self):
"""
Initiate all values.
@ -67,6 +70,7 @@ class Fonts:
self.__font_table = {}
# individual font written
self.__wrote_ind_font = 0
def __default_func(self, line):
"""
Requires:
@ -79,6 +83,7 @@ class Fonts:
if self.__token_info == 'mi<mk<fonttb-beg':
self.__state = 'font_table'
self.__write_obj.write(line)
def __font_table_func(self, line):
"""
Requires:
@ -101,6 +106,7 @@ class Fonts:
self.__font_num = self.__default_font_num
self.__text_line = ''
##self.__write_obj.write(line)
def __font_in_table_func(self, line):
"""
Requires:
@ -138,6 +144,7 @@ class Fonts:
elif self.__token_info == 'mi<mk<fonttb-end':
self.__found_end_font_table_func()
self.__state = 'after_font_table'
def __found_end_font_table_func(self):
"""
Required:
@ -150,7 +157,8 @@ class Fonts:
if not self.__wrote_ind_font:
self.__write_obj.write(
'mi<tg<empty-att_'
'<font-in-table<name>Times<num>0\n' )
'<font-in-table<name>Times<num>0\n')
def __after_font_table_func(self, line):
"""
Required:
@ -169,7 +177,7 @@ class Fonts:
if self.__token_info == 'cw<ci<font-style':
font_num = line[20:-1]
font_name = self.__font_table.get(font_num)
if font_name == None:
if font_name is None:
if self.__run_level > 3:
msg = 'no value for %s in self.__font_table\n' % font_num
raise self.__bug_handler, msg
@ -182,6 +190,7 @@ class Fonts:
)
else:
self.__write_obj.write(line)
def convert_fonts(self):
"""
Required:
@ -197,20 +206,15 @@ class Fonts:
info. Substitute a font name for a font number.
"""
self.__initiate_values()
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action == None:
sys.stderr.write('no no matching state in module fonts.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write('no matching state in module fonts.py\n' \
+ self.__state + '\n')
action(line)
default_font_name = self.__font_table.get(self.__default_font_num)
if not default_font_name:
default_font_name = 'Not Defined'

View File

@ -43,7 +43,7 @@ class GetCharMap:
def get_char_map(self, map):
if map == 'ansicpg0':
map = 'ansicpg1250'
if map in ('ansicpg10000', '10000'):
if map == 'ansicpg10000':
map = 'mac_roman'
found_map = False
map_dict = {}

View File

@ -126,12 +126,6 @@ class Tokenize:
tokens = re.split(self.__splitexp, input_file)
#remove empty tokens and \n
return filter(lambda x: len(x) > 0 and x != '\n', tokens)
#input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
# line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
# this is for older RTF
#line = re.sub(self.__par_exp, '\\par ', line)
#return filter(lambda x: len(x) > 0, \
#(self.__remove_line.sub('', x) for x in tokens))
def __compile_expressions(self):
SIMPLE_RPL = {
@ -160,7 +154,7 @@ class Tokenize:
}
self.__replace_spchar = MReplace(SIMPLE_RPL)
#add ;? in case of char following \u
self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
#manage upr/ud situations
@ -172,14 +166,21 @@ class Tokenize:
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
#this is for old RTF
self.__par_exp = re.compile(r'\\\n+')
# self.__par_exp = re.compile(r'\\$')
#handle cw using a digit as argument and without space as delimiter
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
#self.__remove_line = re.compile(r'\n+')
#self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
def __correct_spliting(self, token):
match_obj = re.search(self.__cwdigit_exp, token)
if match_obj is None:
return token
else:
return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
def tokenize(self):
"""Main class for handling other methods. Reads the file \
, uses method self.sub_reg to make basic substitutions,\
@ -187,7 +188,7 @@ class Tokenize:
#read
with open(self.__file, 'r') as read_obj:
input_file = read_obj.read()
#process simple replacements and split giving us a correct list
#remove '' and \n in the process
tokens = self.__sub_reg_split(input_file)
@ -195,7 +196,9 @@ class Tokenize:
tokens = map(self.__unicode_process, tokens)
#remove empty items created by removing \uc
tokens = filter(lambda x: len(x) > 0, tokens)
#handles bothersome cases
tokens = map(self.__correct_spliting, tokens)
#write
with open(self.__write_to, 'wb') as write_obj:
write_obj.write('\n'.join(tokens))
@ -203,11 +206,9 @@ class Tokenize:
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "tokenize.data")
# if self.__out_file:
# self.__file = self.__out_file
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
#self.__special_tokens = [ '_', '~', "'", '{', '}' ]
# import sys
@ -223,4 +224,4 @@ class Tokenize:
# if __name__ == '__main__':
# sys.exit(main())
# sys.exit(main())

View File

@ -22,7 +22,7 @@ class PluginWidget(Widget, Ui_Form):
['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
'despeckle', 'no_sort', 'no_process', 'landscape',
'dont_sharpen', 'disable_trim', 'wide', 'output_format',
'dont_grayscale']
'dont_grayscale', 'comic_image_size']
)
self.db, self.book_id = db, book_id
for x in get_option('output_format').option.choices:

View File

@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>599</width>
<height>345</height>
<height>398</height>
</rect>
</property>
<property name="windowTitle">
@ -37,70 +37,70 @@
</property>
</widget>
</item>
<item row="3" column="0">
<item row="4" column="0">
<widget class="QCheckBox" name="opt_dont_normalize">
<property name="text">
<string>Disable &amp;normalize</string>
</property>
</widget>
</item>
<item row="4" column="0">
<item row="5" column="0">
<widget class="QCheckBox" name="opt_keep_aspect_ratio">
<property name="text">
<string>Keep &amp;aspect ratio</string>
</property>
</widget>
</item>
<item row="5" column="0">
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_sharpen">
<property name="text">
<string>Disable &amp;Sharpening</string>
</property>
</widget>
</item>
<item row="6" column="0">
<item row="7" column="0">
<widget class="QCheckBox" name="opt_disable_trim">
<property name="text">
<string>Disable &amp;Trimming</string>
</property>
</widget>
</item>
<item row="7" column="0">
<item row="8" column="0">
<widget class="QCheckBox" name="opt_wide">
<property name="text">
<string>&amp;Wide</string>
</property>
</widget>
</item>
<item row="8" column="0">
<item row="9" column="0">
<widget class="QCheckBox" name="opt_landscape">
<property name="text">
<string>&amp;Landscape</string>
</property>
</widget>
</item>
<item row="9" column="0">
<item row="10" column="0">
<widget class="QCheckBox" name="opt_right2left">
<property name="text">
<string>&amp;Right to left</string>
</property>
</widget>
</item>
<item row="10" column="0">
<item row="11" column="0">
<widget class="QCheckBox" name="opt_no_sort">
<property name="text">
<string>Don't so&amp;rt</string>
</property>
</widget>
</item>
<item row="11" column="0">
<item row="12" column="0">
<widget class="QCheckBox" name="opt_despeckle">
<property name="text">
<string>De&amp;speckle</string>
</property>
</widget>
</item>
<item row="13" column="0">
<item row="14" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -120,7 +120,7 @@
</property>
</widget>
</item>
<item row="12" column="0">
<item row="13" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>&amp;Output format:</string>
@ -130,7 +130,7 @@
</property>
</widget>
</item>
<item row="12" column="1">
<item row="13" column="1">
<widget class="QComboBox" name="opt_output_format"/>
</item>
<item row="1" column="0">
@ -140,6 +140,19 @@
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Override image &amp;size:</string>
</property>
<property name="buddy">
<cstring>opt_comic_image_size</cstring>
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QLineEdit" name="opt_comic_image_size"/>
</item>
</layout>
</widget>
<resources/>

View File

@ -838,9 +838,9 @@ class DeviceMixin(object): # {{{
format_count[f] = 1
for f in self.device_manager.device.settings().format_map:
if f in format_count.keys():
formats.append((f, _('%i of %i Books' % (format_count[f], len(rows))), True if f in aval_out_formats else False))
formats.append((f, _('%i of %i Books') % (format_count[f], len(rows))), True if f in aval_out_formats else False)
elif f in aval_out_formats:
formats.append((f, _('0 of %i Books' % len(rows)), True))
formats.append((f, _('0 of %i Books') % len(rows)), True)
d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats)
if d.exec_() != QDialog.Accepted:
return

View File

@ -7,7 +7,7 @@ import os, shutil
from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \
QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \
QLineEdit, Qt, QProgressBar, QSize, QTimer
QLineEdit, Qt, QProgressBar, QSize, QTimer, QIcon, QTextEdit
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.library.check_library import CheckLibrary, CHECKS
@ -16,7 +16,7 @@ from calibre import prints, as_unicode
from calibre.ptempfile import PersistentTemporaryFile
from calibre.library.sqlite import DBThread, OperationalError
class DBCheck(QDialog):
class DBCheck(QDialog): # {{{
def __init__(self, parent, db):
QDialog.__init__(self, parent)
@ -134,7 +134,7 @@ class DBCheck(QDialog):
def reject(self):
self.rejected = True
QDialog.reject(self)
# }}}
class Item(QTreeWidgetItem):
pass
@ -146,9 +146,70 @@ class CheckLibraryDialog(QDialog):
self.db = db
self.setWindowTitle(_('Check Library -- Problems Found'))
self.setWindowIcon(QIcon(I('debug.png')))
self._layout = QVBoxLayout(self)
self.setLayout(self._layout)
self._tl = QHBoxLayout()
self._layout = QVBoxLayout()
self.setLayout(self._tl)
self._tl.addLayout(self._layout)
self.helpw = QTextEdit(self)
self._tl.addWidget(self.helpw)
self.helpw.setReadOnly(True)
self.helpw.setText(_('''\
<h1>Help</h1>
<p>calibre stores the list of your books and their metadata in a
database. The actual book files and covers are stored as normal
files in the calibre library folder. The database contains a list of the files
and covers belonging to each book entry. This tool checks that the
actual files in the library folder on your computer match the
information in the database.</p>
<p>The result of each type of check is shown to the left. The various
checks are:
</p>
<ul>
<li><b>Invalid titles</b>: These are files and folders appearing
in the library where books titles should, but that do not have the
correct form to be a book title.</li>
<li><b>Extra titles</b>: These are extra files in your calibre
library that appear to be correctly-formed titles, but have no corresponding
entries in the database</li>
<li><b>Invalid authors</b>: These are files appearing
in the library where only author folders should be.</li>
<li><b>Extra authors</b>: These are folders in the
calibre library that appear to be authors but that do not have entries
in the database</li>
<li><b>Missing book formats</b>: These are book formats that are in
the database but have no corresponding format file in the book's folder.
<li><b>Extra book formats</b>: These are book format files found in
the book's folder but not in the database.
<li><b>Unknown files in books</b>: These are extra files in the
folder of each book that do not correspond to a known format or cover
file.</li>
<li><b>Missing cover files</b>: These represent books that are marked
in the database as having covers but the actual cover files are
missing.</li>
<li><b>Cover files not in database</b>: These are books that have
cover files but are marked as not having covers in the database.</li>
<li><b>Folder raising exception</b>: These represent folders in the
calibre library that could not be processed/understood by this
tool.</li>
</ul>
<p>There are two kinds of automatic fixes possible: <i>Delete
marked</i> and <i>Fix marked</i>.</p>
<p><i>Delete marked</i> is used to remove extra files/folders/covers that
have no entries in the database. Check the box next to the item you want
to delete. Use with caution.</p>
<p><i>Fix marked</i> is applicable only to covers (the two lines marked
'fixable'). In the case of missing cover files, checking the fixable
box and pushing this button will remove the cover mark from the
database for all the files in that category. In the case of extra
cover files, checking the fixable box and pushing this button will
add the cover mark to the database for all the files in that
category.</p>
'''))
self.log = QTreeWidget(self)
self.log.itemChanged.connect(self.item_changed)
@ -199,7 +260,7 @@ class CheckLibraryDialog(QDialog):
self._layout.addLayout(h)
self._layout.addWidget(self.bbox)
self.resize(750, 500)
self.resize(950, 500)
self.bbox.setEnabled(True)
def do_exec(self):
@ -347,5 +408,6 @@ class CheckLibraryDialog(QDialog):
if __name__ == '__main__':
app = QApplication([])
d = CheckLibraryDialog()
from calibre.library import db
d = CheckLibraryDialog(None, db())
d.exec_()

View File

@ -266,7 +266,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def add_plugin(self):
path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
filters=[(_('Plugins'), ['zip'])], all_files=False,
filters=[(_('Plugins') + ' (*.zip)', ['zip'])], all_files=False,
select_only_single_file=True)
if not path:
return

View File

@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
help = _('The fields to output when cataloging books in the '
'database. Should be a comma-separated list of fields.\n'
'Available fields: %s.\n'
'plus user-created custom fields.\n'
'Example: %s=title,authors,tags\n'
"Default: '%%default'\n"
"Applies to: BIBTEX output format")%(', '.join(FIELDS),
@ -269,7 +270,7 @@ class BIBTEX(CatalogPlugin): # {{{
dest = 'bib_cit',
action = None,
help = _('The template for citation creation from database fields.\n'
' Should be a template with {} enclosed fields.\n'
'Should be a template with {} enclosed fields.\n'
'Available fields: %s.\n'
"Default: '%%default'\n"
"Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
@ -344,7 +345,7 @@ class BIBTEX(CatalogPlugin): # {{{
if field == 'authors' :
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
elif field in ['title', 'publisher', 'cover', 'uuid',
elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
'author_sort', 'series'] :
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
@ -378,7 +379,7 @@ class BIBTEX(CatalogPlugin): # {{{
if calibre_files:
files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
for format in item]
bibtex_entry.append(u'files = "%s"' % u', '.join(files))
bibtex_entry.append(u'file = "%s"' % u', '.join(files))
elif field == 'series_index' :
bibtex_entry.append(u'volume = "%s"' % int(item))
@ -474,6 +475,8 @@ class BIBTEX(CatalogPlugin): # {{{
if opts.verbose:
opts_dict = vars(opts)
log("%s(): Generating %s" % (self.name,self.fmt))
if opts.connected_device['is_device_connected']:
log(" connected_device: %s" % opts.connected_device['name'])
if opts_dict['search_text']:
log(" --search='%s'" % opts_dict['search_text'])
@ -548,6 +551,7 @@ class BIBTEX(CatalogPlugin): # {{{
as outfile:
#File header
nb_entries = len(data)
#check in book strict if all is ok else throw a warning into log
if bib_entry == 'book' :
nb_books = len(filter(check_entry_book_valid, data))
@ -555,6 +559,11 @@ class BIBTEX(CatalogPlugin): # {{{
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
nb_entries = nb_books
# If connected device, add 'On Device' values to data
if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
for entry in data:
entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
% (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))

File diff suppressed because it is too large Load Diff

View File

@ -112,6 +112,16 @@ _extra_lang_codes = {
'en_IE' : _('English (Ireland)'),
'en_CN' : _('English (China)'),
'es_PY' : _('Spanish (Paraguay)'),
'es_UY' : _('Spanish (Uruguay)'),
'es_AR' : _('Spanish (Argentina)'),
'es_MX' : _('Spanish (Mexico)'),
'es_CU' : _('Spanish (Cuba)'),
'es_CL' : _('Spanish (Chile)'),
'es_EC' : _('Spanish (Ecuador)'),
'es_HN' : _('Spanish (Honduras)'),
'es_VE' : _('Spanish (Venezuela)'),
'es_BO' : _('Spanish (Bolivia)'),
'es_NI' : _('Spanish (Nicaragua)'),
'de_AT' : _('German (AT)'),
'fr_BE' : _('French (BE)'),
'nl' : _('Dutch (NL)'),