GwR incremental patch to support pseudo-list strings from composite columns

This commit is contained in:
GRiker 2011-01-12 16:06:27 -07:00
commit 8212ab2a85
25 changed files with 1159 additions and 873 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 KiB

View File

@ -1,59 +1,79 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__author__ = 'Gerardo Diez'
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
__docformat__ = 'restructuredtext en'
'''
www.expansion.com
expansion.es
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Publico(BasicNewsRecipe):
title =u'Expansion.com'
__author__ ='Gerardo Diez'
publisher =u'Unidad Editorial Información Económica, S.L.'
category ='finances, catalunya'
oldest_article =1
max_articles_per_feed =100
simultaneous_downloads =10
cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
timefmt ='[%A, %d %B, %Y]'
encoding ='latin'
language ='es'
remove_javascript =True
no_stylesheets =True
keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']})
remove_tags =[
dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
dict(name='span', attrs={'class':['comentarios']}),
dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
dict(name='div', attrs={'id':['comentarios_lectores_listado']})
]
feeds =[
(u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
(u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'),
(u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'),
(u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
(u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
(u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
(u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
(u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
(u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
(u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
(u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
(u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
class Expansion(BasicNewsRecipe):
title = 'Diario Expansion'
__author__ = 'Darko Miletic'
description = 'Lider de informacion de mercados, economica y politica'
publisher = 'expansion.com'
category = 'news, politics, Spain'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
delay = 1
encoding = 'iso-8859-15'
language = 'es'
(u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
(u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
(u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
(u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
(u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
(u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
(u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
(u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
(u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
(u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
(u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
(u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
direction = 'ltr'
(u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
(u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
(u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
]
(u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
(u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
(u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
(u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
(u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
(u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
(u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
feeds = [
(u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178')
,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178')
]
keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
remove_tags = [
dict(name=['object','link','script'])
,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
]
remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
def preprocess_html(self, soup):
soup.html['dir' ] = self.direction
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup
(u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
(u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
(u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
]

View File

@ -1,10 +1,9 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
msnbc.msn.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class MsNBC(BasicNewsRecipe):
@ -19,7 +18,16 @@ class MsNBC(BasicNewsRecipe):
publisher = 'msnbc.com'
category = 'news, USA, world'
language = 'en'
extra_css = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} '
extra_css = """
body{ font-family: Georgia,Times,serif }
.hide{display: none}
.caption{font-family: Arial,sans-serif; font-size: x-small}
.entry-summary{font-family: Arial,sans-serif}
.copyright{font-size: 0.95em; font-style: italic}
.source-org{font-size: small; font-family: Arial,sans-serif}
img{display: block; margin-bottom: 0.5em}
span.byline{display: none}
"""
conversion_options = {
'comments' : description
@ -28,14 +36,20 @@ class MsNBC(BasicNewsRecipe):
,'publisher': publisher
}
preprocess_regexps = [
(re.compile(r'</style></head>', re.DOTALL|re.IGNORECASE),lambda match: '</style>')
,(re.compile(r'<div class="head">', re.DOTALL|re.IGNORECASE),lambda match: '</head><body><div class="head">'),
]
remove_tags_before = dict(name='h1', attrs={'id':'headline'})
remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']})
keep_only_tags=[
dict(attrs={'id':['headline','deck','byline','source','intelliTXT']})
,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']})
]
remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace']
remove_tags = [
dict(name=['iframe','object','link','embed','meta','table'])
,dict(name='span', attrs={'class':['copyright','Linear copyright']})
,dict(name='div', attrs={'class':'social'})
]
remove_tags_before = dict(name='div', attrs={'class':'head'})
remove_tags_after = dict(name='div', attrs={'class':'copyright'})
remove_tags = [dict(name=['iframe','object','link','script','form'])]
feeds = [
(u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' )
@ -48,11 +62,26 @@ class MsNBC(BasicNewsRecipe):
,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' )
]
def print_version(self, url):
return url + 'print/1/displaymode/1098/'
def preprocess_html(self, soup):
for item in soup.head.findAll('div'):
item.extract()
for item in soup.body.findAll('html'):
item.name='div'
for item in soup.body.findAll('div'):
if item.has_key('id') and item['id'].startswith('vine-'):
item.extract()
if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')):
item.extract()
for item in soup.body.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
for item in soup.body.findAll('ol'):
if item.has_key('class') and item['class'].startswith('grid'):
item.extract()
for item in soup.body.findAll('span'):
if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ):
item.extract()
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

View File

@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe):
def get_article_url(self, article):
return article.get('guid', article.get('id', None))
def print_version(self, url):
baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
split1 = string.split(url,"/")
@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe):
split2= string.split(xxx,"/")
s = baseurl + split2[0]
return s
def postprocess_html(self,soup, True):
#remove picture
headerhtml = soup.find(True, {'class':'header'})
headerhtml.replaceWith("")
#remove close button
closehtml = soup.find(True, {'class':'close'})
closehtml.replaceWith("")
#remove banner advertisement
bannerhtml = soup.find(True, {'class':'bannerad'})
bannerhtml.replaceWith("")
#thanks kiklop74! This code removes all links from the text
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

View File

@ -2,8 +2,10 @@
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.chardet import xml_to_unicode
class Wired_Daily(BasicNewsRecipe):
@ -15,30 +17,43 @@ class Wired_Daily(BasicNewsRecipe):
no_stylesheets = True
preprocess_regexps = [(re.compile(r'<head.*</head>', re.DOTALL), lambda m:
'<head></head>')]
remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
'footer', 'advertisement', 'blog_subscription_unit',
'brightcove_component']),
{'class':'entryActions'},
dict(name=['noscript', 'script'])]
remove_tags = [dict(id=['header', 'commenting_module', 'post_nav',
'social_tools', 'sidebar', 'footer', 'social_wishlist', 'pgwidget',
'outerWrapper', 'inf_widget']),
{'class':['entryActions', 'advertisement', 'entryTags']},
dict(name=['noscript', 'script']),
dict(name='h4', attrs={'class':re.compile(r'rat\d+')}),
{'class':lambda x: x and x.startswith('contentjump')},
dict(name='li', attrs={'class':['entryCategories', 'entryEdit']})]
feeds = [
('Top News', 'http://feeds.wired.com/wired/index'),
('Culture', 'http://feeds.wired.com/wired/culture'),
('Software', 'http://feeds.wired.com/wired/software'),
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
('Cars', 'http://feeds.wired.com/wired/cars'),
('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
('Gaming', 'http://feeds.wired.com/wired/gaming'),
('Science', 'http://feeds.wired.com/wired/science'),
('Med Tech', 'http://feeds.wired.com/wired/medtech'),
('Politics', 'http://feeds.wired.com/wired/politics'),
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
('Commentary', 'http://feeds.wired.com/wired/commentary'),
('Product Reviews',
'http://www.wired.com/reviews/feeds/latestProductsRss'),
('Autopia', 'http://www.wired.com/autopia/feed/'),
('Danger Room', 'http://www.wired.com/dangerroom/feed/'),
('Epicenter', 'http://www.wired.com/epicenter/feed/'),
('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'),
('Geek Dad', 'http://www.wired.com/geekdad/feed/'),
('Playbook', 'http://www.wired.com/playbook/feed/'),
('Rawfile', 'http://www.wired.com/rawfile/feed/'),
('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'),
('Threat Level', 'http://www.wired.com/threatlevel/feed/'),
('Underwire', 'http://www.wired.com/underwire/feed/'),
('Web Monkey', 'http://www.webmonkey.com/feed/'),
('Science', 'http://www.wired.com/wiredscience/feed/'),
]
def populate_article_metadata(self, article, soup, first):
if article.text_summary:
article.text_summary = xml_to_unicode(article.text_summary,
resolve_entities=True)[0]
def print_version(self, url):
return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
return url + '/all/1'

View File

@ -0,0 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.zerohedge.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class ZeroHedge(BasicNewsRecipe):
title = 'Zero Hedge'
__author__ = 'Darko Miletic'
description = 'On a long enough timeline the survival rate for everyone drops to zero'
oldest_article = 10
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = True
encoding = 'utf8'
publisher = 'zero hedge'
category = 'news, USA, world, economy, politics'
language = 'en'
masthead_url = 'http://www.zerohedge.com/themes/newsflash/logo.png'
publication_type = 'blog'
extra_css = 'body{ font-family: sans-serif }'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher': publisher
}
feeds = [(u'Articles', u'http://feeds.feedburner.com/zerohedge/feed')]

View File

@ -459,6 +459,18 @@ def force_unicode(obj, enc=preferred_encoding):
obj = obj.decode('utf-8')
return obj
def as_unicode(obj, enc=preferred_encoding):
if not isbytestring(obj):
try:
obj = unicode(obj)
except:
try:
obj = str(obj)
except:
obj = repr(obj)
return force_unicode(obj, enc=enc)
def human_readable(size):
""" Convert a size in bytes into a human readable form """

View File

@ -10,7 +10,8 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
comment_pat = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL)
category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
tags_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
publisher_pat = re.compile(r'\{\\info.*?\{\\manager(.*?)(?<!\\)\}', re.DOTALL)
def get_document_info(stream):
"""
@ -82,61 +83,73 @@ def decode(raw, codec):
def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """
title, author, comment, category = None, None, None, None
stream.seek(0)
if stream.read(5) != r'{\rtf':
return MetaInformation(None, None)
return MetaInformation(_('Unknown'))
block = get_document_info(stream)[0]
if not block:
return MetaInformation(None, None)
return MetaInformation(_('Unknown'))
stream.seek(0)
cpg = detect_codepage(stream)
stream.seek(0)
title_match = title_pat.search(block)
if title_match:
if title_match is not None:
title = decode(title_match.group(1).strip(), cpg)
else:
title = _('Unknown')
author_match = author_pat.search(block)
if author_match:
if author_match is not None:
author = decode(author_match.group(1).strip(), cpg)
comment_match = comment_pat.search(block)
if comment_match:
comment = decode(comment_match.group(1).strip(), cpg)
category_match = category_pat.search(block)
if category_match:
category = decode(category_match.group(1).strip(), cpg)
mi = MetaInformation(title, author)
else:
author = None
mi = MetaInformation(title)
if author:
mi.authors = string_to_authors(author)
mi.comments = comment
mi.category = category
comment_match = comment_pat.search(block)
if comment_match is not None:
comment = decode(comment_match.group(1).strip(), cpg)
mi.comments = comment
tags_match = tags_pat.search(block)
if tags_match is not None:
tags = decode(tags_match.group(1).strip(), cpg)
mi.tags = tags
publisher_match = publisher_pat.search(block)
if publisher_match is not None:
publisher = decode(publisher_match.group(1).strip(), cpg)
mi.publisher = publisher
return mi
def create_metadata(stream, options):
md = r'{\info'
md = [r'{\info']
if options.title:
title = options.title.encode('ascii', 'ignore')
md += r'{\title %s}'%(title,)
md.append(r'{\title %s}'%(title,))
if options.authors:
au = options.authors
if not isinstance(au, basestring):
au = u', '.join(au)
author = au.encode('ascii', 'ignore')
md += r'{\author %s}'%(author,)
if options.get('category', None):
category = options.category.encode('ascii', 'ignore')
md += r'{\category %s}'%(category,)
md.append(r'{\author %s}'%(author,))
comp = options.comment if hasattr(options, 'comment') else options.comments
if comp:
comment = comp.encode('ascii', 'ignore')
md += r'{\subject %s}'%(comment,)
if len(md) > 6:
md += '}'
md.append(r'{\subject %s}'%(comment,))
if options.publisher:
publisher = options.publisher.encode('ascii', 'ignore')
md.append(r'{\manager %s}'%(publisher,))
if options.tags:
tags = u', '.join(options.tags)
tags = tags.encode('ascii', 'ignore')
md.append(r'{\category %s}'%(tags,))
if len(md) > 1:
md.append('}')
stream.seek(0)
src = stream.read()
ans = src[:6] + md + src[6:]
ans = src[:6] + u''.join(md) + src[6:]
stream.seek(0)
stream.write(ans)
@ -156,7 +169,7 @@ def set_metadata(stream, options):
base_pat = r'\{\\name(.*?)(?<!\\)\}'
title = options.title
if title != None:
if title is not None:
title = title.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'title'), re.DOTALL)
if pat.search(src):
@ -164,7 +177,7 @@ def set_metadata(stream, options):
else:
src = add_metadata_item(src, 'title', title)
comment = options.comments
if comment != None:
if comment is not None:
comment = comment.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL)
if pat.search(src):
@ -172,7 +185,7 @@ def set_metadata(stream, options):
else:
src = add_metadata_item(src, 'subject', comment)
author = options.authors
if author != None:
if author is not None:
author = ', '.join(author)
author = author.encode('ascii', 'ignore')
pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL)
@ -180,14 +193,23 @@ def set_metadata(stream, options):
src = pat.sub(r'{\\author ' + author + r'}', src)
else:
src = add_metadata_item(src, 'author', author)
category = options.get('category', None)
if category != None:
category = category.encode('ascii', 'replace')
tags = options.tags
if tags is not None:
tags = ', '.join(tags)
tags = tags.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
if pat.search(src):
src = pat.sub(r'{\\category ' + category + r'}', src)
src = pat.sub(r'{\\category ' + tags + r'}', src)
else:
src = add_metadata_item(src, 'category', category)
src = add_metadata_item(src, 'category', tags)
publisher = options.publisher
if publisher is not None:
publisher = publisher.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'manager'), re.DOTALL)
if pat.search(src):
src = pat.sub(r'{\\manager ' + publisher + r'}', src)
else:
src = add_metadata_item(src, 'manager', publisher)
stream.seek(pos + olen)
after = stream.read()
stream.seek(pos)

View File

@ -1,4 +1,8 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Read content from txt file.
@ -10,10 +14,7 @@ from calibre import prepare_string_for_xml, isbytestring
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
from calibre.ebooks.conversion.preprocess import DocAnalysis
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.utils.cleantext import clean_ascii_chars
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
@ -33,9 +34,7 @@ def clean_txt(txt):
# Remove excessive line breaks.
txt = re.sub('\n{3,}', '\n\n', txt)
#remove ASCII invalid chars : 0 to 8 and 11-14 to 24
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
txt = illegal_chars.sub('', txt)
txt = clean_ascii_chars(txt)
return txt

View File

@ -27,14 +27,17 @@ class PluginWidget(QWidget, Ui_Form):
def __init__(self, parent=None):
QWidget.__init__(self, parent)
self.setupUi(self)
from calibre.library.catalog import FIELDS
self.all_fields = []
for x in FIELDS :
if x != 'all':
self.all_fields.append(x)
QListWidgetItem(x, self.db_fields)
def initialize(self, name, db): #not working properly to update
from calibre.library.catalog import FIELDS
self.all_fields = [x for x in FIELDS if x != 'all']
#add custom columns
self.all_fields.extend([x for x in sorted(db.custom_field_keys())])
#populate
for x in self.all_fields:
QListWidgetItem(x, self.db_fields)
self.name = name
fields = gprefs.get(name+'_db_fields', self.all_fields)
# Restore the activated db_fields from last use

View File

@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.ebooks.metadata.book.base import composite_formatter
from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.gui2 import error_dialog
from calibre.gui2 import error_dialog, ResizableDialog
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.utils.config import dynamic
from calibre.utils.titlecase import titlecase
@ -49,7 +49,7 @@ def get_cover_data(path):
class MyBlockingBusy(QDialog):
class MyBlockingBusy(QDialog): # {{{
do_one_signal = pyqtSignal()
@ -241,8 +241,9 @@ class MyBlockingBusy(QDialog):
self.current_index += 1
self.do_one_signal.emit()
# }}}
class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
s_r_functions = { '' : lambda x: x,
_('Lower Case') : lambda x: icu_lower(x),
@ -261,9 +262,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
]
def __init__(self, window, rows, model, tab):
QDialog.__init__(self, window)
ResizableDialog.__init__(self, window)
Ui_MetadataBulkDialog.__init__(self)
self.setupUi(self)
self.model = model
self.db = model.db
self.ids = [self.db.id(r) for r in rows]

File diff suppressed because it is too large Load Diff

View File

@ -823,7 +823,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
if book.series_index is not None:
self.series_index.setValue(book.series_index)
if book.has_cover:
if d.opt_auto_download_cover.isChecked() and book.has_cover:
if d.opt_auto_download_cover.isChecked():
self.fetch_cover()
else:
self.fetch_cover_button.setFocus(Qt.OtherFocusReason)

View File

@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en'
from functools import partial
from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \
pyqtSignal, QToolButton, QPushButton, \
QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup, \
QMenu
pyqtSignal, QToolButton, QMenu, QCheckBox, \
QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup
from calibre.constants import __appname__
from calibre.gui2.search_box import SearchBox2, SavedSearchBox
@ -178,7 +178,9 @@ class SearchBar(QWidget): # {{{
x.setToolTip(_("<p>Search the list of books by title, author, publisher, tags, comments, etc.<br><br>Words separated by spaces are ANDed"))
l.addWidget(x)
self.search_button = QPushButton(_('&Go!'))
self.search_button = QToolButton()
self.search_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
self.search_button.setText(_('&Go!'))
l.addWidget(self.search_button)
self.search_button.setSizePolicy(QSizePolicy.Minimum,
QSizePolicy.Minimum)
@ -192,6 +194,12 @@ class SearchBar(QWidget): # {{{
l.addWidget(x)
x.setToolTip(_("Reset Quick Search"))
x = parent.search_highlight_only = QCheckBox()
x.setText(_('&Highlight'))
x.setToolTip(_('Highlight matched books in the book list, instead '
'of restricting the book list to the matches.'))
l.addWidget(x)
x = parent.saved_search = SavedSearchBox(self)
x.setMaximumSize(QSize(150, 16777215))
x.setMinimumContentsLength(15)

View File

@ -10,7 +10,7 @@ from contextlib import closing
from operator import attrgetter
from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
QModelIndex, QVariant, QDate
QModelIndex, QVariant, QDate, QColor
from calibre.gui2 import NONE, config, UNDEFINED_QDATE
from calibre.utils.pyparsing import ParseException
@ -93,6 +93,9 @@ class BooksModel(QAbstractTableModel): # {{{
self.bool_no_icon = QIcon(I('list_remove.png'))
self.bool_blank_icon = QIcon(I('blank.png'))
self.device_connected = False
self.rows_matching = set()
self.lowest_row_matching = None
self.highlight_only = False
self.read_config()
def change_alignment(self, colname, alignment):
@ -229,9 +232,27 @@ class BooksModel(QAbstractTableModel): # {{{
self.endInsertRows()
self.count_changed()
def set_highlight_only(self, toWhat):
self.highlight_only = toWhat
if self.last_search:
self.research()
def search(self, text, reset=True):
try:
self.db.search(text)
if self.highlight_only:
self.db.search('')
if not text:
self.rows_matching = set()
self.lowest_row_matching = None
else:
self.rows_matching = self.db.search(text, return_matches=True)
if self.rows_matching:
self.lowest_row_matching = self.db.row(self.rows_matching[0])
self.rows_matching = set(self.rows_matching)
else:
self.rows_matching = set()
self.lowest_row_matching = None
self.db.search(text)
except ParseException as e:
self.searched.emit(e.msg)
return
@ -337,8 +358,9 @@ class BooksModel(QAbstractTableModel): # {{{
name, val = mi.format_field(key)
if mi.metadata_for_field(key)['datatype'] == 'comments':
name += ':html'
if val:
if val and name not in data:
data[name] = val
return data
@ -651,6 +673,9 @@ class BooksModel(QAbstractTableModel): # {{{
return NONE
if role in (Qt.DisplayRole, Qt.EditRole):
return self.column_to_dc_map[col](index.row())
elif role == Qt.BackgroundColorRole:
if self.id(index) in self.rows_matching:
return QColor('lightgreen')
elif role == Qt.DecorationRole:
if self.column_to_dc_decorator_map[col] is not None:
return self.column_to_dc_decorator_map[index.column()](index.row())

View File

@ -680,8 +680,14 @@ class BooksView(QTableView): # {{{
def set_editable(self, editable, supports_backloading):
self._model.set_editable(editable)
def search_proxy(self, txt):
self._model.search(txt)
if self._model.lowest_row_matching is not None:
self.select_rows([self._model.lowest_row_matching], using_ids=False)
self.setFocus(Qt.OtherFocusReason)
def connect_to_search_box(self, sb, search_done):
sb.search.connect(self._model.search)
sb.search.connect(self.search_proxy)
self._search_done = search_done
self._model.searched.connect(self.search_done)

View File

@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel):
dont_remove_from=set(['toolbar-device']))
if name is None:
return FakeAction('--- '+_('Separator')+' ---', None)
return gui.iactions[name]
try:
return gui.iactions[name]
except:
return None
def rowCount(self, parent):
return len(self._data)
@ -124,7 +127,8 @@ class CurrentModel(BaseModel):
BaseModel.__init__(self)
self.gprefs_name = 'action-layout-'+key
current = gprefs[self.gprefs_name]
self._data = [self.name_to_action(x, gui) for x in current]
self._data = [self.name_to_action(x, gui) for x in current]
self._data = [x for x in self._data if x is not None]
self.key = key
self.gui = gui

View File

@ -16,6 +16,7 @@ from calibre.gui2 import config
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor
from calibre.gui2.dialogs.search import SearchDialog
from calibre.utils.config import dynamic
from calibre.utils.search_query_parser import saved_searches
from calibre.utils.icu import sort_key
@ -375,6 +376,9 @@ class SearchBoxMixin(object): # {{{
unicode(self.search.toolTip())))
self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
self.clear_button.setStatusTip(self.clear_button.toolTip())
self.search_highlight_only.stateChanged.connect(self.highlight_only_changed)
self.search_highlight_only.setChecked(
dynamic.get('search_highlight_only', False))
def focus_search_box(self, *args):
self.search.setFocus(Qt.OtherFocusReason)
@ -401,6 +405,11 @@ class SearchBoxMixin(object): # {{{
def focus_to_library(self):
self.current_view().setFocus(Qt.OtherFocusReason)
def highlight_only_changed(self, toWhat):
dynamic.set('search_highlight_only', toWhat)
self.current_view().model().set_highlight_only(toWhat)
self.focus_to_library()
# }}}
class SavedSearchBoxMixin(object): # {{{

View File

@ -26,6 +26,7 @@ from calibre.gui2.search_box import SearchBox2
from calibre.ebooks.metadata import MetaInformation
from calibre.customize.ui import available_input_formats
from calibre.gui2.viewer.dictionary import Lookup
from calibre import as_unicode
class TOCItem(QStandardItem):
@ -632,7 +633,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
else:
r = getattr(worker.exception, 'reason', worker.exception)
error_dialog(self, _('Could not open ebook'),
unicode(r), det_msg=worker.traceback, show=True)
as_unicode(r), det_msg=worker.traceback, show=True)
self.close_progress_indicator()
else:
self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:])

View File

@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{
if isinstance(location, list):
if allow_recursion:
for loc in location:
matches |= self.get_matches(loc, query, allow_recursion=False)
matches |= self.get_matches(loc, query, candidates,
allow_recursion=False)
return matches
raise ParseException(query, len(query), 'Recursive query group detected', self)
@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{
fm = self.field_metadata[location]
# take care of dates special case
if fm['datatype'] == 'datetime':
return self.get_dates_matches(location, query.lower())
return self.get_dates_matches(location, query.lower(), candidates)
# take care of numbers special case
if fm['datatype'] in ('rating', 'int', 'float'):
return self.get_numeric_matches(location, query.lower())
return self.get_numeric_matches(location, query.lower(), candidates)
# take care of the 'count' operator for is_multiples
if fm['is_multiple'] and \
@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{
query[1:1] in '=<>!':
vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\
len(item[loc].split(ms)) if item[loc] is not None else 0
return self.get_numeric_matches(location, query[1:], val_func=vf)
return self.get_numeric_matches(location, query[1:],
candidates, val_func=vf)
# everything else, or 'all' matches
matchkind = CONTAINS_MATCH

View File

@ -1531,10 +1531,23 @@ class EPUB_MOBI(CatalogPlugin):
self.opts.header_note_source_field,
index_is_id=True)
if notes:
if field_md['datatype'] == 'text' and isinstance(notes,list):
notes = ' &middot; '.join(notes)
if field_md['datatype'] == 'text':
print "\n inner notes: %s\n" % repr(notes)
if isinstance(notes,list):
notes = ' &middot; '.join(notes)
elif field_md['datatype'] == 'datetime':
notes = format_date(notes,'dd MMM yyyy')
elif field_md['datatype'] == 'composite':
if re.match(r'\[(.*)\]$', notes):
# Sniff for special pseudo-list string "[<item, item>]"
bracketed_content = re.match(r'\[(.*)\]$', notes).group(1)
if re.search(',',bracketed_content):
# Recast the comma-separated items as a list
items = bracketed_content.split(',')
items = [i.strip() for i in items]
notes = ' &middot; '.join(items)
else:
notes = bracketed_content
this_title['notes'] = {'source':field_md['name'],
'content':notes}

View File

@ -341,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.has_id = self.data.has_id
self.count = self.data.count
# Count times get_metadata is called, and how many times in the cache
self.gm_count = 0
self.gm_missed = 0
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags',
'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'):
@ -710,6 +706,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
formats = row[fm['formats']]
if not formats:
formats = None
else:
formats = formats.split(',')
mi.formats = formats
tags = row[fm['tags']]
if tags:

View File

@ -110,6 +110,7 @@ class cmd_commit(_cmd_commit):
suffix = 'The fix will be in the next release.'
action = action+'ed'
msg = '%s in branch %s. %s'%(action, nick, suffix)
msg = msg.replace('Fixesed', 'Fixed')
server = xmlrpclib.ServerProxy(url)
server.ticket.update(int(bug), msg,
{'status':'closed', 'resolution':'fixed'},

View File

@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
import re, htmlentitydefs
_ascii_pat = None
@ -21,3 +21,32 @@ def clean_ascii_chars(txt, charlist=None):
pat = re.compile(u'|'.join(map(unichr, charlist)))
return pat.sub('', txt)
##
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text, rm=False, rchar=u''):
def fixup(m, rm=rm, rchar=rchar):
text = m.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
except KeyError:
pass
if rm:
return rchar #replace by char
return text # leave as is
return re.sub("&#?\w+;", fixup, text)

View File

@ -18,6 +18,24 @@ class _Parser(object):
LEX_NUM = 4
LEX_EOF = 5
def _python(self, func):
locals = {}
exec func in locals
if 'evaluate' not in locals:
self.error('no evaluate function in python')
try:
result = locals['evaluate'](self.parent.kwargs)
if isinstance(result, (float, int)):
result = unicode(result)
elif isinstance(result, list):
result = ','.join(result)
elif isinstance(result, str):
result = unicode(result)
return result
except Exception as e:
self.error('python function threw exception: ' + e.msg)
def _strcmp(self, x, y, lt, eq, gt):
v = strcmp(x, y)
if v < 0:
@ -79,6 +97,7 @@ class _Parser(object):
'field' : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)),
'multiply' : (2, partial(_math, op='*')),
'print' : (-1, _print),
'python' : (1, _python),
'strcat' : (-1, _concat),
'strcmp' : (5, _strcmp),
'substr' : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]),
@ -362,7 +381,7 @@ class TemplateFormatter(string.Formatter):
(r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])),
(r'\n#.*?(?=\n)', None),
(r'\s', None)
])
], flags=re.DOTALL)
def _eval_program(self, val, prog):
# keep a cache of the lex'ed program under the theory that re-lexing