Merge from trunk

This commit is contained in:
Charles Haley 2011-03-11 07:48:01 +00:00
commit e2917dcda7
24 changed files with 435 additions and 69 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 290 B

View File

@ -1,17 +1,83 @@
from calibre.web.feeds.news import BasicNewsRecipe
#!/usr/bin/env python
class AdvancedUserRecipe1257093338(BasicNewsRecipe):
from calibre.web.feeds.news import BasicNewsRecipe
class golem_ger(BasicNewsRecipe):
title = u'Golem.de'
language = 'de'
__author__ = 'Kovid Goyal'
oldest_article = 7
max_articles_per_feed = 100
language = 'de'
lang = 'de-DE'
no_stylesheets = True
encoding = 'iso-8859-1'
recursions = 1
match_regexps = [r'http://www.golem.de/.*.html']
feeds = [(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0')]
keep_only_tags = [
dict(name='h1', attrs={'class':'artikelhead'}),
dict(name='p', attrs={'class':'teaser'}),
dict(name='div', attrs={'class':'artikeltext'}),
dict(name='h2', attrs={'id':'artikelhead'}),
]
def print_version(self, url):
    """Return the golem.de print-page URL for the article at ``url``.

    The article id is taken to be the text between the last ``/`` and the
    final five characters of ``url`` (the ``.html`` suffix for links that
    match this recipe's ``match_regexps``).
    """
    article_id = url[url.rfind('/') + 1:-5]
    return 'http://www.golem.de/' + 'print.php?a=' + article_id
remove_tags = [
dict(name='div', attrs={'id':['similarContent','topContentWrapper','storycarousel','aboveFootPromo','comments','toolbar','breadcrumbs','commentlink','sidebar','rightColumn']}),
dict(name='div', attrs={'class':['gg_embeddedSubText','gg_embeddedIndex gg_solid','gg_toOldGallery','golemGallery']}),
dict(name='img', attrs={'class':['gg_embedded','gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer']}),
dict(name='td', attrs={'class':['xsmall']}),
]
# remove_tags_after = [
# dict(name='div', attrs={'id':['contentad2']})
# ]
# Topic (tp=...) and section (r=...) feeds offered by golem.de, as
# (section title, feed URL) pairs. Each feed is listed exactly once; the
# 'Internet' entry used to appear twice, which would produce a duplicate section.
# NOTE(review): a second `feeds` assignment follows this one in the file and
# would take precedence over this list - confirm which one is intended.
feeds = [
    (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
    (u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
    (u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
    (u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
    (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
    (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=ATOM1.0'),
    (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=ATOM1.0'),
    (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
    (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
    (u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
    (u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
    (u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
    (u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
    (u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
    (u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
    (u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0'),
    (u'Unternehmen/Maerkte', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0'),
]
# Feeds to download, as (section title, feed URL) pairs.
# The 'Mobil' URL previously read "...&feed=feed=RSS2.0", i.e. the `feed`
# parameter carried the value "feed=RSS2.0"; it now matches the other entries.
feeds = [
    (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
    (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=RSS2.0'),
    (u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
    (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
    (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
    (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
]
extra_css = '''
h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:30px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;margin-bottom:2 em;}
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:x-small; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal; line-height:5px;}
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
.teaser {font-style:italic;font-size:12pt;margin-bottom:15pt;}
.xsmall{font-style:italic;font-size:x-small;}
.td{font-style:italic;font-size:x-small;}
img {align:left;}
'''

View File

@ -11,6 +11,26 @@ class AdvancedUserRecipe1259599587(BasicNewsRecipe):
feeds = [(u'gulli:news', u'http://ticker.gulli.com/rss/')]
remove_tags = [{'class' : ['addthis_button', 'BreadCrumb']}, {'id' : ['plista0']}]
remove_tags = [dict(name='div', attrs={'class':['FloatL','_forumBox']})]
keep_only_tags = [dict(name='div', attrs={'class':'inside'})]
keep_only_tags = [dict(name='div', attrs={'id':['_contentLeft']})]
remove_tags_after = [dict(name='div', attrs={'class':['_bookmark']})]
extra_css = '''
h1 {color:#008852;font-family:Arial,Helvetica,sans-serif; font-size:25px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:22px; }
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
.newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
.articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
.byline {color:#666;margin-bottom:0;font-size:12px}
.blockquote {color:#030303;font-style:italic;padding-left:15px;}
img {align:center;}
.li {list-style-type: none}
'''

View File

@ -0,0 +1,42 @@
import urllib2
from calibre.web.feeds.news import BasicNewsRecipe
class JBPress(BasicNewsRecipe):
    """Recipe for Japan Business Press, fetched through its print pages.

    The recipe is marked ``needs_subscription``; when a username and password
    are configured, ``get_browser`` logs in before any article is fetched.
    """

    title = u'JBPress'
    description = u'Japan Business Press New articles (using small print version)'
    __author__ = 'Ado Nishimura'
    language = 'ja'

    needs_subscription = True
    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_tags_before = {'id': 'wrapper'}

    feeds = [('JBPress new article', 'http://feed.ismedia.jp/rss/jbpress/all.rdf')]

    def get_cover_url(self):
        """Return the fixed URL of the site logo, used as the cover image."""
        return 'http://www.jbpress.co.jp/common/images/v1/jpn/common/logo.gif'

    def get_browser(self):
        """Return a browser, logged in to JBPress when credentials are set.

        Without a username and password the plain browser is returned.
        """
        br = BasicNewsRecipe.get_browser()
        if self.username is None or self.password is None:
            return br

        # Hand-written login form that posts to the site's login endpoint.
        login_form = '''<form action="https://jbpress.ismedia.jp/auth/dologin/http://jbpress.ismedia.jp/articles/print/5549" method="post">
<input id="login" name="login" type="text"/>
<input id="password" name="password" type="password"/>
<input id="rememberme" name="rememberme" type="checkbox"/>
</form>
'''
        # Open a real page first, then swap its body for the form above so
        # that the first form on the "page" is the login form.
        br.open('http://jbpress.ismedia.jp/articles/print/5549')
        page = br.response()
        page.set_data(login_form)
        br.set_response(page)

        br.select_form(nr=0)
        br["login"] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def print_version(self, url):
        """Return the print-page URL for a feed link.

        The link is opened first so that any redirect is followed; the
        ``/-/`` segment of the final URL is then replaced with ``/print/``.
        """
        resolved = urllib2.urlopen(url).geturl()
        return resolved.replace('/-/', '/print/')

View File

@ -17,6 +17,7 @@ class Lanacion(BasicNewsRecipe):
use_embedded_content = False
no_stylesheets = True
language = 'es_AR'
delay = 14
publication_type = 'newspaper'
remove_empty_feeds = True
masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif'
@ -25,7 +26,7 @@ class Lanacion(BasicNewsRecipe):
h2{color: #626262; font-weight: normal; font-size: 1.1em}
body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.notaFecha{color: #808080}
.notaFecha{color: #808080; font-size: small}
.notaEpigrafe{font-size: x-small}
.topNota h1{font-family: Arial,sans-serif}
"""
@ -38,7 +39,10 @@ class Lanacion(BasicNewsRecipe):
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
keep_only_tags = [
dict(name='div', attrs={'class':['topNota','itemHeader','nota','itemBody']})
,dict(name='div', attrs={'id':'content'})
]
remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
@ -52,8 +56,7 @@ class Lanacion(BasicNewsRecipe):
remove_attributes = ['height','width','visible','onclick','data-count','name']
feeds = [
(u'Ultimas Noticias' , u'http://servicios.lanacion.com.ar/herramientas/rss/origen=2' )
,(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' )
@ -81,17 +84,12 @@ class Lanacion(BasicNewsRecipe):
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.set_debug_redirects(True)
br.set_debug_responses(True)
br.set_debug_http(True)
return br
def get_article_url(self, article):
link = BasicNewsRecipe.get_article_url(self,article)
if link.startswith('http://blogs.lanacion') and not link.endswith('/'):
return None
return self.browser.open_novisit(link).geturl()
if link.rfind('galeria=') > 0:
return None
return link
def preprocess_html(self, soup):

View File

@ -0,0 +1,33 @@
EMAILADDRESS = 'hoge@foobar.co.jp'
from calibre.web.feeds.news import BasicNewsRecipe
class NBOnline(BasicNewsRecipe):
    """Recipe for Nikkei Business Online, fetched through its print pages.

    Login needs an e-mail address in addition to the configured username and
    password; it is read from the module-level ``EMAILADDRESS`` constant.
    """

    title = u'Nikkei Business Online'
    description = u'Nikkei Business Online New articles. PLEASE NOTE: You need to edit EMAILADDRESS line of this "nbonline.recipe" file to set your e-mail address which is needed when login. (file is in "Calibre2/resources/recipes" directory.)'
    __author__ = 'Ado Nishimura'
    language = 'ja'

    needs_subscription = True
    oldest_article = 7
    max_articles_per_feed = 100

    remove_tags_before = {'id': 'kanban'}
    remove_tags = [dict(name='div', id='footer')]

    feeds = [('Nikkei Buisiness Online', 'http://business.nikkeibp.co.jp/rss/all_nbo.rdf')]

    def get_cover_url(self):
        """Return the fixed URL of the site logo, used as the cover image."""
        return 'http://business.nikkeibp.co.jp/images/nbo/200804/parts/logo.gif'

    def get_browser(self):
        """Return a browser, signed in to Nikkei BP when credentials are set.

        Without a username and password the plain browser is returned.
        """
        br = BasicNewsRecipe.get_browser()
        if self.username is None or self.password is None:
            return br

        # Fill in and submit the site's own sign-on form.
        br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
        br.select_form(name='loginActionForm')
        br['email'] = EMAILADDRESS
        br['userId'] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def print_version(self, url):
        """Return the print-page URL: the article URL plus ``?ST=print``."""
        print_suffix = '?ST=print'
        return url + print_suffix

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
publika.md
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Publika(BasicNewsRecipe):
    """Recipe for publika.md, built from the site's single news feed."""

    # Publication metadata.
    title = u'Publika'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u015etiri din Moldova'
    publisher = u'Publika'
    category = 'Ziare,Stiri,Moldova'
    language = 'ro'
    cover_url = 'http://assets.publika.md/images/logo.jpg'

    # Download settings.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # The metadata above is passed on to the conversion step.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Keep only the left column of the page.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'colLeft'}),
    ]

    # Article info, related links, sharing box and comments are removed.
    remove_tags = [
        dict(name='div', attrs={'class': ['articleInfo']}),
        dict(name='div', attrs={'class': ['articleRelated']}),
        dict(name='div', attrs={'class': ['roundedBox socialSharing']}),
        dict(name='div', attrs={'class': ['comment clearfix']}),
    ]

    # Everything after the sharing box / comments is dropped as well.
    remove_tags_after = [
        dict(name='div', attrs={'class': ['roundedBox socialSharing']}),
        dict(name='div', attrs={'class': ['comment clearfix']}),
    ]

    feeds = [
        (u'Feeds', u'http://rss.publika.md/stiri.xml'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)

View File

@ -72,7 +72,7 @@ class FB2MLizer(object):
def clean_text(self, text):
# Condense empty paragraphs into a line break.
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<p><empty-line /></p>', text)
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line />', text)
# Remove empty paragraphs.
text = re.sub(r'(?miu)<p>\s*</p>', '', text)
# Clean up pargraph endings.
@ -101,9 +101,6 @@ class FB2MLizer(object):
def fb2_header(self):
metadata = {}
metadata['author_first'] = u''
metadata['author_middle'] = u''
metadata['author_last'] = u''
metadata['title'] = self.oeb_book.metadata.title[0].value
metadata['appname'] = __appname__
metadata['version'] = __version__
@ -115,16 +112,36 @@ class FB2MLizer(object):
metadata['id'] = None
metadata['cover'] = self.get_cover()
author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
if len(author_parts) == 1:
metadata['author_last'] = author_parts[0]
elif len(author_parts) == 2:
metadata['author_first'] = author_parts[0]
metadata['author_last'] = author_parts[1]
else:
metadata['author_first'] = author_parts[0]
metadata['author_middle'] = ' '.join(author_parts[1:-2])
metadata['author_last'] = author_parts[-1]
metadata['author'] = u''
for auth in self.oeb_book.metadata.creator:
author_first = u''
author_middle = u''
author_last = u''
author_parts = auth.value.split(' ')
if len(author_parts) == 1:
author_last = author_parts[0]
elif len(author_parts) == 2:
author_first = author_parts[0]
author_last = author_parts[1]
else:
author_first = author_parts[0]
author_middle = ' '.join(author_parts[1:-1])
author_last = author_parts[-1]
metadata['author'] += '<author>'
metadata['author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(author_first)
if author_middle:
metadata['author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(author_middle)
metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
metadata['author'] += '</author>'
# No creator produced any markup above: fall back to an empty author
# element. The <last-name> element must be closed, otherwise the header
# that embeds this string is not well-formed XML.
if not metadata['author']:
    metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
metadata['sequence'] = u''
if self.oeb_book.metadata.series:
index = '1'
if self.oeb_book.metadata.series_index:
index = self.oeb_book.metadata.series_index[0]
metadata['sequence'] = u'<sequence name="%s" number="%s" />' % (prepare_string_for_xml(u'%s' % self.oeb_book.metadata.series[0]), index)
identifiers = self.oeb_book.metadata['identifier']
for x in identifiers:
@ -136,28 +153,21 @@ class FB2MLizer(object):
metadata['id'] = str(uuid.uuid4())
for key, value in metadata.items():
if not key == 'cover':
if key not in ('author', 'cover', 'sequence'):
metadata[key] = prepare_string_for_xml(value)
return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
'<description>' \
'<title-info>' \
'<genre>antique</genre>' \
'<author>' \
'<first-name>%(author_first)s</first-name>' \
'<middle-name>%(author_middle)s</middle-name>' \
'<last-name>%(author_last)s</last-name>' \
'</author>' \
'%(author)s' \
'<book-title>%(title)s</book-title>' \
'%(cover)s' \
'<lang>%(lang)s</lang>' \
'%(sequence)s' \
'</title-info>' \
'<document-info>' \
'<author>' \
'<first-name></first-name>' \
'<middle-name></middle-name>' \
'<last-name></last-name>' \
'</author>' \
'%(author)s' \
'<program-used>%(appname)s %(version)s</program-used>' \
'<date>%(date)s</date>' \
'<id>%(id)s</id>' \

View File

@ -23,8 +23,9 @@ cover_url_cache = {}
cache_lock = RLock()
def find_asin(br, isbn):
q = 'http://www.amazon.com/s?field-keywords='+isbn
raw = br.open_novisit(q).read()
q = 'http://www.amazon.com/s/?search-alias=aps&field-keywords='+isbn
res = br.open_novisit(q)
raw = res.read()
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
root = html.fromstring(raw)
@ -151,6 +152,8 @@ def get_metadata(br, asin, mi):
root = soupparser.fromstring(raw)
except:
return False
if root.xpath('//*[@id="errorMessage"]'):
return False
ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
if ratings:
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
@ -191,6 +194,7 @@ def main(args=sys.argv):
tdir = tempfile.gettempdir()
br = browser()
for title, isbn in [
('The Heroes', '9780316044981'), # Test find_asin
('Learning Python', '8324616489'), # Test xisbn
('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
# Random tests
@ -207,8 +211,12 @@ def main(args=sys.argv):
#import time
#st = time.time()
print get_social_metadata(title, None, None, isbn)
mi = get_social_metadata(title, None, None, isbn)
if not mi.comments:
print 'Failed to downlaod social metadata for', title
return 1
#print '\n\n', time.time() - st, '\n\n'
print '\n'
return 0

View File

@ -74,6 +74,8 @@ class HeadRequest(mechanize.Request):
class OpenLibraryCovers(CoverDownload): # {{{
'Download covers from openlibrary.org'
# See http://openlibrary.org/dev/docs/api/covers
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
name = 'openlibrary.org covers'
description = _('Download covers from openlibrary.org')
@ -82,7 +84,8 @@ class OpenLibraryCovers(CoverDownload): # {{{
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
br = browser()
from calibre.ebooks.metadata.library_thing import get_browser
br = get_browser()
br.set_handle_redirect(False)
try:
br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout)
@ -98,7 +101,8 @@ class OpenLibraryCovers(CoverDownload): # {{{
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
br = browser()
from calibre.ebooks.metadata.library_thing import get_browser
br = get_browser()
try:
ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read()
result_queue.put((True, ans, 'jpg', self.name))
@ -137,6 +141,8 @@ class AmazonCovers(CoverDownload): # {{{
br = browser()
try:
url = get_cover_url(mi.isbn, br)
if url is None:
raise ValueError('No cover found for ISBN: %s'%mi.isbn)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception, e:

View File

@ -908,6 +908,19 @@ class Manifest(object):
pass
data = first_pass(data)
if data.tag == 'HTML':
# Lower case all tag and attribute names
data.tag = data.tag.lower()
for x in data.iterdescendants():
try:
x.tag = x.tag.lower()
for key, val in list(x.attrib.iteritems()):
del x.attrib[key]
key = key.lower()
x.attrib[key] = val
except:
pass
# Handle weird (non-HTML/fragment) files
if barename(data.tag) != 'html':
self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)

View File

@ -12,7 +12,7 @@ from threading import Thread
from PyQt4.Qt import SIGNAL, QObject, Qt, QTimer, QDate, \
QPixmap, QListWidgetItem, QDialog, pyqtSignal, QIcon, \
QPushButton
QPushButton, QKeySequence
from calibre.gui2 import error_dialog, file_icon_provider, dynamic, \
choose_files, choose_images, ResizableDialog, \
@ -469,20 +469,22 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.ts_tooltips = (ok_tooltip, bad_tooltip)
self.row_delta = 0
if prev:
self.prev_button = QPushButton(QIcon(I('back.png')), _('Previous'),
self.prev_button = QPushButton(QIcon(I('back.png')), _('&Previous'),
self)
self.button_box.addButton(self.prev_button, self.button_box.ActionRole)
tip = _('Save changes and edit the metadata of %s')%prev
self.prev_button.setToolTip(tip)
self.prev_button.clicked.connect(partial(self.next_triggered,
-1))
self.prev_button.setShortcut(QKeySequence('Alt+Left'))
if next_:
self.next_button = QPushButton(QIcon(I('forward.png')), _('Next'),
self.next_button = QPushButton(QIcon(I('forward.png')), _('&Next'),
self)
self.button_box.addButton(self.next_button, self.button_box.ActionRole)
tip = _('Save changes and edit the metadata of %s')%next_
self.next_button.setToolTip(tip)
self.next_button.clicked.connect(partial(self.next_triggered, 1))
self.next_button.setShortcut(QKeySequence('Alt+Right'))
self.splitter.setStretchFactor(100, 1)
self.read_state()

View File

@ -11,7 +11,7 @@ from functools import partial
from PyQt4.Qt import Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, \
QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont, \
QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem, \
QSizePolicy, QPalette, QFrame, QSize
QSizePolicy, QPalette, QFrame, QSize, QKeySequence
from calibre.ebooks.metadata import authors_to_string, string_to_authors
from calibre.gui2 import ResizableDialog, error_dialog, gprefs
@ -43,11 +43,14 @@ class MetadataSingleDialogBase(ResizableDialog):
self)
self.button_box.accepted.connect(self.accept)
self.button_box.rejected.connect(self.reject)
self.next_button = QPushButton(QIcon(I('forward.png')), _('Next'),
self.next_button = QPushButton(QIcon(I('forward.png')), _('&Next'),
self)
self.next_button.setShortcut(QKeySequence('Alt+Right'))
self.next_button.clicked.connect(partial(self.do_one, delta=1))
self.prev_button = QPushButton(QIcon(I('back.png')), _('Previous'),
self.prev_button = QPushButton(QIcon(I('back.png')), _('&Previous'),
self)
self.prev_button.setShortcut(QKeySequence('Alt+Left'))
self.button_box.addButton(self.prev_button, self.button_box.ActionRole)
self.button_box.addButton(self.next_button, self.button_box.ActionRole)
self.prev_button.clicked.connect(partial(self.do_one, delta=-1))
@ -355,11 +358,13 @@ class MetadataSingleDialogBase(ResizableDialog):
next_ = self.db.title(self.row_list[self.current_row+1])
if next_ is not None:
tip = _('Save changes and edit the metadata of %s')%next_
tip = (_('Save changes and edit the metadata of %s')+
' [Alt+Right]')%next_
self.next_button.setToolTip(tip)
self.next_button.setVisible(next_ is not None)
if prev is not None:
tip = _('Save changes and edit the metadata of %s')%prev
tip = (_('Save changes and edit the metadata of %s')+
' [Alt+Left]')%prev
self.prev_button.setToolTip(tip)
self.prev_button.setVisible(prev is not None)
self(self.db.id(self.row_list[self.current_row]))

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

View File

@ -0,0 +1,108 @@
.. include:: global.rst
.. _subgroups-tutorial:
Managing subgroups of books, for example "genre"
==================================================
Some people wish to organize the books in their library into subgroups, similar to subfolders. The most common wish is to create genre hierarchies, but there are many others. One user asked for a way to organize textbooks by subject and course number. Another wanted to keep track of gifts by subject and recipient. I will use the genre example for the rest of this post.
Before I go on, please note that I am not talking about folders on the hard disk. Subgroups are not file folders. Books will not be copied anywhere. Calibre's library file structure is not affected. Instead, I am talking about a way to display subgroups of books within a calibre library.
.. contents::
:depth: 1
:local:
.. |sgtree| image:: images/sg_tree.jpg
:class: float-left-img
The commonly expressed requirements for subgroups such as genres are:
* A subgroup (e.g., a genre) must contain (point to) books, not categories of books. This is what distinguishes subgroups from user categories.
* A book can be in multiple subgroups (genres). This distinguishes subgroups from physical file folders.
* Subgroups (genres) must form a hierarchy; subgroups can contain subgroups.
|sgtree| Tags give you the first two. If you tag a book with the genre then you can use the tag browser (or search) to find the books with that genre, giving you the first. Many books can have the same tag, giving you the second. The problem is that tags don't satisfy the third requirement. They don't provide a hierarchy.
Calibre's new hierarchy feature gives you the third, the ability to see the genres in a 'tree' and the ability to easily search for books in genre or sub-genre. For example, assume that your genre structure is similar to the following::
Genre
. History
.. Japanese
.. Military
.. Roman
. Mysteries
.. English
.. Vampire
. Science Fiction
.. Alternate History
.. Military
.. Space Opera
. Thrillers
.. Crime
.. Horror
etc.
By using the hierarchy feature, you can see these genres in the tag browser in a tree form. As you can see, in this example the outermost level (Genre) is a custom column. The genres themselves appear under that column. Genres containing sub-genres appear with a small triangle next to them. Clicking on that triangle will open the item and show the sub-genres, as you see with History and Science Fiction.
Clicking on a genre will search for all books with that genre or children of that genre. For example, clicking on Science Fiction will give all three of the child genres, Alternate History, Military, and Space Opera. Clicking on Alternate History will give books in that genre, ignoring those in Military and Space Opera. Of course, a book can have multiple genres. If a book has both Space Opera and Military genres, then you see that book if you click on either genre. Searching is discussed in more detail below.
Another thing you can see from the image is that the genre Military appears twice, once under History and once under Science Fiction. Because the genres are in a hierarchy, these are two separate genres. A book can be in one, the other, or (doubtfully in this case) both. For example, Winston Churchill's World War II books could be in "History.Military". David Weber's Honor Harrington books could be in "Science Fiction.Military", and in "Science Fiction.Space Opera" for that matter.
Once a genre exists, that is, once the genre has been applied to at least one book, you can easily apply it to other books by dragging a book from the library view onto the genre you want the book to have. You can also apply genres in the metadata editors. More on this below.
Setup
----------------------------------------
Your question by now might be "how did I set all of this up?". There are three steps: 1) create the custom column, 2) tell calibre that the new column is to be treated as a hierarchy, and 3) add genres.
I created the custom column in the usual way, using Preferences -> Add your own columns. I used "genre" as the lookup name and "Genre" as the column heading. The column type is "Comma-separated text, like tags, shown in the tag browser."
.. image:: images/sg_cc.jpg
:align: center
Then after restarting calibre, I told calibre that the column is to be treated as a hierarchy. I went to Preferences -> Look and Feel and entered the lookup name "#genre" into the "Categories with hierarchical items" box. I pressed Apply and was done with setting up.
.. image:: images/sg_pref.jpg
:align: center
At this point there are no genres. We are left with the last step: how to apply a genre to a book. A genre does not exist until it appears on at least one book. To apply a genre for the first time, we must go into some detail about what a genre looks like in the metadata for a book.
A hierarchy of 'things' is built by creating an item consisting of phrases separated by periods. Continuing the Genre example, these items would be "History.Military", "Mysteries.Vampire", "Science Fiction.Space Opera", etc. Thus to create a new genre, you pick a book that should have that genre, edit its metadata, and enter the new genre into the column you created. Continuing my example, if I want to assign a new genre "Comics" with a sub-genre "Superheros" to a book, I would 'edit metadata' for that (comic) book, choose the Custom metadata tab, and then enter "Comics.Superheros" as shown in the following (ignore my other custom columns):
.. image:: images/sg_genre.jpg
:align: center
After I do the above, I see in the tag browser:
.. image:: images/sg_tb.jpg
:align: center
From here on, to apply this new genre to a book (a comic book, presumably), I can either drag the book onto the genre, or add it to the book using edit metadata in exactly the same way as I did above.
Searching
---------------
.. image:: images/sg_search.jpg
:align: center
The easiest way to search for genres is to use the tag browser, clicking on the genre you want to see. Clicking on a genre with children will show you books with that genre and all child genres. However, this might bring up a question. Just because a genre has children doesn't mean that it isn't a genre in its own right. For example, a book can have the genre "History" but not "History.Military". How do I search for books with only "History"?
The tag browser search mechanism knows if an item has children. If it does, clicking on the item cycles through 5 searches instead of the normal three. The first is the normal green plus, which shows you books with that genre only. The second is new: a doubled plus (shown below), which shows you books with that genre and all sub-genres. The third is the normal red minus, which shows you books without that exact genre. The fourth is new: a doubled minus, which shows you books without that genre or sub-genres. The fifth is back to the beginning, no mark, meaning no search.
Restrictions
---------------
If you search for a genre then create a saved search, you can use the 'restrict to' box to create a virtual library of books with that genre. This is most useful if you want to do other searches within the genre or to manage/update metadata. For this example I created a saved search named 'History.Japanese' by first clicking on the genre Japanese in the tag browser to get a search into the search box, entering History.Japanese into the saved search box, then pushing the "save search" button (the green box with the white plus, on the right-hand side).
.. image:: images/sg_restrict.jpg
:align: center
Once I have done that, then I can use this search as a restriction.
.. image:: images/sg_restrict2.jpg
:align: center

View File

@ -129,17 +129,17 @@ The functions available are:
* ``switch(pattern, value, pattern, value, ..., else_value)`` -- for each ``pattern, value`` pair, checks if the field matches the regular expression ``pattern`` and if so, returns that ``value``. If no ``pattern`` matches, then ``else_value`` is returned. You can have as many ``pattern, value`` pairs as you want.
* ``lookup(pattern, field, pattern, field, ..., else_field)`` -- like switch, except the arguments are field (metadata) names, not text. The value of the appropriate field will be fetched and used. Note that because composite columns are fields, you can use this function in one composite field to use the value of some other composite field. This is extremely useful when constructing variable save paths (more later).
* ``select(key)`` -- interpret the field as a comma-separated list of items, with the items being of the form "id:value". Find the pair with the id equal to key, and return the corresponding value. This function is particularly useful for extracting a value such as an isbn from the set of identifiers for a book.
* ``subitems(val, start_index, end_index)`` -- This function is used to break apart lists of tag-like hierarchical items such as genres. It interprets the value as a comma-separated list of tag-like items, where each item is a period-separated list. Returns a new list made by first finding all the period-separated tag-like items, then for each such item extracting the start_index`th to the `end_index`th components, then combining the results back together. The first component in a period-separated list has an index of zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples::
* ``subitems(val, start_index, end_index)`` -- This function is used to break apart lists of tag-like hierarchical items such as genres. It interprets the value as a comma-separated list of tag-like items, where each item is a period-separated list. Returns a new list made by first finding all the period-separated tag-like items, then for each such item extracting the `start_index` th to the `end_index` th components, then combining the results back together. The first component in a period-separated list has an index of zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples::
Assuming a #genre column containing "A.B.C":
{#genre:subitems(0,1)} returns "A"
{#genre:subitems(0,2)} returns "A.B"
{#genre:subitems(1,0)} returns "B.C"
Assuming a #genre column containing "A.B.C, D.E":
{#genre:subitems(0,1)} returns "A, D"
{#genre:subitems(0,2)} returns "A.B, D.E"
Assuming a #genre column containing "A.B.C":
{#genre:subitems(0,1)} returns "A"
{#genre:subitems(0,2)} returns "A.B"
{#genre:subitems(1,0)} returns "B.C"
Assuming a #genre column containing "A.B.C, D.E":
{#genre:subitems(0,1)} returns "A, D"
{#genre:subitems(0,2)} returns "A.B, D.E"
* ``sublist(val, start_index, end_index, separator)`` -- interpret the value as a list of items separated by `separator`, returning a new list made from the `start_index`th to the `end_index`th item. The first item is number zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples assuming that the tags column (which is comma-separated) contains "A, B ,C"::
* ``sublist(val, start_index, end_index, separator)`` -- interpret the value as a list of items separated by `separator`, returning a new list made from the `start_index` th to the `end_index` th item. The first item is number zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples assuming that the tags column (which is comma-separated) contains "A, B ,C"::
{tags:sublist(0,1,\,)} returns "A"
{tags:sublist(-1,0,\,)} returns "C"

View File

@ -12,6 +12,7 @@ Here you will find tutorials to get you started using |app|'s more advanced feat
:maxdepth: 1
news
sub_groups
xpath
template_lang
regexp