Sync with trunk. Revision 9165

Li Fanxi 2011-05-08 17:36:50 +08:00
commit 7bd9cd20fe
229 changed files with 105834 additions and 91526 deletions


@ -30,3 +30,4 @@ nbproject/
.project
.pydevproject
.settings/
*.DS_Store


@ -19,6 +19,106 @@
# new recipes:
#   - title:

- version: 0.8.0
  date: 2011-05-06

  new features:
    - title: "Go to http://calibre-ebook.com/new-in/eight to see what's new in 0.8.0"
      type: major

- version: 0.7.59
  date: 2011-04-30

  bug fixes:
    - title: "Fixes a bug in 0.7.58 that caused too small fonts when converting to MOBI for the Kindle. Apologies."

    - title: "Apple driver: Handle invalid EPUBs that do not contain an OPF file"

  new recipes:
    - title: The Big Picture and Auto industry news
      author: welovelucy

    - title: Gazeta Prawna
      author: Vroo

    - title: Various Czech news sources
      author: Tomas Latal

    - title: Diario de Ibiza
      author: Joan Tur

- version: 0.7.58
  date: 2011-04-29

  new features:
    - title: "Support for converting and reading metadata from Plucker format PDB files"
      type: major

    - title: "The metadata that is displayed in the book details panel on the right is now completely configurable via Preferences->Look & Feel"

    - title: "Add a column that shows the date when the metadata of a book record was last modified in calibre. To see the column, right click on the column headers in calibre and select Show column->Modified. Note that the dates may be incorrect for books added with older versions of calibre."

    - title: "Add a command line option to shut down a running calibre instance"

    - title: "CHM Input: Store extracted files in the input/ sub-directory for easy debugging when --debug-pipeline is specified"

    - title: "Add a popup menu to the 'Create saved search' button to allow easy deletion of saved searches"

  bug fixes:
    - title: "Fix regression that broke converting to LIT in 0.7.57"
      tickets: [769334]

    - title: "Conversion pipeline: Remove encoding declarations from input HTML documents to guarantee that there is only a single encoding declaration in the output HTML."
      tickets: [773337]

    - title: "Correctly parenthesize searches that are used to make search restrictions"

    - title: "Fix ratings in save to disk templates not being divided by 2"

    - title: "TXT to EPUB: Underlined words (following quotes?) fail to become italics"
      tickets: [772267]

    - title: "Fix template function source code unavailable when not running calibre from source"

    - title: "Fix adding HTML books from the top of a deep folder hierarchy being very slow"

    - title: "Only set language in MOBI metadata if it is not null"

    - title: "Fix 'count-of' searches (e.g., tags:#>3)"
      tickets: [771175]

    - title: "Fix regression that broke connection to iTunes in some cases"
      tickets: [771164]

    - title: "Fix buggy regex that made converting PDFs containing the string ****************** very slow"
      tickets: [770534]

    - title: "Fix Ctrl+L shortcut to lookup word not working in the ebook viewer"
      tickets: [769492]

    - title: "Fix regression that broke searching on boolean columns"

  improved recipes:
    - HBR Blogs
    - The Marker
    - Financial Times
    - Clarin
    - Honolulu Star Advertiser

  new recipes:
    - title: Novi Standard
      author: Darko Miletic

    - title: Autobild.ro and Social Diva
      author: Silviu Cotoara

    - title: Novinky
      author: Tomas Latal

    - title: "De Volkskrant (subscriber version)"
      author: Selcal

- version: 0.7.57
  date: 2011-04-22
16 recipes/auto_blog.recipe Normal file

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AutoBlog(BasicNewsRecipe):
    title = u'Auto Blog'
    __author__ = 'Welovelucy'
    language = 'en'
    description = 'Auto industry news'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [(u'AutoBlog', u'http://www.autoblog.com/rss.xml')]

    def print_version(self, url):
        return url + 'print/'

55 recipes/autobild.recipe Normal file

@ -0,0 +1,55 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
auto-bild.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AutoBild(BasicNewsRecipe):
    title = u'Auto Bild'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Auto'
    publisher = 'Auto Bild'
    oldest_article = 50
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste,Auto'
    encoding = 'utf-8'
    cover_url = 'http://www.auto-bild.ro/images/autobild.gif'

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
        ,'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'class':'box_2 articol clearfix'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['detail']})
        , dict(name='a', attrs={'id':['zoom_link']})
        , dict(name='div', attrs={'class':['icons clearfix']})
        , dict(name='div', attrs={'class':['pub_articol clearfix']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':['pub_articol clearfix']})
    ]

    feeds = [
        (u'Feeds', u'http://www.auto-bild.ro/rss/toate')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe

class BigPicture(BasicNewsRecipe):
    title = u'The Big Picture'
    __author__ = 'Welovelucy'
    description = ('Macro perspective on capital markets, economy, technology'
                   ' and digital media')
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [(u'Big Picture', u'http://feeds.feedburner.com/TheBigPicture')]


@ -3,7 +3,8 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
__version__ = '0.98' # 2011-04-10
__version__ = '0.98'

''' http://brandeins.de - Wirtschaftsmagazin '''
import re
import string
@ -13,8 +14,8 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class BrandEins(BasicNewsRecipe):
    title = u'brand eins'
    __author__ = 'Constantin Hofstetter; Steffen Siebert'
    description = u'Wirtschaftsmagazin: Gets the last full issue on default. Set a integer value for the username-field to get older issues: 1 -> the newest (but not complete) issue, 2 -> the last complete issue (default), 3 -> the issue before 2 etc.'
    __author__ = 'Constantin Hofstetter'
    description = u'Wirtschaftsmagazin'
    publisher = 'brandeins.de'
    category = 'politics, business, wirtschaft, Germany'
    use_embedded_content = False
@ -105,10 +106,11 @@ class BrandEins(BasicNewsRecipe):
        keys = issue_map.keys()
        keys.sort()
        keys.reverse()
        selected_issue = issue_map[keys[issue-1]]
        selected_issue_key = keys[issue - 1]
        selected_issue = issue_map[selected_issue_key]
        url = selected_issue.get('href', False)
        # Get the title for the magazine: build it out of the title of the cover, taking the issue and year
        self.title = "brand eins " + re.search(r"(?P<date>\d\d\/\d\d\d\d)", selected_issue.find('img').get('title', False)).group('date')
        self.title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
        url = 'http://brandeins.de/' + url

        # url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
@ -161,3 +163,4 @@ class BrandEins(BasicNewsRecipe):
            current_articles.append({'title': title, 'url': url, 'description': description, 'date': ''})
        titles_and_articles.append([chapter_title, current_articles])
        return titles_and_articles
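A quick sketch of the new title construction above, assuming the issue keys are YYYYMM strings (the sample key is hypothetical, not from the site):

    selected_issue_key = '201105'  # hypothetical key: year 2011, issue 05
    title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
    print title  # -> brand eins 05/2011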


@ -0,0 +1,55 @@
__license__ = 'GPL v3'
__author__ = 'Joan Tur, based on El Pais version by Jordi Balcells & elargentino.com version by Darko Miletic'
description = 'Principal periodico de las islas Pitiusas, Ibiza y Formentera (Espanya) - v1.06 (29/04/2011)'
__docformat__ = 'restructuredtext en'

'''
diariodeibiza.es
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DiarioDeIbiza(BasicNewsRecipe):
    __author__ = 'Joan Tur, cullet'
    description = 'Principal periodico de las islas Pitiusas, Ibiza y Formentera (Espanya) - v1.06 (29/04/2011)'
    cover_url = 'http://estaticos01.diariodeibiza.es//elementosWeb/mediaweb/images/logo.jpg'
    title = u'Diario de Ibiza digital'
    publisher = u'Editorial Prensa Iberica'
    category = 'News, politics, culture, economy, general interest'
    language = 'es'
    encoding = 'iso-8859-1'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    recursion = 5

    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        dict(name='div', attrs={'class':['noticia_titular','epigrafe','subtitulo','actualizada','noticia_fecha','noticia_texto']}),
        dict(name='font', attrs={'class':['actualizada']})
    ]

    feeds = [
        (u'Portada de Ibiza', u'http://www.diariodeibiza.es/elementosInt/rss/1'),
        (u'Pitiuses i Balears', u'http://www.diariodeibiza.es/elementosInt/rss/2'),
        (u'Opini\xf3n', u'http://www.diariodeibiza.es/elementosInt/rss/3'),
        (u'Nacional', u'http://www.diariodeibiza.es/elementosInt/rss/4'),
        (u'Internacional', u'http://www.diariodeibiza.es/elementosInt/rss/5'),
        (u'Econom\xeda', u'http://www.diariodeibiza.es/elementosInt/rss/6'),
        (u'Deportes', u'http://www.diariodeibiza.es/elementosInt/rss/7'),
        (u'Sociedad', u'http://www.diariodeibiza.es/elementosInt/rss/8'),
        (u'Ciencia', u'http://www.diariodeibiza.es/elementosInt/rss/11'),
        (u'Tecnolog\xeda', u'http://www.diariodeibiza.es/elementosInt/rss/12'),
        (u'Gente', u'http://www.diariodeibiza.es/elementosInt/rss/13'),
        (u'Sucesos', u'http://www.diariodeibiza.es/elementosInt/rss/15'),
        (u'Cultura', u'http://www.diariodeibiza.es/elementosInt/rss/16Piti')
    ]

37 recipes/digizone.recipe Normal file

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class DigiZoneCZ(BasicNewsRecipe):
    title = 'DigiZone'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Aktuality a \u010dl\xe1nky z DigiZone.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'iso-8859-2'
    publisher = 'Internet Info s.r.o.'
    category = 'digitalni vysilani, televize, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
                 p.perex img {display:none;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', u'http://rss.digizone.cz/aktuality'),
        (u'\u010cl\xe1nky', u'http://rss.digizone.cz/clanky')
    ]

    remove_tags_before = dict(id=['p-article','p-actuality'])
    remove_tags_after = dict(id=['p-article','p-actuality'])
    remove_tags = [
        dict(attrs={'class':['path','mth','lbtr','serial','enquiry','links','dp-n','side','op-ab','op-view','op-sub','op-list',]}),
        dict(id=['opinions','discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]


@ -12,7 +12,6 @@ class AdvancedUserRecipe1301860159(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'en_EN'
    remove_javascript = True
    keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})]
    remove_tags = [dict(name='a'), dict(name='hr')]


@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
foxnews.com
'''
@ -23,6 +23,7 @@ class FoxNews(BasicNewsRecipe):
    extra_css = """
        body{font-family: Arial,sans-serif }
        .caption{font-size: x-small}
        .author,.dateline{font-size: small}
    """
    conversion_options = {
@ -34,12 +35,12 @@ class FoxNews(BasicNewsRecipe):
    remove_attributes = ['xmlns','lang']

    remove_tags = [
        dict(name=['object','embed','link','script','iframe','meta','base'])
        ,dict(attrs={'class':['user-control','url-description','ad-context']})
    ]
    remove_tags = [
        dict(attrs={'class':['user-control','logo','ad-300x250','url-description']})
        ,dict(name=['meta','base','link','iframe','object','embed'])
    ]
    remove_tags_before = dict(name='h1')
    keep_only_tags = [dict(attrs={'id':'article-print'})]
    remove_tags_after = dict(attrs={'class':'url-description'})

    feeds = [
@ -55,3 +56,24 @@ class FoxNews(BasicNewsRecipe):
    def print_version(self, url):
        return url + 'print'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup


@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
__copyright__ = u'2010-2011, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
frazpc.pl
'''
@ -19,17 +19,20 @@ class FrazPC(BasicNewsRecipe):
    use_embedded_content = False
    no_stylesheets = True

    feeds = [(u'Aktualno\u015bci', u'http://www.frazpc.pl/feed'), (u'Recenzje', u'http://www.frazpc.pl/kat/recenzje-2/feed')]
    keep_only_tags = [dict(name='div', attrs={'id':'FRAZ_CONTENT'})]
    remove_tags = [dict(name='p', attrs={'class':'gray tagsP fs11'})]
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [(r'<div id="post-[0-9]*"', lambda match: '<div id="FRAZ_CONTENT"'),
         (r'href="/f/news/', lambda match: 'href="http://www.frazpc.pl/f/news/'),
         (r' &nbsp; <a href="http://www.frazpc.pl/[^>]*?">(Skomentuj|Komentarz(e)?\([0-9]*\))</a>&nbsp; \|', lambda match: '')]
    feeds = [
        (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),
        (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly')
    ]
    keep_only_tags = [dict(name='div', attrs={'class':'article'})]
    remove_tags = [
        dict(name='div', attrs={'class':'title-wrapper'}),
        dict(name='p', attrs={'class':'tags'}),
        dict(name='p', attrs={'class':'article-links'}),
        dict(name='div', attrs={'class':'comments_box'})
    ]
    preprocess_regexps = [(re.compile(r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'), lambda match: '')]
    remove_attributes = ['width', 'height']


@ -0,0 +1,53 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Vroo <vroobelek@iq.pl>'
__author__ = u'Vroo'
'''
gazetaprawna.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class gazetaprawna(BasicNewsRecipe):
    version = 1
    title = u'Gazeta Prawna'
    __author__ = u'Vroo'
    publisher = u'Infor Biznes'
    oldest_article = 7
    max_articles_per_feed = 20
    no_stylesheets = True
    remove_javascript = True
    description = 'Polski dziennik gospodarczy'
    language = 'pl'
    encoding = 'utf-8'

    remove_tags_after = [
        dict(name='div', attrs={'class':['data-art']})
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['dodatki_artykulu','data-art']})
    ]

    feeds = [
        (u'Wiadomo\u015bci - najwa\u017cniejsze', u'http://www.gazetaprawna.pl/wiadomosci/najwazniejsze/rss.xml'),
        (u'Biznes i prawo gospodarcze', u'http://biznes.gazetaprawna.pl/rss.xml'),
        (u'Prawo i wymiar sprawiedliwo\u015bci', u'http://prawo.gazetaprawna.pl/rss.xml'),
        (u'Praca i ubezpieczenia', u'http://praca.gazetaprawna.pl/rss.xml'),
        (u'Podatki i rachunkowo\u015b\u0107', u'http://podatki.gazetaprawna.pl/rss.xml')
    ]

    def print_version(self, url):
        url = url.replace('wiadomosci/artykuly', 'drukowanie')
        url = url.replace('artykuly', 'drukowanie')
        url = url.replace('porady', 'drukowanie')
        url = url.replace('wywiady', 'drukowanie')
        url = url.replace('orzeczenia', 'drukowanie')
        url = url.replace('galeria', 'drukowanie')
        url = url.replace('komentarze', 'drukowanie')
        url = url.replace('biznes.gazetaprawna', 'www.gazetaprawna')
        url = url.replace('podatki.gazetaprawna', 'www.gazetaprawna')
        url = url.replace('prawo.gazetaprawna', 'www.gazetaprawna')
        url = url.replace('praca.gazetaprawna', 'www.gazetaprawna')
        return url
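A quick sketch of what print_version above does to a section URL; the article address is made up for illustration:

    url = 'http://biznes.gazetaprawna.pl/artykuly/508501,przyklad.html'  # hypothetical
    url = url.replace('artykuly', 'drukowanie')
    url = url.replace('biznes.gazetaprawna', 'www.gazetaprawna')
    print url  # -> http://www.gazetaprawna.pl/drukowanie/508501,przyklad.html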


@ -1,9 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re

# Needed for BLOGs
from calibre.web.feeds import Feed

class HBR(BasicNewsRecipe):

    title = 'Harvard Business Review Blogs'
@ -32,6 +29,7 @@ class HBR(BasicNewsRecipe):
        feeds = [('Blog', 'http://feeds.harvardbusiness.org/harvardbusiness')]
        oldest_article = 30
        max_articles_per_feed = 100
        use_embedded_content = False
    else:
        timefmt = ' [%B %Y]'
@ -59,9 +57,9 @@ class HBR(BasicNewsRecipe):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOGIN_URL)
        br.select_form(name='signInForm')
        br['signInForm:username'] = self.username
        br['signInForm:password'] = self.password
        br.select_form(name='signin-form')
        br['signin-form:username'] = self.username
        br['signin-form:password'] = self.password
        raw = br.submit().read()
        if 'My Account' not in raw:
            raise Exception('Failed to login, are you sure your username and password are correct?')
@ -161,27 +159,13 @@ class HBR(BasicNewsRecipe):
        return startDate, endDate

    #-------------------------------------------------------------------------------------------------
    def hbr_parse_blogs(self, feeds):
        # Do the "official" parse_feeds first
        rssFeeds = Feed()
        # Use the PARSE_FEEDS method to get a Feeds object of the articles
        rssFeeds = BasicNewsRecipe.parse_feeds(self)
        # Create a new feed of the right configuration and append to existing feeds
        self.feed_to_index_append(rssFeeds[:], feeds)

    #-------------------------------------------------------------------------------------------------
    def parse_index(self):
        if self.INCLUDE_ARTICLES == True:
            soup = self.hbr_get_toc()
            feeds = self.hbr_parse_toc(soup)
        else:
            feeds = []

        # blog stuff
        if self.INCLUDE_BLOGS == True:
            self.hbr_parse_blogs(feeds)
        return BasicNewsRecipe.parse_index(self)
        return feeds

    #-------------------------------------------------------------------------------------------------

BIN recipes/icons/autobild.png Normal file (binary file, 614 B, not shown)
BIN (binary file, 1.1 KiB, not shown)
BIN (binary file, 1.0 KiB, not shown)


@ -16,7 +16,7 @@ class Jezebel(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    use_embedded_content = True
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/jezebel.com/img/logo.png'
    extra_css = '''
@ -32,13 +32,12 @@ class Jezebel(BasicNewsRecipe):
        , 'language' : language
    }

    remove_attributes = ['width','height']
    keep_only_tags = [dict(attrs={'class':'content permalink'})]
    remove_tags_before = dict(name='h1')
    remove_tags = [dict(attrs={'class':'contactinfo'})]
    remove_tags_after = dict(attrs={'class':'contactinfo'})
    feeds = [(u'Articles', u'http://feeds.gawker.com/jezebel/vip?format=xml')]
    remove_tags = [
        {'class': 'feedflare'},
    ]
    feeds = [(u'Articles', u'http://feeds.gawker.com/jezebel/full')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
'''
Profile to download KoreaHerald
'''
from calibre.web.feeds.news import BasicNewsRecipe

class KoreaHerald(BasicNewsRecipe):
    title = u'KoreaHerald'
    language = 'en'
    description = u'Korea Herald News articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 10
    recursions = 3
    max_articles_per_feed = 10
    no_stylesheets = True

    keep_only_tags = [
        dict(id=['contentLeft', '_article'])
    ]

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
        dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
    ]

    feeds = [
        ('All News', 'http://www.koreaherald.com/rss/020000000000.xml'),
        ('National', 'http://www.koreaherald.com/rss/020100000000.xml'),
        ('Business', 'http://www.koreaherald.com/rss/020200000000.xml'),
        ('Life&Style', 'http://www.koreaherald.com/rss/020300000000.xml'),
        ('Entertainment', 'http://www.koreaherald.com/rss/020400000000.xml'),
        ('Sports', 'http://www.koreaherald.com/rss/020500000000.xml'),
        ('Opinion', 'http://www.koreaherald.com/rss/020600000000.xml'),
        ('English Cafe', 'http://www.koreaherald.com/rss/021000000000.xml'),
    ]


@ -16,7 +16,7 @@ class Kotaku(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    use_embedded_content = True
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/kotaku.com/img/logo.png'
    extra_css = '''
@ -31,13 +31,12 @@ class Kotaku(BasicNewsRecipe):
        , 'language' : language
    }

    remove_attributes = ['width','height']
    keep_only_tags = [dict(attrs={'class':'content permalink'})]
    remove_tags_before = dict(name='h1')
    remove_tags = [dict(attrs={'class':'contactinfo'})]
    remove_tags_after = dict(attrs={'class':'contactinfo'})
    feeds = [(u'Articles', u'http://feeds.gawker.com/kotaku/vip?format=xml')]
    remove_tags = [
        {'class': 'feedflare'},
    ]
    feeds = [(u'Articles', u'http://feeds.gawker.com/kotaku/full')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@ -48,7 +48,7 @@ class LeMonde(BasicNewsRecipe):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
        return self.adeify_images(soup)

    preprocess_regexps = [
        (re.compile(r'([0-9])%'), lambda m: m.group(1) + '&nbsp;%'),

37 recipes/lupa.recipe Normal file

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class LupaCZ(BasicNewsRecipe):
    title = 'Lupa'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Zpr\xe1vi\u010dky a \u010dl\xe1nky z Lupa.cz'
    oldest_article = 2
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'IT,news,CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
                 p.perex img {display:none;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Zpr\xe1vi\u010dky', u'http://rss.lupa.cz/zpravicky'),
        (u'\u010cl\xe1nky', u'http://rss.lupa.cz/clanky')
    ]

    remove_tags_before = dict(id='main')
    remove_tags_after = [dict(id='main')]
    remove_tags = [
        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]

37 recipes/mesec.recipe Normal file

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class MesecCZ(BasicNewsRecipe):
    title = u'M\u011b\u0161ec'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Zpr\xe1vi\u010dky a \u010dl\xe1nky z Mesec.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'finance,CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
                 p.perex img {display:none;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', u'http://www.mesec.cz/rss/aktuality/'),
        (u'\u010cl\xe1nky', u'http://www.mesec.cz/rss/clanky/')
    ]

    remove_tags_before = dict(id='main')
    remove_tags_after = [dict(id='main')]
    remove_tags = [
        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]

43 recipes/novinky.recipe Normal file

@ -0,0 +1,43 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class NovinkyCZ(BasicNewsRecipe):
    title = 'Novinky'
    __author__ = 'Tomas Latal'
    __version__ = '1.1'
    __date__ = '30 April 2011'
    description = 'News from server Novinky.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Novinky'
    category = 'news, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://img193.imageshack.us/img193/3039/novinkycover.jpg'
    extra_css = 'p.acmDescription{font-style:italic;} p.acmAuthor{font-size:0.8em; color:#707070}'

    feeds = [
        (u'Dom\xe1c\xed', u'http://www.novinky.cz/rss/domaci/'),
        (u'Zahrani\u010d\xed', u'http://www.novinky.cz/rss/zahranicni/'),
        (u'Krimi', u'http://www.novinky.cz/rss/krimi/'),
        (u'Ekonomika', u'http://www.novinky.cz/rss/ekonomika/'),
        (u'Finance', u'http://www.novinky.cz/rss/finance/'),
        (u'Kultura', u'http://www.novinky.cz/rss/kultura/'),
        (u'Koktejl', u'http://www.novinky.cz/rss/koktejl/'),
        (u'Internet a PC', u'http://www.novinky.cz/rss/internet-a-pc/'),
        (u'Auto-moto', u'http://www.novinky.cz/rss/auto/'),
    ]

    remove_tags_before = dict(id='articleContent')
    remove_tags_after = [dict(id='movedArticleAuthors')]
    remove_tags = [
        dict(name='div', attrs={'id':['articleColumnInfo','pictureInnerBox']}),
        dict(name='p', attrs={'id':['articleDate']})
    ]

100 recipes/novistandard.recipe Normal file

@ -0,0 +1,100 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.standard.rs
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class NoviStandard(BasicNewsRecipe):
    title = 'Novi Standard'
    __author__ = 'Darko Miletic'
    description = 'NoviStandard - energija je neunistiva!'
    publisher = 'Novi Standard'
    category = 'news, politics, Serbia'
    no_stylesheets = True
    delay = 1
    oldest_article = 15
    encoding = 'utf-8'
    publication_type = 'magazine'
    needs_subscription = 'optional'
    remove_empty_feeds = True
    INDEX = 'http://www.standard.rs/'
    use_embedded_content = False
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://www.standard.rs/templates/ja_opal/images/red/logo.png'
    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Arial,"Segoe UI","Trebuchet MS",Helvetica,sans1,sans-serif}
        .dropcap{font-family: Georgia,Times,serif1,serif; display:inline}
        .dropcap:first-letter{display: inline; font-size: xx-large; font-weight: bold}
        .contentheading{color: gray; font-size: x-large}
        .article-meta, .createdby{color: red}
        img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
    """

    conversion_options = {
        'comment'    : description
        , 'tags'     : category
        , 'publisher': publisher
        , 'language' : language
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.select_form(name='login')
            br['username'] = self.username
            br['passwd'] = self.password
            br.submit()
        return br

    keep_only_tags = [dict(attrs={'class':['contentheading','article-meta','article-content']})]
    remove_tags_after = dict(attrs={'class':'extravote-container'})
    remove_tags = [
        dict(name=['object','link','iframe','meta','base'])
        ,dict(attrs={'class':'extravote-container'})
    ]
    remove_attributes = ['border','background','height','width','align','valign','lang']

    feeds = [
        (u'Naslovna', u'http://www.standard.rs/index.php?format=feed&type=rss')
        ,(u'Politika', u'http://www.standard.rs/vesti/36-politika.html?format=feed&type=rss')
        ,(u'Cvijanovic preporucuje', u'http://www.standard.rs/-cvijanovi-vam-preporuuje.html?format=feed&type=rss')
        ,(u'Kolumne', u'http://www.standard.rs/vesti/49-kolumne.html?format=feed&type=rss')
        ,(u'Kultura', u'http://www.standard.rs/vesti/40-kultura.html?format=feed&type=rss')
        ,(u'Lifestyle', u'http://www.standard.rs/vesti/39-lifestyle.html?format=feed&type=rss')
        ,(u'Svet', u'http://www.standard.rs/vesti/41-svet.html?format=feed&type=rss')
        ,(u'Ekonomija', u'http://www.standard.rs/vesti/37-ekonomija.html?format=feed&type=rss')
        ,(u'Sport', u'http://www.standard.rs/vesti/38-sport.html?format=feed&type=rss')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('div'):
            if len(item.contents) == 0:
                item.extract()
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

37 recipes/podnikatel.recipe Normal file

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class PodnikatelCZ(BasicNewsRecipe):
    title = 'Podnikatel'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Aktuality a \u010dl\xe1nky z Podnikatel.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'podnikani, business, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
                 p.perex img {display:none;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', u'http://rss.podnikatel.cz/aktuality'),
        (u'\u010cl\xe1nky', u'http://rss.podnikatel.cz/clanky')
    ]

    remove_tags_before = dict(id='art-content')
    remove_tags_after = [dict(id='art-content')]
    remove_tags = [
        dict(attrs={'class':['socialshare','box-blue','author clear','labels-terms','box diskuze','ad','page-nav right','infobox','box zpravy','s-clanky']}),
        dict(id=['path','article-tools','discussionList','similarItems','promo-box'])
    ]

54 recipes/socialdiva.recipe Normal file

@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = u'2011'
'''
socialdiva.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class SocialDiva(BasicNewsRecipe):
    title = u'Social Diva'
    __author__ = u'Silviu Cotoara'
    description = u'When in doubt, wear red'
    publisher = 'Social Diva'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste,Femei'
    encoding = 'utf-8'
    cover_url = 'http://www.socialdiva.ro/images/logo.png'

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
        ,'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'class':'col-alpha mt5 content_articol'}),
        dict(name='div', attrs={'class':'mt5'})
    ]

    remove_tags = [
        dict(name='a', attrs={'class':['comments float-left scroll mt5']}),
        dict(name='a', attrs={'class':['comments float-left scroll']}),
        dict(name='div', attrs={'class':['rating-container relative float-left']}),
        dict(name='div', attrs={'class':['float-right social_articol']})
    ]

    remove_tags_after = [
        dict(name='a', attrs={'class':['comments float-left scroll mt5']})
    ]

    feeds = [
        (u'Feeds', u'http://www.socialdiva.ro/rss.html')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@ -18,7 +18,7 @@ class TelepolisNews(BasicNewsRecipe):
    recursion = 0
    no_stylesheets = True
    encoding = "utf-8"
    language = 'de_AT'
    language = 'de'
    use_embedded_content = False
    remove_empty_feeds = True


@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = 'TheMarker Financial News in Hebrew'
    __author__ = 'TonyTheBookworm, Marbs'
    __author__ = 'Marbs'
    cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
    title = u'TheMarker'
    language = 'he'
@ -11,42 +11,38 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']})]
    max_articles_per_feed = 10
    keep_only_tags = dict(name='div', attrs={'id':'content'})
    remove_attributes = ['width','float','margin-left']
    no_stylesheets = True
    remove_tags = [dict(name='div', attrs={'class':['social-nav article-social-nav','prsnlArticleEnvelope','cb']}),
                   dict(name='a', attrs={'href':['/misc/mobile']}),
                   dict(name='span', attrs={'class':['post-summ']})]
    max_articles_per_feed = 100
    extra_css = 'body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
    feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
             (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
             (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
             (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
             (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
             (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
             (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
             (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
             (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
             (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
             (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')]
    feeds = [(u'Head Lines', u'http://www.themarker.com/cmlink/1.144'),
             (u'TA Market', u'http://www.themarker.com/cmlink/1.243'),
             (u'Real Estate', u'http://www.themarker.com/cmlink/1.605656'),
             (u'Global', u'http://www.themarker.com/cmlink/1.605658'),
             (u'Wall Street', u'http://www.themarker.com/cmlink/1.613713'),
             (u'SmartPhone', u'http://www.themarker.com/cmlink/1.605661'),
             (u'Law', u'http://www.themarker.com/cmlink/1.605664'),
             (u'Media', u'http://www.themarker.com/cmlink/1.605660'),
             (u'Consumer', u'http://www.themarker.com/cmlink/1.605662'),
             (u'Career', u'http://www.themarker.com/cmlink/1.605665'),
             (u'Car', u'http://www.themarker.com/cmlink/1.605663'),
             (u'High Tech', u'http://www.themarker.com/cmlink/1.605659'),
             (u'Small Business', u'http://www.themarker.com/cmlink/1.605666')]

    def print_version(self, url):
        split1 = url.split("=")
        weblinks = url
        #split1 = url.split("/")
        #print_url = 'http://www.themarker.com/misc/article-print-page/' + split1[-1]
        txt = url
        if weblinks is not None:
            for link in weblinks:
                #---------------------------------------------------------
                # here we need some help with some regexpressions
                # we are trying to find it.themarker.com in a url
                #-----------------------------------------------------------
                re1 = '.*?'  # Non-greedy match on filler
                re2 = '(it\\.themarker\\.com)'  # Fully Qualified Domain Name 1
                rg = re.compile(re1 + re2, re.IGNORECASE | re.DOTALL)
                m = rg.search(url)
                re1 = '.*?'  # Non-greedy match on filler
                re2 = '(tv)'  # Word 1
                if m:
                    split2 = url.split("article/")
                    print_url = 'http://it.themarker.com/tmit/PrintArticle/' + split2[1]
                else:
                    print_url = 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + split1[1] + '.xml'
                return print_url
        rg = re.compile(re1 + re2, re.IGNORECASE | re.DOTALL)
        m = rg.search(txt)
        if m:
            #print 'bad link'
            return 1
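The regex-based subdomain test above is fragile; a minimal sketch of the same check done with urlparse instead (the sample URL is hypothetical, and this is not part of the recipe):

    from urlparse import urlparse  # Python 2, matching the recipe's vintage

    sample_url = 'http://it.themarker.com/tmit/article/12345'  # hypothetical
    if urlparse(sample_url).hostname == 'it.themarker.com':
        print_url = 'http://it.themarker.com/tmit/PrintArticle/' + sample_url.split('article/')[1]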


@ -10,6 +10,8 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe

class Time(BasicNewsRecipe):
    recipe_disabled = ('This recipe has been disabled as TIME no longer'
                       ' publishes complete articles on the web.')
    title = u'Time'
    __author__ = 'Kovid Goyal and Sujata Raman'
    description = 'Weekly magazine'


@ -7,13 +7,11 @@ usatoday.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag
import re
class USAToday(BasicNewsRecipe):
    title = 'USA Today'
    __author__ = 'GRiker'
    __author__ = 'Kovid Goyal'
    oldest_article = 1
    timefmt = ''
    max_articles_per_feed = 20
@ -31,7 +29,6 @@ class USAToday(BasicNewsRecipe):
                margin-bottom: 0em; \
                font-size: smaller;}\n \
                .articleBody {text-align: left;}\n '
    conversion_options = { 'linearize_tables' : True }
    #simultaneous_downloads = 1
    feeds = [
        ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
@ -47,63 +44,26 @@ class USAToday(BasicNewsRecipe):
        ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
        ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
    ]
    keep_only_tags = [dict(attrs={'class':[
                                  'byLine',
                                  'inside-copy',
                                  'inside-head',
                                  'inside-head2',
                                  'item',
                                  'item-block',
                                  'photo-container',
                                  ]}),
                      dict(id=[
                               'applyMainStoryPhoto',
                               'permalink',
                               ])]
    keep_only_tags = [dict(attrs={'class':'story'})]
    remove_tags = [
        dict(attrs={'class':[
                             'share',
                             'reprints',
                             'inline-h3',
                             'info-extras',
                             'ppy-outer',
                             'ppy-caption',
                             'comments',
                             'jump',
                             'pagetools',
                             'post-attributes',
                             'tags',
                             'bottom-tools',
                             'sponsoredlinks',
                             ]}),
        dict(id=['pluck']),
    ]
    remove_tags = [dict(attrs={'class':[
                               'comments',
                               'jump',
                               'pagetools',
                               'post-attributes',
                               'tags',
                               ]}),
                   dict(id=[])]
    #feeds = [('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles')]

    def dump_hex(self, src, length=16):
        ''' Diagnostic '''
        FILTER = ''.join([(len(repr(chr(x))) == 3) and chr(x) or '.' for x in range(256)])
        N = 0; result = ''
        while src:
            s, src = src[:length], src[length:]
            hexa = ' '.join(["%02X" % ord(x) for x in s])
            s = s.translate(FILTER)
            result += "%04X   %-*s   %s\n" % (N, length*3, hexa, s)
            N += length
        print result

    def fixChars(self, string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91", "&#8216;", string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92", "&#8217;", fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93", "&#8220;", fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94", "&#8221;", fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96", "&#8211;", fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97", "&#8212;", fixed)
        return fixed

    def get_masthead_url(self):
        masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'
@ -115,321 +75,4 @@ class USAToday(BasicNewsRecipe):
            masthead = None
        return masthead

    def massageNCXText(self, description):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&' with '&#38;'
            massaged = re.sub("&", "&#38;", massaged)
            return self.fixChars(massaged)
        else:
            return description

    def parse_feeds(self, *args, **kwargs):
        parsed_feeds = BasicNewsRecipe.parse_feeds(self, *args, **kwargs)
        # Count articles for progress dialog
        article_count = 0
        for feed in parsed_feeds:
            article_count += len(feed)
        self.log("Queued %d articles" % article_count)
        return parsed_feeds

    def preprocess_html(self, soup):
        soup = self.strip_anchors(soup)
        return soup

    def postprocess_html(self, soup, first_fetch):
        # Remove navLinks <div class="inside-copy" style="padding-bottom:3px">
        navLinks = soup.find(True, {'style':'padding-bottom:3px'})
        if navLinks:
            navLinks.extract()

        # Remove <div class="inside-copy" style="margin-bottom:10px">
        gibberish = soup.find(True, {'style':'margin-bottom:10px'})
        if gibberish:
            gibberish.extract()

        # Change <inside-head> to <h2>
        headline = soup.find(True, {'class':['inside-head','inside-head2']})
        if not headline:
            headline = soup.find('h3')
        if headline:
            tag = Tag(soup, "h2")
            tag['class'] = "headline"
            tag.insert(0, headline.contents[0])
            headline.replaceWith(tag)
        else:
            print "unable to find headline:\n%s\n" % soup

        # Change byLine to byline, change commas to middot
        # Kindle renders commas in byline as '&'
        byline = soup.find(True, {'class':'byLine'})
        if byline:
            byline['class'] = 'byline'
            # Replace comma with middot
            byline.contents[0].replaceWith(re.sub(",", " &middot;", byline.renderContents()))

        jumpout_punc_list = [':','?']
        # Remove the inline jumpouts in <div class="inside-copy">
        paras = soup.findAll(True, {'class':'inside-copy'})
        for para in paras:
            if re.match("<b>[\w\W]+ ", para.renderContents()):
                p = para.find('b')
                for punc in jumpout_punc_list:
                    punc_offset = p.contents[0].find(punc)
                    if punc_offset == -1:
                        continue
                    if punc_offset > 1:
                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
                            #print "extracting \n%s\n" % para.prettify()
                            para.extract()

        # Reset class for remaining
        paras = soup.findAll(True, {'class':'inside-copy'})
        for para in paras:
            para['class'] = 'articleBody'

        # Remove inline jumpouts in <p>
        paras = soup.findAll(['p'])
        for p in paras:
            if hasattr(p, 'contents') and len(p.contents):
                for punc in jumpout_punc_list:
                    punc_offset = p.contents[0].find(punc)
                    if punc_offset == -1:
                        continue
                    if punc_offset > 2 and hasattr(p, 'a') and len(p.contents):
                        #print "evaluating %s\n" % p.contents[0][:punc_offset+1]
                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
                            #print "extracting \n%s\n" % p.prettify()
                            p.extract()

        # Capture the first img, insert after headline
        imgs = soup.findAll('img')
        print "postprocess_html(): %d images" % len(imgs)
        if imgs:
            divTag = Tag(soup, 'div')
            divTag['class'] = 'image'
            body = soup.find('body')
            img = imgs[0]
            #print "img: \n%s\n" % img.prettify()

            # Table for photo and credit
            tableTag = Tag(soup, 'table')

            # Photo
            trimgTag = Tag(soup, 'tr')
            tdimgTag = Tag(soup, 'td')
            tdimgTag.insert(0, img)
            trimgTag.insert(0, tdimgTag)
            tableTag.insert(0, trimgTag)

            # Credit
            trcreditTag = Tag(soup, 'tr')
            tdcreditTag = Tag(soup, 'td')
            tdcreditTag['class'] = 'credit'
            credit = soup.find('td', {'class':'photoCredit'})
            if credit:
                tdcreditTag.insert(0, NavigableString(credit.renderContents()))
            else:
                credit = img['credit']
                if credit:
                    tdcreditTag.insert(0, NavigableString(credit))
                else:
                    tdcreditTag.insert(0, NavigableString(''))
            trcreditTag.insert(0, tdcreditTag)
            tableTag.insert(1, trcreditTag)
            dtc = 0
            divTag.insert(dtc, tableTag)
            dtc += 1

            if False:
                # Add the caption in the table
                tableCaptionTag = Tag(soup, 'caption')
                tableCaptionTag.insert(0, soup.find('td', {'class':'photoCredit'}).renderContents())
                tableTag.insert(1, tableCaptionTag)
                divTag.insert(dtc, tableTag)
                dtc += 1
                body.insert(1, divTag)
            else:
                # Add the caption below the table
                #print "Looking for caption in this soup:\n%s" % img.prettify()
                captionTag = Tag(soup, 'p')
                captionTag['class'] = 'caption'
                if hasattr(img, 'alt') and img['alt']:
                    captionTag.insert(0, NavigableString('<blockquote>%s</blockquote>' % img['alt']))
                    divTag.insert(dtc, captionTag)
                    dtc += 1
                else:
                    try:
                        captionTag.insert(0, NavigableString('<blockquote>%s</blockquote>' % img['cutline']))
                        divTag.insert(dtc, captionTag)
                        dtc += 1
                    except:
                        pass

                hrTag = Tag(soup, 'hr')
                divTag.insert(dtc, hrTag)
                dtc += 1

            # Delete <div id="applyMainStoryPhoto"
            photoJunk = soup.find('div', {'id':'applyMainStoryPhoto'})
            if photoJunk:
                photoJunk.extract()

            # Insert img after headline
            tag = body.find(True)
            insertLoc = 0
            headline_found = False
            while True:
                # Scan the top-level tags
                insertLoc += 1
                if hasattr(tag, 'class') and tag['class'] == 'headline':
                    headline_found = True
                    body.insert(insertLoc, divTag)
                    break
                tag = tag.nextSibling
                if not tag:
                    break

            if not headline_found:
                # Monolithic <div> - restructure
                tag = body.find(True)
                while True:
                    insertLoc += 1
                    try:
                        if hasattr(tag, 'class') and tag['class'] == 'headline':
                            headline_found = True
                            tag.insert(insertLoc, divTag)
                            break
                    except:
                        pass
                    tag = tag.next
                    if not tag:
                        break

            # Yank out headline, img and caption
            headline = body.find('h2', 'headline')
            img = body.find('div', 'image')
            caption = body.find('p''class')

            # body(0) is calibre_navbar
            # body(1) is <div class="item">
            btc = 1
            headline.extract()
            body.insert(1, headline)
            btc += 1
            if img:
                img.extract()
                body.insert(btc, img)
                btc += 1
            if caption:
                caption.extract()
                body.insert(btc, caption)
                btc += 1

            if len(imgs) > 1:
                if True:
                    [img.extract() for img in imgs[1:]]
                else:
                    # Format the remaining images
                    # This doesn't work yet
                    for img in imgs[1:]:
                        print "img:\n%s\n" % img.prettify()
                        divTag = Tag(soup, 'div')
                        divTag['class'] = 'image'

                        # Table for photo and credit
                        tableTag = Tag(soup, 'table')

                        # Photo
                        trimgTag = Tag(soup, 'tr')
                        tdimgTag = Tag(soup, 'td')
                        tdimgTag.insert(0, img)
                        trimgTag.insert(0, tdimgTag)
                        tableTag.insert(0, trimgTag)

                        # Credit
                        trcreditTag = Tag(soup, 'tr')
                        tdcreditTag = Tag(soup, 'td')
                        tdcreditTag['class'] = 'credit'
                        try:
                            tdcreditTag.insert(0, NavigableString(img['credit']))
                        except:
                            tdcreditTag.insert(0, NavigableString(''))
                        trcreditTag.insert(0, tdcreditTag)
                        tableTag.insert(1, trcreditTag)
                        divTag.insert(0, tableTag)
                        soup.img.replaceWith(divTag)

        return soup

    def postprocess_book(self, oeb, opts, log):

        def extract_byline(href):
            # <meta name="byline" content=
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            byline = soup.find('div', attrs={'class':'byline'})
            if byline:
                byline['class'] = 'byline'
                # Replace comma with middot
                byline.contents[0].replaceWith(re.sub(u",", u" &middot;",
                    byline.renderContents(encoding=None)))
                return byline.renderContents(encoding=None)
            else:
                paras = soup.findAll(text=True)
                for para in paras:
                    if para.startswith("Copyright"):
                        return para[len('Copyright xxxx '):para.find('.')]
                return None

        def extract_description(href):
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            description = soup.find('meta', attrs={'name':'description'})
            if description:
                return self.massageNCXText(description['content'])
            else:
                # Take first paragraph of article
                articleBody = soup.find('div', attrs={'id':['articleBody','item']})
                if articleBody:
                    paras = articleBody.findAll('p')
                    for p in paras:
                        if p.renderContents() > '':
                            return self.massageNCXText(self.tag_to_string(p, use_alt=False))
                else:
                    print "Didn't find <div id='articleBody'> in this soup:\n%s" % soup.prettify()
                    return None

        # Method entry point here
        # Single section toc looks different than multi-section tocs
        if oeb.toc.depth() == 2:
            for article in oeb.toc:
                if article.author is None:
                    article.author = extract_byline(article.href)
                if article.description is None:
                    article.description = extract_description(article.href)
        elif oeb.toc.depth() == 3:
            for section in oeb.toc:
                for article in section:
                    article.author = extract_byline(article.href)
                    '''
                    if article.author is None:
                        article.author = self.massageNCXText(extract_byline(article.href))
                    else:
                        article.author = self.massageNCXText(article.author)
                    '''
                    if article.description is None:
                        article.description = extract_description(article.href)

    def strip_anchors(self, soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

39 recipes/vitalia.recipe Normal file

@ -0,0 +1,39 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class VitaliaCZ(BasicNewsRecipe):
    title = 'Vitalia'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Aktuality a \u010dl\xe1nky z Vitalia.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'zdravi, vztahy, wellness, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0; line-height: 1.4; padding: 0 0 10px 0; font-weight: bold;} \
                 p.perex img {display:none;} \
                 span.author {font-size:0.8em; font-style:italic} \
                 .urs div.rs-tip-major {padding:0.5em; background: #e0e0e0 none repeat scroll 0 0;border: 1px solid #909090;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', 'http://www.vitalia.cz/rss/aktuality/'),
        (u'\u010cl\xe1nky', u'http://www.vitalia.cz/rss/clanky/'),
    ]

    remove_tags_before = dict(id='main')
    remove_tags_after = [dict(id='main')]
    remove_tags = [
        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]


@ -0,0 +1,115 @@
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Volkskrant_full(BasicNewsRecipe):
    # This recipe will download the Volkskrant newspaper,
    # from the subscribers site. It requires a password.
    # Known issues are: articles that are spread out over
    # multiple pages will appear multiple times. Pages
    # that contain only adverts will appear, but empty.
    # The supplement 'Volkskrant Magazine' on saturday
    # is currently not downloaded.
    # You can set a manual date, to download an archived
    # newspaper. Volkskrant stores over a month at the
    # moment of writing. To do so I suggest you unmark
    # the date on the line below, and insert it in the title. Then
    # follow the instructions marked further below.

    title = 'De Volkskrant (subscription)'  # [za, 13 nov 2010]'
    __author__ = u'Selcal'
    description = u"Volkskrant"
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'nl'
    use_embedded_content = False
    simultaneous_downloads = 1
    delay = 1
    needs_subscription = True
    # Set RETRIEVEDATE to 'yyyymmdd' to load an older
    # edition. Otherwise keep '%Y%m%d'
    # When setting a manual date, unmark and add the date
    # to the title above, and unmark the timefmt line to stop
    # Calibre from adding today's date in addition.
    # timefmt = ''
    RETRIEVEDATE = strftime('%Y%m%d')
    INDEX_MAIN = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/#text'
    INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/'
    LOGIN = 'http://www.volkskrant.nl/vk/user/loggedIn.do'
    remove_tags = [dict(name='address')]
    cover_url = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/page.jpg'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        krant = []

        def strip_title(_title):
            i = 0
            while ((_title[i] <> ":") and (i <= len(_title))):
                i = i + 1
            return (_title[0:i])

        for temp in range(5):
            try:
                soup = self.index_to_soup(self.INDEX_MAIN)
                break
            except:
                #print '(Retrying main index load)'
                continue
        mainsoup = soup.find('td', attrs={'id': 'select_page_top'})
        for option in mainsoup.findAll('option'):
            articles = []
            _INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text'
            _INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/'
            #print ''
            #print '<------- Processing section: ' + _INDEX + ' ------------------------->'
            for temp in range(5):
                try:
                    soup = self.index_to_soup(_INDEX)
                    break
                except:
                    #print '(Retrying index load)'
                    continue
            for item in soup.findAll('area'):
                art_nr = item['class']
                attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:len(art_nr)]
                #print '==> Found: ' + attrname;
                index_title = soup.find('div', attrs={'class': attrname})
                get_title = index_title['title'];
                _ARTICLE = _INDEX_ARTICLE + attrname + '.html#text'
                title = get_title;
                #print '--> Title: ' + title;
                #print '--> URL: ' + _ARTICLE;
                for temp in range(5):
                    try:
                        souparticle = self.index_to_soup(_ARTICLE);
                        break
                    except:
                        print '(Retrying URL load)'
                        continue
                headerurl = souparticle.findAll('frame')[0]['src'];
                #print '--> Read frame name for header: ' + headerurl;
                url = _INDEX_ARTICLE + headerurl[0:len(headerurl)-12] + '_text.html';
                #print '--> Corrected URL: ' + url;
                if (get_title <> ''):
                    title = strip_title(get_title)
                    date = strftime(' %B %Y')
                if (title <> ''):
                    articles.append({
                        'title'       : title,
                        'date'        : date,
                        'url'         : url,
                        'description' : ''
                    })
            krant.append((option.string, articles))
        return krant
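A minimal sketch of the manual-date setup the comments above describe; the date value is hypothetical:

    title = 'De Volkskrant (subscription) [za, 13 nov 2010]'
    timefmt = ''                # stop calibre appending today's date as well
    RETRIEVEDATE = '20101113'   # archived edition, yyyymmdd, instead of strftime('%Y%m%d')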


@ -118,6 +118,7 @@ sort_columns_at_startup = None
# timestamp default if not set: dd MMM yyyy
gui_pubdate_display_format = 'MMM yyyy'
gui_timestamp_display_format = 'dd MMM yyyy'
gui_last_modified_display_format = 'dd MMM yyyy'
#: Control sorting of titles and series in the library display
# Control title and series sorting in the library view. If set to
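The new tweak takes the same Qt-style date format strings as the two above it; a sketch of overriding it in a personal tweaks file (the format shown is an example, not the shipped default):

    gui_last_modified_display_format = 'yyyy-MM-dd'  # e.g. 2011-05-08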


@ -7,17 +7,30 @@ CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL DEFAULT 'Unknown' COLLATE NOCASE,
sort TEXT COLLATE NOCASE,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
uri TEXT,
series_index INTEGER NOT NULL DEFAULT 1,
pubdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
series_index REAL NOT NULL DEFAULT 1.0,
author_sort TEXT COLLATE NOCASE,
isbn TEXT DEFAULT "" COLLATE NOCASE,
path TEXT NOT NULL DEFAULT ""
);
lccn TEXT DEFAULT "" COLLATE NOCASE,
path TEXT NOT NULL DEFAULT "",
flags INTEGER NOT NULL DEFAULT 1
, uuid TEXT, has_cover BOOL DEFAULT 0, last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00");
CREATE TABLE books_authors_link ( id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
author INTEGER NOT NULL,
UNIQUE(book, author)
);
CREATE TABLE books_languages_link ( id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
lang_code INTEGER NOT NULL,
item_order INTEGER NOT NULL DEFAULT 0,
UNIQUE(book, lang_code)
);
CREATE TABLE books_plugin_data(id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
name TEXT NON NULL,
val TEXT NON NULL,
UNIQUE(book,name));
CREATE TABLE books_publishers_link ( id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
publisher INTEGER NOT NULL,
@ -49,11 +62,51 @@ CREATE TABLE conversion_options ( id INTEGER PRIMARY KEY,
data BLOB NOT NULL,
UNIQUE(format,book)
);
CREATE TABLE custom_columns (
id INTEGER PRIMARY KEY AUTOINCREMENT,
label TEXT NOT NULL,
name TEXT NOT NULL,
datatype TEXT NOT NULL,
mark_for_delete BOOL DEFAULT 0 NOT NULL,
editable BOOL DEFAULT 1 NOT NULL,
display TEXT DEFAULT "{}" NOT NULL,
is_multiple BOOL DEFAULT 0 NOT NULL,
normalized BOOL NOT NULL,
UNIQUE(label)
);
CREATE TABLE data ( id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
format TEXT NON NULL COLLATE NOCASE,
uncompressed_size INTEGER NON NULL,
name TEXT NON NULL,
UNIQUE(book, format)
);
CREATE TABLE feeds ( id INTEGER PRIMARY KEY,
title TEXT NOT NULL,
script TEXT NOT NULL,
UNIQUE(title)
);
CREATE TABLE identifiers ( id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
type TEXT NON NULL DEFAULT "isbn" COLLATE NOCASE,
val TEXT NON NULL COLLATE NOCASE,
UNIQUE(book, type)
);
CREATE TABLE languages ( id INTEGER PRIMARY KEY,
lang_code TEXT NON NULL COLLATE NOCASE,
UNIQUE(lang_code)
);
CREATE TABLE library_id ( id INTEGER PRIMARY KEY,
uuid TEXT NOT NULL,
UNIQUE(uuid)
);
CREATE TABLE metadata_dirtied(id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
UNIQUE(book));
CREATE TABLE preferences(id INTEGER PRIMARY KEY,
key TEXT NON NULL,
val TEXT NON NULL,
UNIQUE(key));
CREATE TABLE publishers ( id INTEGER PRIMARY KEY,
name TEXT NOT NULL COLLATE NOCASE,
sort TEXT COLLATE NOCASE,
@ -72,34 +125,143 @@ CREATE TABLE tags ( id INTEGER PRIMARY KEY,
name TEXT NOT NULL COLLATE NOCASE,
UNIQUE (name)
);
CREATE TABLE data ( id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
format TEXT NON NULL COLLATE NOCASE,
uncompressed_size INTEGER NON NULL,
name TEXT NON NULL,
UNIQUE(book, format)
);
CREATE VIEW meta AS
SELECT id, title,
(SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors,
(SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
(SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
timestamp,
(SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
(SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
(SELECT text FROM comments WHERE book=books.id) comments,
(SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
series_index,
sort,
author_sort,
(SELECT concat(format) FROM data WHERE data.book=books.id) formats,
isbn
FROM books;
SELECT id, title,
(SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
(SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
(SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
timestamp,
(SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
(SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
(SELECT text FROM comments WHERE book=books.id) comments,
(SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
series_index,
sort,
author_sort,
(SELECT concat(format) FROM data WHERE data.book=books.id) formats,
isbn,
path,
lccn,
pubdate,
flags,
uuid
FROM books;
CREATE VIEW tag_browser_authors AS SELECT
id,
name,
(SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) count,
(SELECT AVG(ratings.rating)
FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.author=authors.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
sort AS sort
FROM authors;
CREATE VIEW tag_browser_filtered_authors AS SELECT
id,
name,
(SELECT COUNT(books_authors_link.id) FROM books_authors_link WHERE
author=authors.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.author=authors.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
sort AS sort
FROM authors;
CREATE VIEW tag_browser_filtered_publishers AS SELECT
id,
name,
(SELECT COUNT(books_publishers_link.id) FROM books_publishers_link WHERE
publisher=publishers.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
name AS sort
FROM publishers;
CREATE VIEW tag_browser_filtered_ratings AS SELECT
id,
rating,
(SELECT COUNT(books_ratings_link.id) FROM books_ratings_link WHERE
rating=ratings.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.rating=ratings.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
rating AS sort
FROM ratings;
CREATE VIEW tag_browser_filtered_series AS SELECT
id,
name,
(SELECT COUNT(books_series_link.id) FROM books_series_link WHERE
series=series.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_series_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.series=series.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
(title_sort(name)) AS sort
FROM series;
CREATE VIEW tag_browser_filtered_tags AS SELECT
id,
name,
(SELECT COUNT(books_tags_link.id) FROM books_tags_link WHERE
tag=tags.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.tag=tags.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
name AS sort
FROM tags;
CREATE VIEW tag_browser_publishers AS SELECT
id,
name,
(SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) count,
(SELECT AVG(ratings.rating)
FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
name AS sort
FROM publishers;
CREATE VIEW tag_browser_ratings AS SELECT
id,
rating,
(SELECT COUNT(id) FROM books_ratings_link WHERE rating=ratings.id) count,
(SELECT AVG(ratings.rating)
FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.rating=ratings.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
rating AS sort
FROM ratings;
CREATE VIEW tag_browser_series AS SELECT
id,
name,
(SELECT COUNT(id) FROM books_series_link WHERE series=series.id) count,
(SELECT AVG(ratings.rating)
FROM books_series_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.series=series.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
(title_sort(name)) AS sort
FROM series;
CREATE VIEW tag_browser_tags AS SELECT
id,
name,
(SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) count,
(SELECT AVG(ratings.rating)
FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.tag=tags.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
name AS sort
FROM tags;
CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE);
CREATE INDEX books_authors_link_aidx ON books_authors_link (author);
CREATE INDEX books_authors_link_bidx ON books_authors_link (book);
CREATE INDEX books_idx ON books (sort COLLATE NOCASE);
CREATE INDEX books_languages_link_aidx ON books_languages_link (lang_code);
CREATE INDEX books_languages_link_bidx ON books_languages_link (book);
CREATE INDEX books_publishers_link_aidx ON books_publishers_link (publisher);
CREATE INDEX books_publishers_link_bidx ON books_publishers_link (book);
CREATE INDEX books_ratings_link_aidx ON books_ratings_link (rating);
@ -111,32 +273,38 @@ CREATE INDEX books_tags_link_bidx ON books_tags_link (book);
CREATE INDEX comments_idx ON comments (book);
CREATE INDEX conversion_options_idx_a ON conversion_options (format COLLATE NOCASE);
CREATE INDEX conversion_options_idx_b ON conversion_options (book);
CREATE INDEX custom_columns_idx ON custom_columns (label);
CREATE INDEX data_idx ON data (book);
CREATE INDEX formats_idx ON data (format);
CREATE INDEX languages_idx ON languages (lang_code COLLATE NOCASE);
CREATE INDEX publishers_idx ON publishers (name COLLATE NOCASE);
CREATE INDEX series_idx ON series (sort COLLATE NOCASE);
CREATE INDEX series_idx ON series (name COLLATE NOCASE);
CREATE INDEX tags_idx ON tags (name COLLATE NOCASE);
CREATE TRIGGER books_delete_trg
AFTER DELETE ON books
BEGIN
DELETE FROM books_authors_link WHERE book=OLD.id;
DELETE FROM books_publishers_link WHERE book=OLD.id;
DELETE FROM books_ratings_link WHERE book=OLD.id;
DELETE FROM books_series_link WHERE book=OLD.id;
DELETE FROM books_tags_link WHERE book=OLD.id;
DELETE FROM data WHERE book=OLD.id;
DELETE FROM comments WHERE book=OLD.id;
DELETE FROM conversion_options WHERE book=OLD.id;
AFTER DELETE ON books
BEGIN
DELETE FROM books_authors_link WHERE book=OLD.id;
DELETE FROM books_publishers_link WHERE book=OLD.id;
DELETE FROM books_ratings_link WHERE book=OLD.id;
DELETE FROM books_series_link WHERE book=OLD.id;
DELETE FROM books_tags_link WHERE book=OLD.id;
DELETE FROM books_languages_link WHERE book=OLD.id;
DELETE FROM data WHERE book=OLD.id;
DELETE FROM comments WHERE book=OLD.id;
DELETE FROM conversion_options WHERE book=OLD.id;
DELETE FROM books_plugin_data WHERE book=OLD.id;
DELETE FROM identifiers WHERE book=OLD.id;
END;
CREATE TRIGGER books_insert_trg
AFTER INSERT ON books
CREATE TRIGGER books_insert_trg AFTER INSERT ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() WHERE id=NEW.id;
END;
CREATE TRIGGER books_update_trg
AFTER UPDATE ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
END;
AFTER UPDATE ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title)
WHERE id=NEW.id AND OLD.title <> NEW.title;
END;
CREATE TRIGGER fkc_comments_insert
BEFORE INSERT ON comments
BEGIN
@ -169,23 +337,41 @@ CREATE TRIGGER fkc_data_update
THEN RAISE(ABORT, 'Foreign key violation: book not in books')
END;
END;
CREATE TRIGGER fkc_delete_books_authors_link
CREATE TRIGGER fkc_delete_on_authors
BEFORE DELETE ON authors
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_authors_link WHERE book=OLD.book) > 0
THEN RAISE(ABORT, 'Foreign key violation: author is still referenced')
WHEN (SELECT COUNT(id) FROM books_authors_link WHERE author=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: authors is still referenced')
END;
END;
CREATE TRIGGER fkc_delete_books_publishers_link
CREATE TRIGGER fkc_delete_on_languages
BEFORE DELETE ON languages
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_languages_link WHERE lang_code=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: language is still referenced')
END;
END;
CREATE TRIGGER fkc_delete_on_languages_link
BEFORE INSERT ON books_languages_link
BEGIN
SELECT CASE
WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: book not in books')
WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
END;
END;
CREATE TRIGGER fkc_delete_on_publishers
BEFORE DELETE ON publishers
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE book=OLD.book) > 0
THEN RAISE(ABORT, 'Foreign key violation: publisher is still referenced')
WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: publishers is still referenced')
END;
END;
CREATE TRIGGER fkc_delete_books_series_link
CREATE TRIGGER fkc_delete_on_series
BEFORE DELETE ON series
BEGIN
SELECT CASE
@ -193,12 +379,12 @@ CREATE TRIGGER fkc_delete_books_series_link
THEN RAISE(ABORT, 'Foreign key violation: series is still referenced')
END;
END;
CREATE TRIGGER fkc_delete_books_tags_link
CREATE TRIGGER fkc_delete_on_tags
BEFORE DELETE ON tags
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_tags_link WHERE tag=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: tag is still referenced')
THEN RAISE(ABORT, 'Foreign key violation: tags is still referenced')
END;
END;
CREATE TRIGGER fkc_insert_books_authors_link
@ -267,6 +453,22 @@ CREATE TRIGGER fkc_update_books_authors_link_b
THEN RAISE(ABORT, 'Foreign key violation: author not in authors')
END;
END;
CREATE TRIGGER fkc_update_books_languages_link_a
BEFORE UPDATE OF book ON books_languages_link
BEGIN
SELECT CASE
WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: book not in books')
END;
END;
CREATE TRIGGER fkc_update_books_languages_link_b
BEFORE UPDATE OF lang_code ON books_languages_link
BEGIN
SELECT CASE
WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
END;
END;
CREATE TRIGGER fkc_update_books_publishers_link_a
BEFORE UPDATE OF book ON books_publishers_link
BEGIN
@ -341,3 +543,4 @@ CREATE TRIGGER series_update_trg
BEGIN
UPDATE series SET sort=NEW.name WHERE id=NEW.id;
END;
pragma user_version=20;
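
The link tables above are plain SQLite, so the schema can be spot-checked with nothing but the standard library. A minimal sketch, assuming a metadata.db built from this script; it avoids the meta and tag_browser views, which call functions such as title_sort() and sortconcat() that calibre registers on its own connections:

import sqlite3

conn = sqlite3.connect('metadata.db')
# Plain tables need none of calibre's registered SQL functions.
for book_id, title in conn.execute('SELECT id, title FROM books LIMIT 5'):
    langs = [code for (code,) in conn.execute(
        'SELECT l.lang_code FROM languages l '
        'JOIN books_languages_link bl ON bl.lang_code = l.id '
        'WHERE bl.book = ?', (book_id,))]
    print book_id, title, langs
conn.close()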


@ -2,6 +2,11 @@ a {
text-decoration: none;
color: blue
}
a:hover {
color: red
}
.comments {
margin-top: 0;
padding-top: 0;


@ -23,6 +23,9 @@ wWinMain(HINSTANCE Inst, HINSTANCE PrevInst,
ret = execute_python_entrypoint(BASENAME, MODULE, FUNCTION,
stdout_redirect, stderr_redirect);
if (stdout != NULL) fclose(stdout);
if (stderr != NULL) fclose(stderr);
DeleteFile(stdout_redirect);
DeleteFile(stderr_redirect);


@ -69,7 +69,24 @@ nmake -f ms\ntdll.mak install
Qt
--------
Extract Qt source code to C:\Qt\4.x.x. Run configure and make::
Extract Qt source code to C:\Qt\4.x.x.
Qt uses its own routine to locate and load "system libraries" including the openssl libraries needed for "Get Books". This means that we have to apply the following patch to have Qt load the openssl libraries bundled with calibre:
--- src/corelib/plugin/qsystemlibrary.cpp 2011-02-22 05:04:00.000000000 -0700
+++ src/corelib/plugin/qsystemlibrary.cpp 2011-04-25 20:53:13.635247466 -0600
@@ -110,7 +110,7 @@ HINSTANCE QSystemLibrary::load(const wch
#if !defined(QT_BOOTSTRAPPED)
if (!onlySystemDirectory)
- searchOrder << QFileInfo(qAppFileName()).path();
+ searchOrder << (QFileInfo(qAppFileName()).path().replace(QLatin1Char('/'), QLatin1Char('\\')) + QString::fromLatin1("\\DLLs\\"));
#endif
searchOrder << qSystemDirectory();
Now, run configure and make::
configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake


@ -11,7 +11,10 @@
SummaryCodepage='1252' />
<Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
<!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
after you update Qt beyond 4.7.2. 'emus' means re-install even if the version is the same, not just if it is older. -->
<Property Id='REINSTALLMODE' Value='emus'/>
<Upgrade Id="{upgrade_code}">
<UpgradeVersion Maximum="{version}"
IncludeMaximum="yes"


@ -347,9 +347,10 @@ class UploadUserManual(Command): # {{{
with NamedTemporaryFile(suffix='.zip') as f:
os.fchmod(f.fileno(),
stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH|stat.S_IWRITE)
with CurrentDir(self.d(path)):
with CurrentDir(path):
with ZipFile(f, 'w') as zf:
for x in os.listdir('.'):
if x.endswith('.swp'): continue
zf.write(x)
if os.path.isdir(x):
for y in os.listdir(x):


@ -388,7 +388,11 @@ class CurrentDir(object):
return self.cwd
def __exit__(self, *args):
os.chdir(self.cwd)
try:
os.chdir(self.cwd)
except:
# The previous CWD no longer exists
pass
class StreamReadWrapper(object):


@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 7, 57)
numeric_version = (0, 8, 0)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"


@ -449,7 +449,7 @@ class CatalogPlugin(Plugin): # {{{
['author_sort','authors','comments','cover','formats',
'id','isbn','ondevice','pubdate','publisher','rating',
'series_index','series','size','tags','timestamp',
'title','uuid'])
'title_sort','title','uuid'])
all_custom_fields = set(db.custom_field_keys())
all_fields = all_std_fields.union(all_custom_fields)
@ -607,6 +607,7 @@ class StoreBase(Plugin): # {{{
supported_platforms = ['windows', 'osx', 'linux']
author = 'John Schember'
type = _('Store')
minimum_calibre_version = (0, 8, 0)
actual_plugin = None


@ -9,7 +9,6 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.utils.config import test_eight_code
# To archive plugins {{{
class HTML2ZIP(FileTypePlugin):
@ -596,6 +595,7 @@ from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
from calibre.devices.prs505.driver import PRS505
from calibre.devices.user_defined.driver import USER_DEFINED
from calibre.devices.android.driver import ANDROID, S60
from calibre.devices.nokia.driver import N770, N810, E71X, E52
from calibre.devices.eslick.driver import ESLICK, EBK52
@ -613,6 +613,7 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
@ -621,29 +622,16 @@ from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, ]
if test_eight_code:
# New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
# }}}
else:
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
KentDistrictLibrary
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
AmazonCovers, DoubanCovers
plugins += [GoogleBooks, ISBNDB, Amazon,
OpenLibraryCovers, AmazonCovers, DoubanCovers,
NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]
plugins += [
ComicInput,
@ -756,6 +744,9 @@ plugins += [
EEEREADER,
NEXTBOOK,
ITUNES,
BOEYE_BEX,
BOEYE_BDX,
USER_DEFINED,
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
@ -868,10 +859,7 @@ plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
ActionRestart, ActionOpenFolder, ActionConnectShare,
ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch]
if test_eight_code:
plugins += [ActionStore]
ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore]
# }}}
@ -1097,10 +1085,8 @@ class Misc(PreferencesPlugin):
plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
CommonOptions, OutputOptions, Adding, Saving, Sending, Plugboard,
Email, Server, Plugins, Tweaks, Misc, TemplateFunctions]
if test_eight_code:
plugins.append(MetadataSources)
Email, Server, Plugins, Tweaks, Misc, TemplateFunctions,
MetadataSources]
#}}}
@ -1110,6 +1096,11 @@ class StoreAmazonKindleStore(StoreBase):
description = _('Kindle books from Amazon')
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle'
description = _('Kindle books from Amazon.uk')
actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription'
description = _('Ebooks for readers.')
@ -1175,10 +1166,27 @@ class StoreSmashwordsStore(StoreBase):
description = _('Your ebook. Your way.')
actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
plugins += [StoreAmazonKindleStore, StoreBaenWebScriptionStore, StoreBNStore,
class StoreWaterstonesUKStore(StoreBase):
name = 'Waterstones UK'
description = _('Feel every word')
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
class StoreFoylesUKStore(StoreBase):
name = 'Foyles UK'
description = _('Foyles of London, online')
actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
class AmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle'
description = _('Kindle eBooks')
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
plugins += [StoreAmazonKindleStore, AmazonDEKindleStore, StoreAmazonUKKindleStore,
StoreBaenWebScriptionStore, StoreBNStore,
StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
            StoreEHarlequinStore,
StoreFeedbooksStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore]
            StoreEHarlequinStore, StoreFeedbooksStore,
StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
StoreWaterstonesUKStore]
# }}}


@ -15,12 +15,11 @@ from calibre.customize.profiles import InputProfile, OutputProfile
from calibre.customize.builtins import plugins as builtin_plugins
from calibre.devices.interface import DevicePlugin
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.covers import CoverDownload
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
plugin_dir, OptionParser, prefs
from calibre.utils.config import (make_config_dir, Config, ConfigProxy,
plugin_dir, OptionParser)
from calibre.ebooks.epub.fix import ePubFixer
from calibre.ebooks.metadata.sources.base import Source
from calibre.constants import DEBUG
builtin_names = frozenset([p.name for p in builtin_plugins])
@ -93,8 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
config['enabled_plugins'] = ep
default_disabled_plugins = set([
'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
'Kent District Library'
'Overdrive',
])
def is_disabled(plugin):
@ -190,44 +188,6 @@ def output_profiles():
yield plugin
# }}}
# Metadata sources {{{
def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
for plugin in _initialized_plugins:
if isinstance(plugin, MetadataSource) and \
plugin.metadata_type == metadata_type:
if is_disabled(plugin):
continue
if customize:
customization = config['plugin_customization']
plugin.site_customization = customization.get(plugin.name, None)
if plugin.name == 'IsbnDB' and isbndb_key is not None:
plugin.site_customization = isbndb_key
yield plugin
def get_isbndb_key():
return config['plugin_customization'].get('IsbnDB', None)
def set_isbndb_key(key):
for plugin in _initialized_plugins:
if plugin.name == 'IsbnDB':
return customize_plugin(plugin, key)
def migrate_isbndb_key():
key = prefs['isbndb_com_key']
if key:
prefs.set('isbndb_com_key', '')
set_isbndb_key(key)
def cover_sources():
customization = config['plugin_customization']
for plugin in _initialized_plugins:
if isinstance(plugin, CoverDownload):
if not is_disabled(plugin):
plugin.site_customization = customization.get(plugin.name, '')
yield plugin
# }}}
# Interface Actions # {{{
def interface_actions():
@ -527,8 +487,9 @@ def initialize_plugins():
plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
_initialized_plugins.append(plugin)
except:
print 'Failed to initialize plugin...'
traceback.print_exc()
print 'Failed to initialize plugin:', repr(zfp)
if DEBUG:
traceback.print_exc()
_initialized_plugins.sort(cmp=lambda x,y:cmp(x.priority, y.priority), reverse=True)
reread_filetype_plugins()
reread_metadata_plugins()


@ -156,3 +156,60 @@ def debug(ioreg_to_tmp=False, buf=None):
sys.stdout = oldo
sys.stderr = olde
def device_info(ioreg_to_tmp=False, buf=None):
from calibre.devices.scanner import DeviceScanner, win_pnp_drives
from calibre.constants import iswindows
import re
res = {}
device_details = {}
device_set = set()
drive_details = {}
drive_set = set()
res['device_set'] = device_set
res['device_details'] = device_details
res['drive_details'] = drive_details
res['drive_set'] = drive_set
try:
s = DeviceScanner()
s.scan()
devices = (s.devices)
if not iswindows:
devices = [list(x) for x in devices]
for dev in devices:
for i in range(3):
dev[i] = hex(dev[i])
d = dev[0] + dev[1] + dev[2]
device_set.add(d)
device_details[d] = dev[0:3]
else:
for dev in devices:
vid = re.search('vid_([0-9a-f]*)&', dev)
if vid:
vid = vid.group(1)
pid = re.search('pid_([0-9a-f]*)&', dev)
if pid:
pid = pid.group(1)
rev = re.search('rev_([0-9a-f]*)$', dev)
if rev:
rev = rev.group(1)
d = vid+pid+rev
device_set.add(d)
device_details[d] = (vid, pid, rev)
drives = win_pnp_drives(debug=False)
for drive,details in drives.iteritems():
order = 'ORD_' + str(drive.order)
ven = re.search('VEN_([^&]*)&', details)
if ven:
ven = ven.group(1)
prod = re.search('PROD_([^&]*)&', details)
if prod:
prod = prod.group(1)
d = (order, ven, prod)
drive_details[drive] = d
drive_set.add(drive)
finally:
pass
return res
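
The Windows branch of device_info() above recovers USB IDs with three small regular expressions; a sketch of what they extract, using a made-up PnP id string:

import re

dev = 'usb\\vid_2237&pid_2208&rev_0100'  # hypothetical device string
vid = re.search('vid_([0-9a-f]*)&', dev).group(1)  # -> '2237'
pid = re.search('pid_([0-9a-f]*)&', dev).group(1)  # -> '2208'
rev = re.search('rev_([0-9a-f]*)$', dev).group(1)  # -> '0100'
print vid, pid, rev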


@ -62,7 +62,7 @@ class ANDROID(USBMS):
0x502 : { 0x3203 : [0x0100]},
# Dell
0x413c : { 0xb007 : [0x0100, 0x0224]},
0x413c : { 0xb007 : [0x0100, 0x0224, 0x0226]},
# LG
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
@ -109,10 +109,10 @@ class ANDROID(USBMS):
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE']
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE']
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']
OSX_MAIN_MEM = 'Android Device Main Memory'


@ -163,6 +163,8 @@ class ITUNES(DriverBase):
settings()
set_progress_reporter()
upload_books()
_get_fpath()
_update_epub_metadata()
add_books_to_metadata()
use_plugboard_ext()
set_plugboard()
@ -460,7 +462,7 @@ class ITUNES(DriverBase):
cached_books[this_book.path] = {
'title':book.Name,
'author':book.artist().split(' & '),
'author':book.Artist.split(' & '),
'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
'uuid': book.Composer,
'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub'
@ -504,7 +506,7 @@ class ITUNES(DriverBase):
if self.iTunes:
# Check for connected book-capable device
self.sources = self._get_sources()
if 'iPod' in self.sources:
if 'iPod' in self.sources and not self.ejected:
#if DEBUG:
#sys.stdout.write('.')
#sys.stdout.flush()
@ -2034,16 +2036,17 @@ class ITUNES(DriverBase):
if 'iPod' in self.sources:
connected_device = self.sources['iPod']
device = self.iTunes.sources[connected_device]
dev_books = None
for pl in device.playlists():
if pl.special_kind() == appscript.k.Books:
if DEBUG:
self.log.info(" Book playlist: '%s'" % (pl.name()))
books = pl.file_tracks()
dev_books = pl.file_tracks()
break
else:
self.log.error(" book_playlist not found")
for book in books:
for book in dev_books:
# This may need additional entries for international iTunes users
if book.kind() in self.Audiobooks:
if DEBUG:
@ -2621,42 +2624,42 @@ class ITUNES(DriverBase):
# Touch the OPF timestamp
try:
zf_opf = ZipFile(fpath,'r')
fnames = zf_opf.namelist()
opf = [x for x in fnames if '.opf' in x][0]
except:
raise UserFeedback("'%s' is not a valid EPUB" % metadata.title,
None,
level=UserFeedback.WARN)
fnames = zf_opf.namelist()
opf = [x for x in fnames if '.opf' in x][0]
if opf:
opf_tree = etree.fromstring(zf_opf.read(opf))
md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
if md_els:
ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
if ts is not None:
timestamp = ts.get('content')
old_ts = parse_date(timestamp)
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
if DEBUG:
self.log.info(" existing timestamp: %s" % metadata.timestamp)
else:
metadata.timestamp = now()
if DEBUG:
self.log.info(" add timestamp: %s" % metadata.timestamp)
opf_tree = etree.fromstring(zf_opf.read(opf))
md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
if md_els:
ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
if ts is not None:
timestamp = ts.get('content')
old_ts = parse_date(timestamp)
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
if DEBUG:
self.log.info(" existing timestamp: %s" % metadata.timestamp)
else:
metadata.timestamp = now()
if DEBUG:
self.log.warning(" missing <metadata> block in OPF file")
self.log.info(" add timestamp: %s" % metadata.timestamp)
# Force the language declaration for iBooks 1.1
#metadata.language = get_lang().replace('_', '-')
# Updates from metadata plugboard (ignoring publisher)
metadata.language = metadata_x.language
else:
metadata.timestamp = now()
if DEBUG:
if metadata.language != metadata_x.language:
self.log.info(" rewriting language: <dc:language>%s</dc:language>" % metadata.language)
self.log.warning(" missing <metadata> block in OPF file")
self.log.info(" add timestamp: %s" % metadata.timestamp)
# Force the language declaration for iBooks 1.1
#metadata.language = get_lang().replace('_', '-')
# Updates from metadata plugboard (ignoring publisher)
metadata.language = metadata_x.language
if DEBUG:
if metadata.language != metadata_x.language:
self.log.info(" rewriting language: <dc:language>%s</dc:language>" % metadata.language)
zf_opf.close()


@ -0,0 +1,56 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Ken <ken at szboeye.com>'
__docformat__ = 'restructuredtext en'
'''
Device driver for BOEYE series readers
'''
from calibre.devices.usbms.driver import USBMS
class BOEYE_BEX(USBMS):
name = 'BOEYE BEX reader driver'
gui_name = 'BOEYE BEX'
    description = _('Communicate with BOEYE BEX series eBook readers.')
author = 'szboeye'
supported_platforms = ['windows', 'osx', 'linux']
FORMATS = ['epub', 'mobi', 'fb2', 'lit', 'prc', 'pdf', 'rtf', 'txt', 'djvu', 'doc', 'chm', 'html', 'zip', 'pdb']
VENDOR_ID = [0x0085]
PRODUCT_ID = [0x600]
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
OSX_MAIN_MEM = 'Linux File-Stor Gadget Media'
MAIN_MEMORY_VOLUME_LABEL = 'BOEYE BEX Storage Card'
EBOOK_DIR_MAIN = 'Documents'
SUPPORTS_SUB_DIRS = True
class BOEYE_BDX(USBMS):
name = 'BOEYE BDX reader driver'
gui_name = 'BOEYE BDX'
    description = _('Communicate with BOEYE BDX series eBook readers.')
author = 'szboeye'
supported_platforms = ['windows', 'osx', 'linux']
FORMATS = ['epub', 'mobi', 'fb2', 'lit', 'prc', 'pdf', 'rtf', 'txt', 'djvu', 'doc', 'chm', 'html', 'zip', 'pdb']
VENDOR_ID = [0x0085]
PRODUCT_ID = [0x800]
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
OSX_MAIN_MEM = 'Linux File-Stor Gadget Media'
OSX_CARD_A_MEM = 'Linux File-Stor Gadget Media'
MAIN_MEMORY_VOLUME_LABEL = 'BOEYE BDX Internal Memory'
STORAGE_CARD_VOLUME_LABEL = 'BOEYE BDX Storage Card'
EBOOK_DIR_MAIN = 'Documents'
EBOOK_DIR_CARD_A = 'Documents'
SUPPORTS_SUB_DIRS = True


@ -64,7 +64,7 @@ class HANLINV3(USBMS):
return names
def linux_swap_drives(self, drives):
if len(drives) < 2: return drives
if len(drives) < 2 or not drives[1] or not drives[2]: return drives
drives = list(drives)
t = drives[0]
drives[0] = drives[1]
@ -95,7 +95,6 @@ class HANLINV5(HANLINV3):
gui_name = 'Hanlin V5'
description = _('Communicate with Hanlin V5 eBook readers.')
VENDOR_ID = [0x0492]
PRODUCT_ID = [0x8813]
BCD = [0x319]


@ -164,7 +164,7 @@ class APNXBuilder(object):
if c == '/':
closing = True
continue
elif c in ('d', 'p'):
elif c == 'p':
if closing:
in_p = False
else:


@ -187,7 +187,7 @@ class LUMIREAD(USBMS):
cfilepath = cfilepath.replace(os.sep+'books'+os.sep,
os.sep+'covers'+os.sep, 1)
pdir = os.path.dirname(cfilepath)
if not os.exists(pdir):
if not os.path.exists(pdir):
os.makedirs(pdir)
with open(cfilepath+'.jpg', 'wb') as f:
f.write(metadata.thumbnail[-1])


@ -94,6 +94,9 @@ class DeviceConfig(object):
if isinstance(cls.EXTRA_CUSTOMIZATION_MESSAGE, list):
ec = []
for i in range(0, len(cls.EXTRA_CUSTOMIZATION_MESSAGE)):
if config_widget.opt_extra_customization[i] is None:
ec.append(None)
continue
if hasattr(config_widget.opt_extra_customization[i], 'isChecked'):
ec.append(config_widget.opt_extra_customization[i].isChecked())
else:


@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.devices.usbms.driver import USBMS
class USER_DEFINED(USBMS):
name = 'User Defined USB driver'
gui_name = 'User Defined USB Device'
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
# Ordered list of supported formats
FORMATS = ['epub', 'mobi', 'pdf']
VENDOR_ID = 0xFFFF
PRODUCT_ID = 0xFFFF
BCD = None
EBOOK_DIR_MAIN = ''
EBOOK_DIR_CARD_A = ''
VENDOR_NAME = []
WINDOWS_MAIN_MEM = ''
WINDOWS_CARD_A_MEM = ''
OSX_MAIN_MEM = 'Device Main Memory'
MAIN_MEMORY_VOLUME_LABEL = 'Device Main Memory'
SUPPORTS_SUB_DIRS = True
EXTRA_CUSTOMIZATION_MESSAGE = [
_('USB Vendor ID (in hex)') + ':::<p>' +
_('Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('USB Product ID (in hex)')+ ':::<p>' +
_('Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('USB Revision ID (in hex)')+ ':::<p>' +
_('Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
'',
_('Windows main memory vendor string') + ':::<p>' +
_('This field is used only on windows. '
'Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('Windows main memory ID string') + ':::<p>' +
_('This field is used only on windows. '
'Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('Windows card A vendor string') + ':::<p>' +
_('This field is used only on windows. '
'Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('Windows card A ID string') + ':::<p>' +
_('This field is used only on windows. '
'Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('Main memory folder') + ':::<p>' +
_('Enter the folder where the books are to be stored. This folder '
'is prepended to any send_to_device template') + '</p>',
_('Card A folder') + ':::<p>' +
_('Enter the folder where the books are to be stored. This folder '
'is prepended to any send_to_device template') + '</p>',
]
EXTRA_CUSTOMIZATION_DEFAULT = [
'0xffff',
'0xffff',
'0xffff',
None,
'',
'',
'',
'',
'',
'',
]
OPT_USB_VENDOR_ID = 0
OPT_USB_PRODUCT_ID = 1
OPT_USB_REVISION_ID = 2
OPT_USB_WINDOWS_MM_VEN_ID = 4
OPT_USB_WINDOWS_MM_ID = 5
OPT_USB_WINDOWS_CA_VEN_ID = 6
OPT_USB_WINDOWS_CA_ID = 7
OPT_MAIN_MEM_FOLDER = 8
OPT_CARD_A_FOLDER = 9
def initialize(self):
try:
e = self.settings().extra_customization
self.VENDOR_ID = int(e[self.OPT_USB_VENDOR_ID], 16)
self.PRODUCT_ID = int(e[self.OPT_USB_PRODUCT_ID], 16)
self.BCD = [int(e[self.OPT_USB_REVISION_ID], 16)]
if e[self.OPT_USB_WINDOWS_MM_VEN_ID]:
self.VENDOR_NAME.append(e[self.OPT_USB_WINDOWS_MM_VEN_ID])
if e[self.OPT_USB_WINDOWS_CA_VEN_ID] and \
e[self.OPT_USB_WINDOWS_CA_VEN_ID] not in self.VENDOR_NAME:
self.VENDOR_NAME.append(e[self.OPT_USB_WINDOWS_CA_VEN_ID])
self.WINDOWS_MAIN_MEM = e[self.OPT_USB_WINDOWS_MM_ID] + '&'
self.WINDOWS_CARD_A_MEM = e[self.OPT_USB_WINDOWS_CA_ID] + '&'
self.EBOOK_DIR_MAIN = e[self.OPT_MAIN_MEM_FOLDER]
self.EBOOK_DIR_CARD_A = e[self.OPT_CARD_A_FOLDER]
except:
import traceback
traceback.print_exc()
USBMS.initialize(self)


@ -19,12 +19,12 @@ class CHMInput(InputFormatPlugin):
description = 'Convert CHM files to OEB'
file_types = set(['chm'])
def _chmtohtml(self, output_dir, chm_path, no_images, log):
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
from calibre.ebooks.chm.reader import CHMReader
log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log, self.opts)
log.debug('Extracting CHM to %s' % output_dir)
rdr.extract_content(output_dir)
rdr.extract_content(output_dir, debug_dump=debug_dump)
self._chm_reader = rdr
return rdr.hhc_path
@ -47,7 +47,12 @@ class CHMInput(InputFormatPlugin):
stream.close()
log.debug('tdir=%s' % tdir)
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
debug_dump = False
odi = options.debug_pipeline
if odi:
debug_dump = os.path.join(odi, 'input')
mainname = self._chmtohtml(tdir, chm_name, no_images, log,
debug_dump=debug_dump)
mainpath = os.path.join(tdir, mainname)
metadata = get_metadata_from_reader(self._chm_reader)
@ -56,7 +61,6 @@ class CHMInput(InputFormatPlugin):
#from calibre import ipython
#ipython()
odi = options.debug_pipeline
options.debug_pipeline = None
options.input_encoding = 'utf-8'
# try a custom conversion:


@ -97,7 +97,7 @@ class CHMReader(CHMFile):
raise CHMError("'%s' is zero bytes in length!"%(path,))
return data
def ExtractFiles(self, output_dir=os.getcwdu()):
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
html_files = set([])
for path in self.Contents():
lpath = os.path.join(output_dir, path)
@ -123,6 +123,9 @@ class CHMReader(CHMFile):
self.log.warn('%r filename too long, skipping'%path)
continue
raise
if debug_dump:
import shutil
shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
for lpath in html_files:
with open(lpath, 'r+b') as f:
data = f.read()
@ -249,8 +252,8 @@ class CHMReader(CHMFile):
if not os.path.isdir(dir):
os.makedirs(dir)
def extract_content(self, output_dir=os.getcwdu()):
self.ExtractFiles(output_dir=output_dir)
def extract_content(self, output_dir=os.getcwdu(), debug_dump=False):
self.ExtractFiles(output_dir=output_dir, debug_dump=debug_dump)


@ -854,7 +854,8 @@ OptionRecommendation(name='sr3_replace',
if isinstance(ret, basestring):
shutil.copytree(output_dir, out_dir)
else:
os.makedirs(out_dir)
if not os.path.exists(out_dir):
os.makedirs(out_dir)
self.dump_oeb(ret, out_dir)
if self.input_fmt == 'recipe':
zf = ZipFile(os.path.join(self.opts.debug_pipeline,


@ -402,7 +402,7 @@ class HTMLPreProcessor(object):
(re.compile(r'((?<=</a>)\s*file:/{2,4}[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
# Center separator lines
(re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
(re.compile(u'<br>\s*(?P<break>([*#•✦=] *){3,})\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group('break') + '</p>'),
# Remove page links
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),


@ -156,17 +156,17 @@ class HeuristicProcessor(object):
]
ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_',
r'(?msu)(?<=[\s>])/(?P<words>[^/\*>]+)/',
r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)~~',
r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)\*',
r'(?msu)(?<=[\s>])~(?P<words>[^~]+)~',
r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)/_',
r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)\*_',
r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)/\*',
r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)/\*_',
r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+):/',
r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+):\|',
ur'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_',
ur'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*>]+)/',
ur'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~',
ur'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*',
ur'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~',
ur'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_',
ur'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_',
ur'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*',
ur'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_',
ur'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/',
ur'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|',
]
for word in ITALICIZE_WORDS:
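
The change above widens the lookbehind from [\s>] to [\s>"\'], so emphasis markup that sits immediately after a quotation mark is now recognized. A sketch trimmed to the underscore rule:

import re

OLD = r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_'
NEW = r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'
sample = u'"_quoted emphasis_" and plain _emphasis_'
print re.findall(OLD, sample)  # [u'emphasis'] - the quoted run is missed
print re.findall(NEW, sample)  # [u'quoted emphasis', u'emphasis']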
@ -518,13 +518,13 @@ class HeuristicProcessor(object):
if re.findall('(<|>)', replacement_break):
if re.match('^<hr', replacement_break):
if replacement_break.find('width') != -1:
width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) / 2
hr_open = re.sub('45', str(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>'
width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) / 2
hr_open = re.sub('45', str(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>'
else:
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
elif re.match('^<img', replacement_break):
scene_break = self.scene_break_open+replacement_break+'</p>'
else:
@ -584,10 +584,10 @@ class HeuristicProcessor(object):
#print "styles for this line are: "+str(styles)
split_styles = []
for style in styles:
#print "style is: "+str(style)
newstyle = style.split(':')
#print "newstyle is: "+str(newstyle)
split_styles.append(newstyle)
#print "style is: "+str(style)
newstyle = style.split(':')
#print "newstyle is: "+str(newstyle)
split_styles.append(newstyle)
styles = split_styles
for style, setting in styles:
if style == 'text-align' and setting != 'left':


@ -309,9 +309,9 @@ class HTMLInput(InputFormatPlugin):
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer, \
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, \
xpath
from calibre.ebooks.oeb.base import (DirContainer,
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
xpath)
from calibre import guess_type
from calibre.ebooks.oeb.transforms.metadata import \
meta_info_to_oeb_metadata
@ -345,7 +345,8 @@ class HTMLInput(InputFormatPlugin):
htmlfile_map = {}
for f in filelist:
path = f.path
oeb.container = DirContainer(os.path.dirname(path), log)
oeb.container = DirContainer(os.path.dirname(path), log,
ignore_opf=True)
bname = os.path.basename(path)
id, href = oeb.manifest.generate(id='html',
href=ascii_filename(bname))
@ -369,7 +370,7 @@ class HTMLInput(InputFormatPlugin):
for f in filelist:
path = f.path
dpath = os.path.dirname(path)
oeb.container = DirContainer(dpath, log)
oeb.container = DirContainer(dpath, log, ignore_opf=True)
item = oeb.manifest.hrefs[htmlfile_map[path]]
rewrite_links(item.data, partial(self.resource_adder, base=dpath))
@ -409,7 +410,7 @@ class HTMLInput(InputFormatPlugin):
if not item.linear: continue
toc.add(title, item.href)
oeb.container = DirContainer(os.getcwdu(), oeb.log)
oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
return oeb
def link_to_local_path(self, link_, base=None):
@ -456,7 +457,7 @@ class HTMLInput(InputFormatPlugin):
href=bhref)
self.oeb.log.debug('Added', link)
self.oeb.container = self.DirContainer(os.path.dirname(link),
self.oeb.log)
self.oeb.log, ignore_opf=True)
# Load into memory
guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME


@ -7,10 +7,12 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import posixpath
from calibre import walk
from calibre import guess_type, walk
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.opf2 import OPF
from calibre.utils.zipfile import ZipFile
class HTMLZInput(InputFormatPlugin):
@ -27,7 +29,7 @@ class HTMLZInput(InputFormatPlugin):
# Extract content from zip archive.
zf = ZipFile(stream)
zf.extractall('.')
zf.extractall()
for x in walk('.'):
if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
@ -70,5 +72,24 @@ class HTMLZInput(InputFormatPlugin):
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
mi = get_file_type_metadata(stream, file_ext)
meta_info_to_oeb_metadata(mi, oeb.metadata, log)
# Get the cover path from the OPF.
cover_href = None
opf = None
for x in walk('.'):
        if os.path.splitext(x)[1].lower() == '.opf':
opf = x
break
if opf:
opf = OPF(opf)
cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
# Set the cover.
if cover_href:
cdata = None
with open(cover_href, 'rb') as cf:
cdata = cf.read()
id, href = oeb.manifest.generate('cover', cover_href)
oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
oeb.guide.add('cover', 'Cover', href)
return oeb


@ -7,11 +7,13 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from cStringIO import StringIO
from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
@ -79,10 +81,31 @@ class HTMLZOutput(OutputFormatPlugin):
fname = os.path.join(tdir, 'images', images[item.href])
with open(fname, 'wb') as img:
img.write(data)
# Cover
cover_path = None
try:
cover_data = None
if oeb_book.metadata.cover:
term = oeb_book.metadata.cover[0].term
cover_data = oeb_book.guide[term].item.data
if cover_data:
from calibre.utils.magick.draw import save_cover_data_to
cover_path = os.path.join(tdir, 'cover.jpg')
with open(cover_path, 'w') as cf:
cf.write('')
save_cover_data_to(cover_data, cover_path)
except:
import traceback
traceback.print_exc()
# Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1())))
mi = opf.to_book_metadata()
if cover_path:
mi.cover = 'cover.jpg'
mdataf.write(metadata_to_opf(mi))
htmlz = ZipFile(output_path, 'w')
htmlz.add_dir(tdir)


@ -274,6 +274,9 @@ def check_isbn(isbn):
if not isbn:
return None
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
all_same = re.match(r'(\d)\1{9,12}$', isbn)
if all_same is not None:
return None
if len(isbn) == 10:
return check_isbn10(isbn)
if len(isbn) == 13:
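
The added guard rejects a string that is just one digit repeated ten to thirteen times before any checksum is attempted. A quick sketch with illustrative values:

import re

def all_same_digits(isbn):
    # Mirrors the added check: a single repeated digit is never a valid ISBN.
    return re.match(r'(\d)\1{9,12}$', isbn) is not None

print all_same_digits('1111111111')     # True - rejected early
print all_same_digits('9780316044981')  # False - goes on to the length-specific checks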


@ -1,224 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Fetch metadata using Amazon AWS
'''
import sys, re
from threading import RLock
from lxml import html
from lxml.html import soupparser
from calibre import browser
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html
asin_cache = {}
cover_url_cache = {}
cache_lock = RLock()
def find_asin(br, isbn):
q = 'http://www.amazon.com/s/?search-alias=aps&field-keywords='+isbn
res = br.open_novisit(q)
raw = res.read()
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
root = html.fromstring(raw)
revs = root.xpath('//*[@class="asinReviewsSummary" and @name]')
revs = [x.get('name') for x in revs]
if revs:
return revs[0]
def to_asin(br, isbn):
with cache_lock:
ans = asin_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
if len(isbn) == 13:
try:
asin = find_asin(br, isbn)
except:
import traceback
traceback.print_exc()
asin = None
else:
asin = isbn
with cache_lock:
asin_cache[isbn] = asin if asin else False
return asin
def get_social_metadata(title, authors, publisher, isbn):
mi = Metadata(title, authors)
if not isbn:
return mi
isbn = check_isbn(isbn)
if not isbn:
return mi
br = browser()
asin = to_asin(br, isbn)
if asin and get_metadata(br, asin, mi):
return mi
from calibre.ebooks.metadata.xisbn import xisbn
for i in xisbn.get_associated_isbns(isbn):
asin = to_asin(br, i)
if asin and get_metadata(br, asin, mi):
return mi
return mi
def get_cover_url(isbn, br):
isbn = check_isbn(isbn)
if not isbn:
return None
with cache_lock:
ans = cover_url_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
asin = to_asin(br, isbn)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
return ans
from calibre.ebooks.metadata.xisbn import xisbn
for i in xisbn.get_associated_isbns(isbn):
asin = to_asin(br, i)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
cover_url_cache[i] = ans
return ans
with cache_lock:
cover_url_cache[isbn] = False
return None
def _get_cover_url(br, asin):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception as e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
raise
if '<title>404 - ' in raw:
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
root = soupparser.fromstring(raw)
except:
return False
imgs = root.xpath('//img[@id="prodImage" and @src]')
if imgs:
src = imgs[0].get('src')
parts = src.split('/')
if len(parts) > 3:
bn = parts[-1]
sparts = bn.split('_')
if len(sparts) > 2:
bn = sparts[0] + sparts[-1]
return ('/'.join(parts[:-1]))+'/'+bn
return None
def get_metadata(br, asin, mi):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception as e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return False
raise
if '<title>404 - ' in raw:
return False
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
root = soupparser.fromstring(raw)
except:
return False
if root.xpath('//*[@id="errorMessage"]'):
return False
ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
if ratings:
for elem in ratings[0].xpath('descendant::*[@title]'):
t = elem.get('title').strip()
m = pat.match(t)
if m is not None:
try:
mi.rating = float(m.group(1))/float(m.group(2)) * 5
except:
pass
desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
if desc:
desc = desc[0]
for c in desc.xpath('descendant::*[@class="seeAll" or'
' @class="emptyClear" or @href]'):
c.getparent().remove(c)
desc = html.tostring(desc, method='html', encoding=unicode).strip()
# remove all attributes from tags
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
# Collapse whitespace
#desc = re.sub('\n+', '\n', desc)
#desc = re.sub(' +', ' ', desc)
# Remove the notice about text referring to out of print editions
desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
# Remove comments
desc = re.sub(r'(?s)<!--.*?-->', '', desc)
mi.comments = sanitize_comments_html(desc)
return True
def main(args=sys.argv):
import tempfile, os
tdir = tempfile.gettempdir()
br = browser()
for title, isbn in [
('The Heroes', '9780316044981'), # Test find_asin
('Learning Python', '8324616489'), # Test xisbn
('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
# Random tests
('Star Trek: Destiny: Mere Mortals', '9781416551720'),
('The Great Gatsby', '0743273567'),
]:
cpath = os.path.join(tdir, title+'.jpg')
curl = get_cover_url(isbn, br)
if curl is None:
print 'No cover found for', title
else:
open(cpath, 'wb').write(br.open_novisit(curl).read())
print 'Cover for', title, 'saved to', cpath
#import time
#st = time.time()
mi = get_social_metadata(title, None, None, isbn)
if not mi.comments:
            print 'Failed to download social metadata for', title
return 1
#print '\n\n', time.time() - st, '\n\n'
print mi
print '\n'
return 0
if __name__ == '__main__':
sys.exit(main())


@ -1,516 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
import sys, textwrap, re, traceback
from urllib import urlencode
from math import ceil
from lxml import html
from lxml.html import soupparser
from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html
class AmazonFr(MetadataSource):
name = 'Amazon French'
description = _('Downloads metadata from amazon.fr')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='fr')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class AmazonEs(MetadataSource):
name = 'Amazon Spanish'
    description = _('Downloads metadata from amazon.com in Spanish')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='es')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class AmazonEn(MetadataSource):
name = 'Amazon English'
    description = _('Downloads metadata from amazon.com in English')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='en')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class AmazonDe(MetadataSource):
name = 'Amazon German'
description = _('Downloads metadata from amazon.de')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='de')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class Amazon(MetadataSource):
name = 'Amazon'
description = _('Downloads metadata from amazon.com')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Kovid Goyal & Sengian'
version = (1, 1, 0)
has_html_comments = True
def fetch(self):
# if not self.site_customization:
# return
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='all')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
# @property
# def string_customization_help(self):
# return _('You can select here the language for metadata search with amazon.com')
def report(verbose):
if verbose:
traceback.print_exc()
class Query(object):
BASE_URL_ALL = 'http://www.amazon.com'
BASE_URL_FR = 'http://www.amazon.fr'
BASE_URL_DE = 'http://www.amazon.de'
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
max_results=20, rlang='all'):
assert not(title is None and author is None and publisher is None \
and isbn is None and keywords is None)
assert (max_results < 21)
self.max_results = int(max_results)
self.renbres = re.compile(u'\s*(\d+)\s*')
q = { 'search-alias' : 'stripbooks' ,
'unfiltered' : '1',
'field-keywords' : '',
'field-author' : '',
'field-title' : '',
'field-isbn' : '',
'field-publisher' : ''
#get to amazon detailed search page to get all options
# 'node' : '',
# 'field-binding' : '',
#before, during, after
# 'field-dateop' : '',
#month as number
# 'field-datemod' : '',
# 'field-dateyear' : '',
#french only
# 'field-collection' : '',
#many options available
}
if rlang =='all':
q['sort'] = 'relevanceexprank'
self.urldata = self.BASE_URL_ALL
elif rlang =='es':
q['sort'] = 'relevanceexprank'
q['field-language'] = 'Spanish'
self.urldata = self.BASE_URL_ALL
elif rlang =='en':
q['sort'] = 'relevanceexprank'
q['field-language'] = 'English'
self.urldata = self.BASE_URL_ALL
elif rlang =='fr':
q['sort'] = 'relevancerank'
self.urldata = self.BASE_URL_FR
elif rlang =='de':
q['sort'] = 'relevancerank'
self.urldata = self.BASE_URL_DE
self.baseurl = self.urldata
if isbn is not None:
q['field-isbn'] = isbn.replace('-', '')
else:
if title is not None:
q['field-title'] = title
if author is not None:
q['field-author'] = author
if publisher is not None:
q['field-publisher'] = publisher
if keywords is not None:
q['field-keywords'] = keywords
if isinstance(q, unicode):
q = q.encode('utf-8')
self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
print 'Query:', self.urldata
try:
raw = browser.open_novisit(self.urldata, timeout=timeout).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
raise
if '<title>404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
return None, self.urldata
#nb of page
try:
nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
except:
return None, self.urldata
pages =[feed]
if len(nbresults) > 1:
nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
for i in xrange(2, nbpagetoquery + 1):
try:
urldata = self.urldata + '&page=' + str(i)
raw = browser.open_novisit(urldata, timeout=timeout).read()
except Exception as e:
continue
if '<title>404 - ' in raw:
continue
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
continue
pages.append(feed)
results = []
for x in pages:
results.extend([i.getparent().get('href') \
for i in x.xpath("//a/span[@class='srTitle']")])
return results[:self.max_results], self.baseurl
class ResultList(list):
def __init__(self, baseurl, lang = 'all'):
self.baseurl = baseurl
self.lang = lang
self.repub = re.compile(u'\((.*)\)')
self.rerat = re.compile(u'([0-9.]+)')
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
self.recom = re.compile(r'(?s)<!--.*?-->')
self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
self.reisbn = re.compile(u'(ISBN-10|ISBN-13|ASIN)', re.I)
self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)
def strip_tags_etree(self, etreeobj, invalid_tags):
for (itag, rmv) in invalid_tags.iteritems():
if rmv:
for elts in etreeobj.getiterator(itag):
elts.drop_tree()
else:
for elts in etreeobj.getiterator(itag):
elts.drop_tag()
def clean_entry(self, entry, invalid_tags = {'script': True},
invalid_id = (), invalid_class=()):
#invalid_tags: remove tag and keep content if False else remove
#remove tags
if invalid_tags:
self.strip_tags_etree(entry, invalid_tags)
#remove id
if invalid_id:
for eltid in invalid_id:
elt = entry.get_element_by_id(eltid)
if elt is not None:
elt.drop_tree()
#remove class
if invalid_class:
for eltclass in invalid_class:
elts = entry.find_class(eltclass)
if elts is not None:
for elt in elts:
elt.drop_tree()
def get_title(self, entry):
title = entry.get_element_by_id('btAsinTitle')
if title is not None:
title = title.text
return unicode(title.replace('\n', '').strip())
def get_authors(self, entry):
author = entry.get_element_by_id('btAsinTitle')
while author.getparent().tag != 'div':
author = author.getparent()
author = author.getparent()
authortext = []
for x in author.getiterator('a'):
authortext.append(unicode(x.text_content().strip()))
return authortext
def get_description(self, entry, verbose):
try:
description = entry.get_element_by_id("productDescription").find("div[@class='content']")
inv_class = ('seeAll', 'emptyClear')
inv_tags ={'img': True, 'a': False}
self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
description = html.tostring(description, method='html', encoding=unicode).strip()
# remove all attributes from tags
description = self.reattr.sub(r'<\1>', description)
# Remove the notice about text referring to out of print editions
description = self.reoutp.sub('', description)
# Remove comments
description = self.recom.sub('', description)
return unicode(sanitize_comments_html(description))
except:
report(verbose)
return None
def get_tags(self, entry, browser, verbose):
try:
tags = entry.get_element_by_id('tagContentHolder')
testptag = tags.find_class('see-all')
if testptag:
for x in testptag:
alink = x.xpath('descendant-or-self::a')
if alink:
if alink[0].get('class') == 'tgJsActive':
continue
link = self.baseurl + alink[0].get('href')
entry = self.get_individual_metadata(browser, link, verbose)
tags = entry.get_element_by_id('tagContentHolder')
break
tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
except:
report(verbose)
tags = []
return tags
def get_book_info(self, entry, mi, verbose):
try:
entry = entry.get_element_by_id('SalesRank').getparent()
except:
try:
for z in entry.getiterator('h2'):
if self.reprod.search(z.text_content()):
entry = z.getparent().find("div[@class='content']/ul")
break
except:
report(verbose)
return mi
elts = entry.findall('li')
#pub & date
elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
if elt:
pub = elt[0].find('b').tail
mi.publisher = unicode(self.repub.sub('', pub).strip())
d = self.repub.search(pub)
if d is not None:
d = d.group(1)
try:
default = utcnow().replace(day=15)
if self.lang != 'all':
d = replace_months(d, self.lang)
d = parse_date(d, assume_utc=True, default=default)
mi.pubdate = d
except:
report(verbose)
#ISBN
elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
if elt:
isbn = elt[0].find('b').tail.replace('-', '').strip()
if check_isbn(isbn):
mi.isbn = unicode(isbn)
elif len(elt) > 1:
isbn = elt[1].find('b').tail.replace('-', '').strip()
if check_isbn(isbn):
mi.isbn = unicode(isbn)
#Langue
elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
if elt:
langue = elt[0].find('b').tail.strip()
if langue:
mi.language = unicode(langue)
#ratings
elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
if elt:
ratings = elt[0].find_class('swSprite')
if ratings:
ratings = self.rerat.findall(ratings[0].get('title'))
if len(ratings) == 2:
mi.rating = float(ratings[0])/float(ratings[1]) * 5
return mi
def fill_MI(self, entry, title, authors, browser, verbose):
mi = MetaInformation(title, authors)
mi.author_sort = authors_to_sort_string(authors)
mi.comments = self.get_description(entry, verbose)
mi = self.get_book_info(entry, mi, verbose)
mi.tags = self.get_tags(entry, browser, verbose)
return mi
def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(linkdata).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
raise
if '<title>404 - ' in raw:
report(verbose)
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
return soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
report(verbose)
return
def populate(self, entries, browser, verbose=False):
for x in entries:
try:
entry = self.get_individual_metadata(browser, x, verbose)
# clean results
# inv_ids = ('divsinglecolumnminwidth', 'sims.purchase', 'AutoBuyXGetY', 'A9AdsMiddleBoxTop')
# inv_class = ('buyingDetailsGrid', 'productImageGrid')
# inv_tags ={'script': True, 'style': True, 'form': False}
# self.clean_entry(entry, invalid_id=inv_ids)
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
print 'URL that failed:', x
report(verbose)
continue
self.append(self.fill_MI(entry, title, authors, browser, verbose))
def search(title=None, author=None, publisher=None, isbn=None,
max_results=5, verbose=False, keywords=None, lang='all'):
br = browser()
entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)
if entries is None or len(entries) == 0:
return
#List of entries
ans = ResultList(baseurl, lang)
ans.populate(entries, br, verbose)
return ans
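#A minimal usage sketch (illustrative title/author; performs a live
#Amazon query when run with calibre's dependencies available):
#
# results = search(title='Dune', author='Frank Herbert',
#                  max_results=5, verbose=True, lang='en')
# if results is not None:
#     for mi in results:
#         print unicode(mi).encode(preferred_encoding, 'replace')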
def option_parser():
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
Fetch book metadata from Amazon. You must specify one of title, author,
ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
so you should make your query as specific as possible.
You can choose the language for metadata retrieval:
all, english, french, german or spanish
'''
)))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-i', '--isbn', help='Book ISBN')
parser.add_option('-k', '--keywords', help='Keywords')
parser.add_option('-m', '--max-results', default=10,
help='Maximum number of results to fetch')
parser.add_option('-l', '--lang', default='all',
help='Chosen language for metadata search (all, en, fr, es, de)')
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
lang=opts.lang)
except AssertionError:
report(True)
parser.print_help()
return 1
if results is None or len(results) == 0:
print 'No result found for this search!'
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print
if __name__ == '__main__':
sys.exit(main())


@ -68,7 +68,19 @@ composite_formatter = SafeFormat()
class Metadata(object):
'''
A class representing all the metadata for a book.
A class representing all the metadata for a book. The various standard metadata
fields are available as attributes of this object. You can also stick
arbitrary attributes onto this object.
Metadata from custom columns should be accessed via the get() method,
passing in the lookup name for the column, for example: "#mytags".
Use the :meth:`is_null` method to test if a field is null.
This object also has functions to format fields into strings.
The list of standard metadata fields, which grows over time, is in
:data:`STANDARD_METADATA_FIELDS`.
Please keep the method based API of this class to a minimum. Every method
becomes a reserved field name.
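For example (illustrative values)::

    mi = Metadata('The Dispossessed', ['Ursula K. Le Guin'])
    mi.publisher = 'Harper & Row'   # standard field, as an attribute
    mytags = mi.get('#mytags')      # custom column, via get()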
@ -88,11 +100,19 @@ class Metadata(object):
if title:
self.title = title
if authors:
#: List of strings or []
# List of strings or []
self.author = list(authors) if authors else []  # Needed for backward compatibility
self.authors = list(authors) if authors else []
def is_null(self, field):
'''
Return True if the value of field is null in this object.
'null' means it is unknown or evaluates to False. So a title of
_('Unknown') is null or a language of 'und' is null.
Be careful with numeric fields since this will return True for zero as
well as None.
'''
null_val = NULL_VALUES.get(field, None)
val = getattr(self, field, None)
return not val or val == null_val
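#For example (values chosen to show the caveats above):
#
# mi = Metadata(_('Unknown'))
# mi.is_null('title')   # True: _('Unknown') is the null value for titles
# mi.rating = 0
# mi.is_null('rating')  # True: zero is falsy, per the numeric caveat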
@ -120,7 +140,11 @@ class Metadata(object):
_('TEMPLATE ERROR'),
self).strip()
return val
if field.startswith('#') and field.endswith('_index'):
try:
return self.get_extra(field[:-6])
except:
pass
raise AttributeError(
'Metadata object has no attribute named: '+ repr(field))
@ -170,11 +194,6 @@ class Metadata(object):
try:
return self.__getattribute__(field)
except AttributeError:
if field.startswith('#') and field.endswith('_index'):
try:
return self.get_extra(field[:-6])
except:
pass
return default
def get_extra(self, field, default=None):
@ -544,17 +563,24 @@ class Metadata(object):
def format_tags(self):
return u', '.join([unicode(t) for t in sorted(self.tags, key=sort_key)])
def format_rating(self):
return unicode(self.rating)
def format_rating(self, v=None, divide_by=1.0):
if v is None:
if self.rating is not None:
return unicode(self.rating/divide_by)
return u'None'
return unicode(v/divide_by)
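#For example, with the 0-10 scale calibre stores internally (illustrative value):
#
# mi.rating = 8
# mi.format_rating(divide_by=2.0)  # -> u'4.0', i.e. a rating out of 5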
def format_field(self, key, series_with_index=True):
'''
Returns the tuple (display_name, formatted_value)
'''
name, val, ign, ign = self.format_field_extended(key, series_with_index)
return (name, val)
def format_field_extended(self, key, series_with_index=True):
from calibre.ebooks.metadata import authors_to_string
'''
returns the tuple (field_name, formatted_value, original_value,
returns the tuple (display_name, formatted_value, original_value,
field_metadata)
'''
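#Sketch of the difference between the two calls (illustrative values,
#assuming mi.rating == 8 on the internal 0-10 scale):
#
# mi.format_field('rating')           # -> (u'Rating', u'4.0')
# mi.format_field_extended('rating')  # -> (u'Rating', u'4.0', 8, fmeta)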
@ -631,13 +657,17 @@ class Metadata(object):
res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
elif datatype == 'rating':
res = res/2.0
elif key in ('book_size', 'size'):
elif key == 'size':
res = human_readable(res)
return (name, unicode(res), orig_res, fmeta)
return (None, None, None, None)
def __unicode__(self):
'''
A string representation of this object, suitable for printing to
console
'''
from calibre.ebooks.metadata import authors_to_string
ans = []
def fmt(x, y):
@ -681,6 +711,9 @@ class Metadata(object):
return u'\n'.join(ans)
def to_html(self):
'''
An HTML representation of this object.
'''
from calibre.ebooks.metadata import authors_to_string
ans = [(_('Title'), unicode(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]


@ -1,317 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback, socket, sys
from functools import partial
from threading import Thread, Event
from Queue import Queue, Empty
from lxml import etree
import mechanize
from calibre.customize import Plugin
from calibre import browser, prints
from calibre.constants import preferred_encoding, DEBUG
class CoverDownload(Plugin):
'''
These plugins are used to download covers for books.
'''
supported_platforms = ['windows', 'osx', 'linux']
author = 'Kovid Goyal'
type = _('Cover download')
def has_cover(self, mi, ans, timeout=5.):
'''
Check if the book described by mi has a cover. Call ans.set() if it
does. Do nothing if it doesn't.
:param mi: MetaInformation object
:param timeout: timeout in seconds
:param ans: A threading.Event object
'''
raise NotImplementedError()
def get_covers(self, mi, result_queue, abort, timeout=5.):
'''
Download covers for books described by the mi object. Downloaded covers
must be put into the result_queue. If more than one cover is available,
the plugin should continue downloading them and putting them into
result_queue until abort.is_set() returns True.
:param mi: MetaInformation object
:param result_queue: A multithreaded Queue
:param abort: A threading.Event object
:param timeout: timeout in seconds
'''
raise NotImplementedError()
def exception_to_string(self, ex):
try:
return unicode(ex)
except:
try:
return str(ex).decode(preferred_encoding, 'replace')
except:
return repr(ex)
def debug(self, *args, **kwargs):
if DEBUG:
prints('\t'+self.name+':', *args, **kwargs)
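#A minimal plugin sketch following the contract above (the name and the
#hard-coded behaviour are placeholders, not a real cover source):
#
# class ExampleCovers(CoverDownload):
#     name = 'example covers'
#     description = _('Download covers from example.org')
#     author = 'Nobody'
#
#     def has_cover(self, mi, ans, timeout=5.):
#         if mi.isbn:
#             ans.set()  # pretend a cover exists for every ISBN
#
#     def get_covers(self, mi, result_queue, abort, timeout=5.):
#         if mi.isbn and not abort.is_set():
#             data = 'JPEG bytes would go here'
#             result_queue.put((True, data, 'jpg', self.name))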
class HeadRequest(mechanize.Request):
def get_method(self):
return 'HEAD'
class OpenLibraryCovers(CoverDownload): # {{{
'Download covers from openlibrary.org'
# See http://openlibrary.org/dev/docs/api/covers
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
name = 'openlibrary.org covers'
description = _('Download covers from openlibrary.org')
author = 'Kovid Goyal'
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
from calibre.ebooks.metadata.library_thing import get_browser
br = get_browser()
br.set_handle_redirect(False)
try:
br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout)
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception as e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 302:
self.debug('cover for', mi.isbn, 'found')
ans.set()
else:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
from calibre.ebooks.metadata.library_thing import get_browser
br = get_browser()
try:
ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read()
result_queue.put((True, ans, 'jpg', self.name))
except Exception as e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
result_queue.put((False, _('ISBN: %s not found')%mi.isbn, '', self.name))
else:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
# }}}
class AmazonCovers(CoverDownload): # {{{
name = 'amazon.com covers'
description = _('Download covers from amazon.com')
author = 'Kovid Goyal'
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
from calibre.ebooks.metadata.amazon import get_cover_url
br = browser()
try:
get_cover_url(mi.isbn, br)
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception as e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
from calibre.ebooks.metadata.amazon import get_cover_url
br = browser()
try:
url = get_cover_url(mi.isbn, br)
if url is None:
raise ValueError('No cover found for ISBN: %s'%mi.isbn)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception as e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
# }}}
def check_for_cover(mi, timeout=5.): # {{{
from calibre.customize.ui import cover_sources
ans = Event()
checkers = [partial(p.has_cover, mi, ans, timeout=timeout) for p in
cover_sources()]
workers = [Thread(target=c) for c in checkers]
for w in workers:
w.daemon = True
w.start()
while not ans.is_set():
ans.wait(0.1)
if sum([int(w.is_alive()) for w in workers]) == 0:
break
return ans.is_set()
# }}}
def download_covers(mi, result_queue, max_covers=50, timeout=5.): # {{{
from calibre.customize.ui import cover_sources
abort = Event()
temp = Queue()
getters = [partial(p.get_covers, mi, temp, abort, timeout=timeout) for p in
cover_sources()]
workers = [Thread(target=c) for c in getters]
for w in workers:
w.daemon = True
w.start()
count = 0
while count < max_covers:
try:
result = temp.get_nowait()
if result[0]:
count += 1
result_queue.put(result)
except Empty:
pass
if sum([int(w.is_alive()) for w in workers]) == 0:
break
abort.set()
while True:
try:
result = temp.get_nowait()
count += 1
result_queue.put(result)
except Empty:
break
# }}}
class DoubanCovers(CoverDownload): # {{{
'Download covers from Douban.com'
DOUBAN_ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
name = 'Douban.com covers'
description = _('Download covers from Douban.com')
author = 'Li Fanxi'
def get_cover_url(self, isbn, br, timeout=5.):
try:
url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY
src = br.open(url, timeout=timeout).read()
except Exception as err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = Exception(_('Douban.com API timed out. Try again later.'))
raise err
else:
feed = etree.fromstring(src)
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'db': 'http://www.douban.com/xmlns/'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entries = XPath('//atom:entry')(feed)
if len(entries) < 1:
return None
try:
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
u = cover_url(entries[0])[0].replace('/spic/', '/lpic/')
# If URL contains "book-default", the book doesn't have a cover
if u.find('book-default') != -1:
return None
except:
return None
return u
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
br = browser()
try:
if self.get_cover_url(mi.isbn, br, timeout=timeout) is not None:
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception as e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
br = browser()
try:
url = self.get_cover_url(mi.isbn, br, timeout=timeout)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception as e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
# }}}
def download_cover(mi, timeout=5.): # {{{
results = Queue()
download_covers(mi, results, max_covers=1, timeout=timeout)
errors, ans = [], None
while True:
try:
x = results.get_nowait()
if x[0]:
ans = x[1]
else:
errors.append(x)
except Empty:
break
return ans, errors
# }}}
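#Synchronous usage sketch (illustrative book; the ISBN is an example value):
#
# mi = MetaInformation('Dune', ['Frank Herbert'])
# mi.isbn = '9780441013593'
# cover_data, errors = download_cover(mi)
# if cover_data is None:
#     for e in errors:
#         print e[1]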
def test(isbns): # {{{
from calibre.ebooks.metadata import MetaInformation
mi = MetaInformation('test', ['test'])
for isbn in isbns:
prints('Testing ISBN:', isbn)
mi.isbn = isbn
found = check_for_cover(mi)
prints('Has cover:', found)
ans, errors = download_cover(mi)
if ans is not None:
prints('Cover downloaded')
else:
prints('Download failed:')
for err in errors:
prints('\t', err[-1]+':', err[1])
print '\n'
# }}}
if __name__ == '__main__':
isbns = sys.argv[1:] + ['9781591025412', '9780307272119']
#test(isbns)
from calibre.ebooks.metadata import MetaInformation
oc = OpenLibraryCovers(None)
for isbn in isbns:
mi = MetaInformation('xx', ['yy'])
mi.isbn = isbn
rq = Queue()
oc.get_covers(mi, rq, Event())
result = rq.get_nowait()
if not result[0]:
print 'Failed for ISBN:', isbn
print result


@ -1,263 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>; 2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import sys, textwrap
import traceback
from urllib import urlencode
from functools import partial
from lxml import etree
from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.date import parse_date, utcnow
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'db': 'http://www.douban.com/xmlns/'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
tag = XPath("descendant::db:tag")
CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
class DoubanBooks(MetadataSource):
name = 'Douban Books'
description = _('Downloads metadata from Douban.com')
supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
author = 'Li Fanxi <lifanxi@freemindworld.com>' # The author of this plugin
version = (1, 0, 1) # The version number of this plugin
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10,
verbose=self.verbose)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
def report(verbose):
if verbose:
import traceback
traceback.print_exc()
class Query(object):
SEARCH_URL = 'http://api.douban.com/book/subjects?'
ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
type = "search"
def __init__(self, title=None, author=None, publisher=None, isbn=None,
max_results=20, start_index=1, api_key=''):
assert not(title is None and author is None and publisher is None and \
isbn is None)
assert (int(max_results) < 21)
q = ''
if isbn is not None:
q = isbn
self.type = 'isbn'
else:
def build_term(parts):
return ' '.join(x for x in parts)
if title is not None:
q += build_term(title.split())
if author is not None:
q += (' ' if q else '') + build_term(author.split())
if publisher is not None:
q += (' ' if q else '') + build_term(publisher.split())
self.type = 'search'
if isinstance(q, unicode):
q = q.encode('utf-8')
if self.type == "isbn":
self.url = self.ISBN_URL + q
if api_key != '':
self.url = self.url + "?apikey=" + api_key
else:
self.url = self.SEARCH_URL+urlencode({
'q':q,
'max-results':max_results,
'start-index':start_index,
})
if api_key != '':
self.url = self.url + "&apikey=" + api_key
def __call__(self, browser, verbose):
if verbose:
print 'Query:', self.url
if self.type == "search":
feed = etree.fromstring(browser.open(self.url).read())
total = int(total_results(feed)[0].text)
start = int(start_index(feed)[0].text)
entries = entry(feed)
new_start = start + len(entries)
if new_start > total:
new_start = 0
return entries, new_start
elif self.type == "isbn":
feed = etree.fromstring(browser.open(self.url).read())
entries = entry(feed)
return entries, 0
class ResultList(list):
def get_description(self, entry, verbose):
try:
desc = description(entry)
if desc:
return 'SUMMARY:\n'+desc[0].text
except:
report(verbose)
def get_title(self, entry):
candidates = [x.text for x in title(entry)]
return ': '.join(candidates)
def get_authors(self, entry):
m = creator(entry)
if not m:
m = []
m = [x.text for x in m]
return m
def get_tags(self, entry, verbose):
try:
btags = [x.attrib["name"] for x in tag(entry)]
tags = []
for t in btags:
tags.extend([y.strip() for y in t.split('/')])
tags = list(sorted(list(set(tags))))
except:
report(verbose)
tags = []
return [x.replace(',', ';') for x in tags]
def get_publisher(self, entry, verbose):
try:
pub = publisher(entry)[0].text
except:
pub = None
return pub
def get_isbn(self, entry, verbose):
try:
isbn13 = isbn(entry)[0].text
except Exception:
isbn13 = None
return isbn13
def get_date(self, entry, verbose):
try:
d = date(entry)
if d:
default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default)
else:
d = None
except:
report(verbose)
d = None
return d
def populate(self, entries, browser, verbose=False, api_key=''):
for x in entries:
try:
id_url = entry_id(x)[0].text
title = self.get_title(x)
except:
report(verbose)
continue
mi = MetaInformation(title, self.get_authors(x))
try:
if api_key != '':
id_url = id_url + "?apikey=" + api_key
raw = browser.open(id_url).read()
feed = etree.fromstring(raw)
x = entry(feed)[0]
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
mi.comments = self.get_description(x, verbose)
mi.tags = self.get_tags(x, verbose)
mi.isbn = self.get_isbn(x, verbose)
mi.publisher = self.get_publisher(x, verbose)
mi.pubdate = self.get_date(x, verbose)
self.append(mi)
def search(title=None, author=None, publisher=None, isbn=None,
verbose=False, max_results=40, api_key=None):
br = browser()
start, entries = 1, []
if api_key is None:
api_key = CALIBRE_DOUBAN_API_KEY
while start > 0 and len(entries) <= max_results:
new, start = Query(title=title, author=author, publisher=publisher,
isbn=isbn, max_results=max_results, start_index=start, api_key=api_key)(br, verbose)
if not new:
break
entries.extend(new)
entries = entries[:max_results]
ans = ResultList()
ans.populate(entries, br, verbose, api_key)
return ans
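#Usage sketch (the ISBN is only an example value; performs live queries
#against the Douban API):
#
# results = search(isbn='9787532754688', verbose=True, max_results=5)
# for mi in results:
#     print unicode(mi)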
def option_parser():
parser = OptionParser(textwrap.dedent(
'''\
%prog [options]
Fetch book metadata from Douban. You must specify one of title, author,
publisher or ISBN. If you specify ISBN the others are ignored. Will
fetch a maximum of 100 matches, so you should make your query as
specific as possible.
'''
))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-i', '--isbn', help='Book ISBN')
parser.add_option('-m', '--max-results', default=10,
help='Maximum number of results to fetch')
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, opts.publisher, opts.isbn,
verbose=opts.verbose, max_results=int(opts.max_results))
except AssertionError:
report(True)
parser.print_help()
return 1
for result in results:
print unicode(result).encode(preferred_encoding)
print
if __name__ == '__main__':
sys.exit(main())


@ -13,7 +13,7 @@ import posixpath
from cStringIO import StringIO
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile, safe_replace
@ -31,9 +31,9 @@ def get_metadata(stream, extract_cover=True):
opf = OPF(opf_stream)
mi = opf.to_book_metadata()
if extract_cover:
cover_name = opf.raster_cover
if cover_name:
mi.cover_data = ('jpg', zf.read(cover_name))
cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
if cover_href:
mi.cover_data = ('jpg', zf.read(cover_href))
except:
return mi
return mi
@ -59,17 +59,20 @@ def set_metadata(stream, mi):
except:
pass
if new_cdata:
raster_cover = opf.raster_cover
if not raster_cover:
raster_cover = 'cover.jpg'
cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover)
cover = opf.cover
if not cover:
cover = 'cover.jpg'
cpath = posixpath.join(posixpath.dirname(opf_path), cover)
new_cover = _write_new_cover(new_cdata, cpath)
replacements[cpath] = open(new_cover.name, 'rb')
mi.cover = cover
# Update the metadata.
opf.smart_update(mi, replace_metadata=True)
old_mi = opf.to_book_metadata()
old_mi.smart_update(mi)
opf.smart_update(metadata_to_opf(old_mi), replace_metadata=True)
newopf = StringIO(opf.render())
safe_replace(stream, opf_path, newopf, extra_replacements=replacements)
safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
# Cleanup temporary files.
try:


@ -1,523 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback, sys, textwrap, re
from threading import Thread
from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log
from calibre.utils.titlecase import titlecase
from calibre.customize import Plugin
from calibre.ebooks.metadata.covers import check_for_cover
from calibre.utils.html2text import html2text
metadata_config = None
class MetadataSource(Plugin): # {{{
'''
Represents a source to query for metadata. Subclasses must implement
at least the fetch method.
When :meth:`fetch` is called, the `self` object will have the following
useful attributes (each of which may be None)::
title, book_author, publisher, isbn, log, verbose and extra
Use these attributes to construct the search query. extra is reserved for
future use.
The fetch method must store the results in `self.results` as a list of
:class:`Metadata` objects. If there is an error, it should be stored
in `self.exception` and `self.tb` (for the traceback).
'''
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
#: The type of metadata fetched. 'basic' means basic metadata like
#: title/author/isbn/etc. 'social' means social metadata like
#: tags/rating/reviews/etc.
metadata_type = 'basic'
#: If not None, the customization dialog will allow for string
#: based customization as well the default customization. The
#: string customization will be saved in the site_customization
#: member.
string_customization_help = None
#: Set this to true if your plugin returns HTML markup in comments.
#: Then if the user disables HTML, calibre will automagically convert
#: the HTML to Markdown.
has_html_comments = False
type = _('Metadata download')
def __call__(self, title, author, publisher, isbn, verbose, log=None,
extra=None):
self.worker = Thread(target=self._fetch)
self.worker.daemon = True
self.title = title
self.verbose = verbose
self.book_author = author
self.publisher = publisher
self.isbn = isbn
self.log = log if log is not None else default_log
self.extra = extra
self.exception, self.tb, self.results = None, None, []
self.worker.start()
def _fetch(self):
try:
self.fetch()
if self.results:
c = self.config_store().get(self.name, {})
res = self.results
if hasattr(res, 'authors'):
res = [res]
for mi in res:
if not c.get('rating', True):
mi.rating = None
if not c.get('comments', True):
mi.comments = None
if not c.get('tags', True):
mi.tags = []
if self.has_html_comments and mi.comments and \
c.get('textcomments', False):
try:
mi.comments = html2text(mi.comments)
except:
traceback.print_exc()
mi.comments = None
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
def fetch(self):
'''
All the actual work is done here.
'''
raise NotImplementedError
def join(self):
return self.worker.join()
def is_alive(self):
return self.worker.is_alive()
def is_customizable(self):
return True
def config_store(self):
global metadata_config
if metadata_config is None:
from calibre.utils.config import XMLConfig
metadata_config = XMLConfig('plugins/metadata_download')
return metadata_config
def config_widget(self):
from PyQt4.Qt import QWidget, QVBoxLayout, QLabel, Qt, QLineEdit, \
QCheckBox
from calibre.customize.ui import config
w = QWidget()
w._layout = QVBoxLayout(w)
w.setLayout(w._layout)
if self.string_customization_help is not None:
w._sc_label = QLabel(self.string_customization_help, w)
w._layout.addWidget(w._sc_label)
customization = config['plugin_customization']
def_sc = customization.get(self.name, '')
if not def_sc:
def_sc = ''
w._sc = QLineEdit(def_sc, w)
w._layout.addWidget(w._sc)
w._sc_label.setWordWrap(True)
w._sc_label.setTextInteractionFlags(Qt.LinksAccessibleByMouse
| Qt.LinksAccessibleByKeyboard)
w._sc_label.setOpenExternalLinks(True)
c = self.config_store()
c = c.get(self.name, {})
for x, l in {'rating':_('ratings'), 'tags':_('tags'),
'comments':_('description/reviews')}.items():
cb = QCheckBox(_('Download %s from %s')%(l,
self.name))
setattr(w, '_'+x, cb)
cb.setChecked(c.get(x, True))
w._layout.addWidget(cb)
if self.has_html_comments:
cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
setattr(w, '_textcomments', cb)
cb.setChecked(c.get('textcomments', False))
w._layout.addWidget(cb)
return w
def save_settings(self, w):
dl_settings = {}
for x in ('rating', 'tags', 'comments'):
dl_settings[x] = getattr(w, '_'+x).isChecked()
if self.has_html_comments:
dl_settings['textcomments'] = getattr(w, '_textcomments').isChecked()
c = self.config_store()
c.set(self.name, dl_settings)
if hasattr(w, '_sc'):
sc = unicode(w._sc.text()).strip()
from calibre.customize.ui import customize_plugin
customize_plugin(self, sc)
def customization_help(self):
return 'This plugin can only be customized using the GUI'
# }}}
class GoogleBooks(MetadataSource): # {{{
name = 'Google Books'
description = _('Downloads metadata from Google Books')
def fetch(self):
from calibre.ebooks.metadata.google_books import search
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10,
verbose=self.verbose)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
# }}}
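#Invocation sketch, valid for any MetadataSource (fetch runs on a worker
#thread; see __call__ above; the title/author values are illustrative):
#
# src = GoogleBooks(None)
# src('Dune', 'Frank Herbert', None, None, verbose=1)
# src.join()
# if src.exception is None:
#     for mi in src.results:
#         print unicode(mi)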
class ISBNDB(MetadataSource): # {{{
name = 'IsbnDB'
description = _('Downloads metadata from isbndb.com')
def fetch(self):
if not self.site_customization:
return
from calibre.ebooks.metadata.isbndb import option_parser, create_books
args = ['isbndb']
if self.isbn:
args.extend(['--isbn', self.isbn])
else:
if self.title:
args.extend(['--title', self.title])
if self.book_author:
args.extend(['--author', self.book_author])
if self.publisher:
args.extend(['--publisher', self.publisher])
if self.verbose:
args.extend(['--verbose'])
args.append(self.site_customization) # IsbnDb key
try:
opts, args = option_parser().parse_args(args)
self.results = create_books(opts, args)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@property
def string_customization_help(self):
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
'and enter your access key below.')
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
# }}}
class Amazon(MetadataSource): # {{{
name = 'Amazon'
metadata_type = 'social'
description = _('Downloads social metadata from amazon.com')
has_html_comments = True
def fetch(self):
if not self.isbn:
return
from calibre.ebooks.metadata.amazon import get_social_metadata
try:
self.results = get_social_metadata(self.title, self.book_author,
self.publisher, self.isbn)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
# }}}
class KentDistrictLibrary(MetadataSource): # {{{
name = 'Kent District Library'
metadata_type = 'social'
description = _('Downloads series information from ww2.kdl.org. '
'This website cannot handle large numbers of queries, '
'so the plugin is disabled by default.')
def fetch(self):
if not self.title or not self.book_author:
return
from calibre.ebooks.metadata.kdl import get_series
try:
self.results = get_series(self.title, self.book_author)
except Exception as e:
import traceback
traceback.print_exc()
self.exception = e
self.tb = traceback.format_exc()
# }}}
def result_index(source, result):
if not result.isbn:
return -1
for i, x in enumerate(source):
if x.isbn == result.isbn:
return i
return -1
def merge_results(one, two):
if two is not None and one is not None:
for x in two:
idx = result_index(one, x)
if idx < 0:
one.append(x)
else:
one[idx].smart_update(x)
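#For example, if one == [A] and two == [B, C], where A and B share an
#ISBN and C has a different one, merge_results leaves one == [A', C]
#with A' being A smart-updated with B's fields (illustrative names).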
class MetadataSources(object):
def __init__(self, sources):
self.sources = sources
def __enter__(self):
for s in self.sources:
s.__enter__()
return self
def __exit__(self, *args):
for s in self.sources:
s.__exit__()
def __call__(self, *args, **kwargs):
for s in self.sources:
s(*args, **kwargs)
def join(self):
for s in self.sources:
s.join()
def filter_metadata_results(item):
keywords = ["audio", "tape", "cassette", "abridged", "playaway"]
for keyword in keywords:
if item.publisher and keyword in item.publisher.lower():
return False
return True
def do_cover_check(item):
item.has_cover = False
try:
item.has_cover = check_for_cover(item)
except:
pass # Cover not found
def check_for_covers(items):
threads = [Thread(target=do_cover_check, args=(item,)) for item in items]
for t in threads: t.start()
for t in threads: t.join()
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
verbose=0):
assert not(title is None and author is None and publisher is None and \
isbn is None)
from calibre.customize.ui import metadata_sources, migrate_isbndb_key
migrate_isbndb_key()
if isbn is not None:
isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
fetchers = list(metadata_sources(isbndb_key=isbndb_key))
with MetadataSources(fetchers) as manager:
manager(title, author, publisher, isbn, verbose)
manager.join()
results = list(fetchers[0].results) if fetchers else []
for fetcher in fetchers[1:]:
merge_results(results, fetcher.results)
results = list(filter(filter_metadata_results, results))
check_for_covers(results)
words = ("the", "a", "an", "of", "and")
prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
trailing_paren_pat = re.compile(r'\(.*\)$')
whitespace_pat = re.compile(r'\s+')
def sort_func(x, y):
def cleanup_title(s):
if s is None:
s = _('Unknown')
s = s.strip().lower()
s = prefix_pat.sub(' ', s)
s = trailing_paren_pat.sub('', s)
s = whitespace_pat.sub(' ', s)
return s.strip()
t = cleanup_title(title)
x_title = cleanup_title(x.title)
y_title = cleanup_title(y.title)
# prefer titles that start with the search title
tx = cmp(t, x_title)
ty = cmp(t, y_title)
result = 0 if abs(tx) == abs(ty) else abs(tx) - abs(ty)
# then prefer titles that have a cover image
if result == 0:
result = -cmp(x.has_cover, y.has_cover)
# then prefer titles with the longest comment, within 10%
if result == 0:
cx = len(x.comments.strip() if x.comments else '')
cy = len(y.comments.strip() if y.comments else '')
t = (cx + cy) / 20
result = cy - cx
if abs(result) < t:
result = 0
return result
results = sorted(results, cmp=sort_func)
# if for some reason there is no comment in the top selection, go looking for one
if len(results) > 1:
if not results[0].comments or len(results[0].comments) == 0:
for r in results[1:]:
try:
if title and title.lower() == r.title[:len(title)].lower() \
and r.comments and len(r.comments):
results[0].comments = r.comments
break
except:
pass
# Find a pubdate
pubdate = None
for r in results:
if r.pubdate is not None:
pubdate = r.pubdate
break
if pubdate is not None:
for r in results:
if r.pubdate is None:
r.pubdate = pubdate
def fix_case(x):
if x:
x = titlecase(x)
return x
for r in results:
r.title = fix_case(r.title)
if r.authors:
r.authors = list(map(fix_case, r.authors))
return results, [(x.name, x.exception, x.tb) for x in fetchers]
def get_social_metadata(mi, verbose=0):
from calibre.customize.ui import metadata_sources
fetchers = list(metadata_sources(metadata_type='social'))
with MetadataSources(fetchers) as manager:
manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
manager.join()
ratings, tags, comments, series, series_index = [], set([]), set([]), None, None
for fetcher in fetchers:
if fetcher.results:
dmi = fetcher.results
if dmi.rating is not None:
ratings.append(dmi.rating)
if dmi.tags:
for t in dmi.tags:
tags.add(t)
if mi.pubdate is None and dmi.pubdate is not None:
mi.pubdate = dmi.pubdate
if dmi.comments:
comments.add(dmi.comments)
if dmi.series is not None:
series = dmi.series
if dmi.series_index is not None:
series_index = dmi.series_index
if ratings:
rating = sum(ratings)/float(len(ratings))
if mi.rating is None or mi.rating < 0.1:
mi.rating = rating
else:
mi.rating = (mi.rating + rating)/2.0
if tags:
if not mi.tags:
mi.tags = []
mi.tags += list(tags)
mi.tags = list(sorted(list(set(mi.tags))))
if comments:
if not mi.comments or len(mi.comments)+20 < len(' '.join(comments)):
mi.comments = ''
for x in comments:
mi.comments += x+'\n\n'
if series and series_index is not None:
mi.series = series
mi.series_index = series_index
return [(x.name, x.exception, x.tb) for x in fetchers if x.exception is not
None]
def option_parser():
parser = OptionParser(textwrap.dedent(
'''\
%prog [options]
Fetch book metadata from online sources. You must specify at least one
of title, author, publisher or ISBN. If you specify ISBN, the others
are ignored.
'''
))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-i', '--isbn', help='Book ISBN')
parser.add_option('-m', '--max-results', default=10,
help='Maximum number of results to fetch')
parser.add_option('-k', '--isbndb-key',
help=('The access key for your ISBNDB.com account. '
'Only needed if you want to search isbndb.com '
'and you haven\'t customized the IsbnDB plugin.'))
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
results, exceptions = search(opts.title, opts.author, opts.publisher,
opts.isbn, opts.isbndb_key, opts.verbose)
social_exceptions = []
for result in results:
social_exceptions.extend(get_social_metadata(result, opts.verbose))
prints(unicode(result))
print
for name, exception, tb in exceptions+social_exceptions:
if exception is not None:
print 'WARNING: Fetching from', name, 'failed with error:'
print exception
print tb
return 0
if __name__ == '__main__':
sys.exit(main())


@ -1,390 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
from urllib import urlencode
from lxml.html import soupparser, tostring
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.library.comments import sanitize_comments_html
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
class Fictionwise(MetadataSource): # {{{
author = 'Sengian'
name = 'Fictionwise'
description = _('Downloads metadata from Fictionwise')
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
# }}}
class FictionwiseError(Exception):
pass
def report(verbose):
if verbose:
traceback.print_exc()
class Query(object):
BASE_URL = 'http://www.fictionwise.com/servlet/mw'
def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
assert not(title is None and author is None and publisher is None and keywords is None)
assert (max_results < 21)
self.max_results = int(max_results)
q = { 'template' : 'searchresults_adv.htm' ,
'searchtitle' : '',
'searchauthor' : '',
'searchpublisher' : '',
'searchkeyword' : '',
#possibilities startoflast, fullname, lastfirst
'searchauthortype' : 'startoflast',
'searchcategory' : '',
'searchcategory2' : '',
'searchprice_s' : '0',
'searchprice_e' : 'ANY',
'searchformat' : '',
'searchgeo' : 'US',
'searchfwdatetype' : '',
#maybe use dates fields if needed?
#'sortorder' : 'DESC',
#many options available: b.SortTitle, a.SortName,
#b.DateFirstPublished, b.FWPublishDate
'sortby' : 'b.SortTitle'
}
if title is not None:
q['searchtitle'] = title
if author is not None:
q['searchauthor'] = author
if publisher is not None:
q['searchpublisher'] = publisher
if keywords is not None:
q['searchkeyword'] = keywords
#urlencode chokes on unicode values, so encode each value to UTF-8 first
for k, v in q.iteritems():
if isinstance(v, unicode):
q[k] = v.encode('utf-8')
self.urldata = urlencode(q)
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
print _('Query: %s') % (self.BASE_URL + self.urldata)
try:
raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
raise FictionwiseError(_('Fictionwise encountered an error.'))
if '<title>404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
feed = soupparser.fromstring(clean_ascii_chars(raw))
except:
return None
# get list of results as links
results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
results = results[:self.max_results]
results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
#return feed if no links ie normally a single book or nothing
if not results:
results = [feed]
return results
class ResultList(list):
BASE_URL = 'http://www.fictionwise.com'
COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
def __init__(self):
self.retitle = re.compile(r'\[[^\[\]]+\]')
self.rechkauth = re.compile(r'.*book\s*by', re.I)
self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
self.resplitbr = re.compile(r'<br[^>]*>', re.I)
self.recomment = re.compile(r'(?s)<!--.*?-->')
self.reimg = re.compile(r'<img[^>]*>', re.I)
self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)
def strip_tags_etree(self, etreeobj, invalid_tags):
for (itag, rmv) in invalid_tags.iteritems():
if rmv:
for elts in etreeobj.getiterator(itag):
elts.drop_tree()
else:
for elts in etreeobj.getiterator(itag):
elts.drop_tag()
def clean_entry(self, entry, invalid_tags = {'script': True},
invalid_id = (), invalid_class=(), invalid_xpath = ()):
#invalid_tags: remove tag and keep content if False else remove
#remove tags
if invalid_tags:
self.strip_tags_etree(entry, invalid_tags)
#remove xpath
if invalid_xpath:
for eltid in invalid_xpath:
elt = entry.xpath(eltid)
for el in elt:
el.drop_tree()
#remove id
if invalid_id:
for eltid in invalid_id:
elt = entry.get_element_by_id(eltid)
if elt is not None:
elt.drop_tree()
#remove class
if invalid_class:
for eltclass in invalid_class:
elts = entry.find_class(eltclass)
if elts is not None:
for elt in elts:
elt.drop_tree()
def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
out = tostring(entry, pretty_print=prettyout)
#work around tostring output: strip whitespace runs and numeric
#character references (e.g. &#38;) left in the serialized markup
reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
return reclean.sub('', out)
def get_title(self, entry):
title = entry.findtext('./')
return self.retitle.sub('', title).strip()
def get_authors(self, entry):
authortext = entry.find('./br').tail
if not self.rechkauth.search(authortext):
return []
authortext = self.rechkauth.sub('', authortext)
return [a.strip() for a in authortext.split('&')]
def get_rating(self, entrytable, verbose):
nbcomment = tostring(entrytable.getprevious())
try:
nbcomment = self.renbcom.search(nbcomment).group("nbcom")
except:
report(verbose)
return None
hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
float(image.get('height', default=0))) \
for image in entrytable.getiterator('img'))
#ratings as x/5
return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))
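#For example, a 40px BLUE segment plus a 10px RED segment yields
# 1.25 * (4*40 + 1*10) / (40 + 10) = 4.25 stars out of 5.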
def get_description(self, entry):
description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
description = self.redesc.search(description)
if not description or not description.group("desc"):
return None
#remove invalid tags
description = self.reimg.sub('', description.group("desc"))
description = self.recomment.sub('', description)
description = self.resanitize.sub('', sanitize_comments_html(description))
return _('SUMMARY:\n %s') % re.sub(r'\n\s+</p>','\n</p>', description)
def get_publisher(self, entry):
publisher = self.output_entry(entry.xpath('./p')[1])
publisher = filter(lambda x: self.repub.search(x) is not None,
self.resplitbr.split(publisher))
if not len(publisher):
return None
publisher = self.repub.sub('', publisher[0])
return publisher.split(',')[0].strip()
def get_tags(self, entry):
tag = self.output_entry(entry.xpath('./p')[1])
tag = filter(lambda x: self.retag.search(x) is not None,
self.resplitbr.split(tag))
if not len(tag):
return []
return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))
def get_date(self, entry, verbose):
date = self.output_entry(entry.xpath('./p')[1])
date = filter(lambda x: self.redate.search(x) is not None,
self.resplitbr.split(date))
if not len(date):
return None
try:
d = self.redate.sub('', date[0])
if d:
default = utcnow().replace(day=15)
d = parse_date(d, assume_utc=True, default=default)
else:
d = None
except:
report(verbose)
d = None
return d
def get_ISBN(self, entry):
isbns = self.output_entry(entry.xpath('./p')[2])
isbns = filter(lambda x: self.reisbn.search(x) is not None,
self.resplitbrdiv.split(isbns))
if not len(isbns):
return None
isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
def fill_MI(self, entry, title, authors, ratings, verbose):
mi = MetaInformation(title, authors)
mi.rating = ratings
mi.comments = self.get_description(entry)
mi.publisher = self.get_publisher(entry)
mi.tags = self.get_tags(entry)
mi.pubdate = self.get_date(entry, verbose)
mi.isbn = self.get_ISBN(entry)
mi.author_sort = authors_to_sort_string(authors)
return mi
def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(self.BASE_URL + linkdata).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
raise FictionwiseError(_('Fictionwise encountered an error.'))
if '<title>404 - ' in raw:
report(verbose)
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
return soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
return None
def populate(self, entries, browser, verbose=False):
inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
'ul': False, 'span': False}
inv_xpath =('./table',)
#single entry
if len(entries) == 1 and not isinstance(entries[0], str):
try:
entry = entries[0].xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
title = self.get_title(entry)
#maybe strengthen the search
ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print _('Failed to get all details for an entry')
print e
return
self.append(self.fill_MI(entry, title, authors, ratings, verbose))
else:
#multiple entries
for x in entries:
try:
entry = self.get_individual_metadata(browser, x, verbose)
entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
title = self.get_title(entry)
#maybe strengthen the search
ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print _('Failed to get all details for an entry')
print e
continue
self.append(self.fill_MI(entry, title, authors, ratings, verbose))
def search(title=None, author=None, publisher=None, isbn=None,
min_viewability='none', verbose=False, max_results=5,
keywords=None):
br = browser()
entries = Query(title=title, author=author, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
#List of entries
ans = ResultList()
ans.populate(entries, br, verbose)
return ans
def option_parser():
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
Fetch book metadata from Fictionwise. You must specify one of title, author,
or keywords. Searching by ISBN is not supported. Will fetch a maximum of 20 matches,
so you should make your query as specific as possible.
''')
))
parser.add_option('-t', '--title', help=_('Book title'))
parser.add_option('-a', '--author', help=_('Book author(s)'))
parser.add_option('-p', '--publisher', help=_('Book publisher'))
parser.add_option('-k', '--keywords', help=_('Keywords'))
parser.add_option('-m', '--max-results', default=20,
help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, publisher=opts.publisher,
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
except AssertionError:
report(True)
parser.print_help()
return 1
if results is None or len(results) == 0:
print _('No result found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print
if __name__ == '__main__':
sys.exit(main())


@ -1,247 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, textwrap
from urllib import urlencode
from functools import partial
from lxml import etree
from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'dc': 'http://purl.org/dc/terms'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
def report(verbose):
if verbose:
import traceback
traceback.print_exc()
class Query(object):
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
def __init__(self, title=None, author=None, publisher=None, isbn=None,
max_results=20, min_viewability='none', start_index=1):
assert not(title is None and author is None and publisher is None and \
isbn is None)
assert (max_results < 21)
assert (min_viewability in ('none', 'partial', 'full'))
q = ''
if isbn is not None:
q += 'isbn:'+isbn
else:
def build_term(prefix, parts):
return ' '.join('in'+prefix + ':' + x for x in parts)
if title is not None:
q += build_term('title', title.split())
if author is not None:
q += ('+' if q else '')+build_term('author', author.split())
if publisher is not None:
q += ('+' if q else '')+build_term('publisher', publisher.split())
if isinstance(q, unicode):
q = q.encode('utf-8')
self.url = self.BASE_URL+urlencode({
'q':q,
'max-results':max_results,
'start-index':start_index,
'min-viewability':min_viewability,
})
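#For example, Query(title='Foundation', author='Asimov') builds the
#query string 'intitle:Foundation+inauthor:Asimov' before URL-encoding.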
def __call__(self, browser, verbose):
if verbose:
print 'Query:', self.url
feed = etree.fromstring(browser.open(self.url).read())
#print etree.tostring(feed, pretty_print=True)
total = int(total_results(feed)[0].text)
start = int(start_index(feed)[0].text)
entries = entry(feed)
new_start = start + len(entries)
if new_start > total:
new_start = 0
return entries, new_start
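# A new_start of 0 signals the caller that no pages remain; search()
# below keeps fetching while its start value stays > 0.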
class ResultList(list):
def get_description(self, entry, verbose):
try:
desc = description(entry)
if desc:
return 'SUMMARY:\n'+desc[0].text
except:
report(verbose)
def get_language(self, entry, verbose):
try:
l = language(entry)
if l:
return l[0].text
except:
report(verbose)
def get_title(self, entry):
candidates = [x.text for x in title(entry)]
return ': '.join(candidates)
def get_authors(self, entry):
m = creator(entry)
if not m:
m = []
m = [x.text for x in m]
return m
def get_author_sort(self, entry, verbose):
for x in creator(entry):
for key, val in x.attrib.items():
if key.endswith('file-as'):
return val
def get_identifiers(self, entry, mi):
isbns = []
for x in identifier(entry):
t = str(x.text).strip()
if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
if t[:5].upper() == 'ISBN:':
isbns.append(t[5:])
if isbns:
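# prefer the longest identifier, so an ISBN-13 wins over an ISBN-10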
mi.isbn = sorted(isbns, key=len)[-1]
def get_tags(self, entry, verbose):
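# Subjects can arrive as slash-separated paths such as
# 'Fiction / Science Fiction / General'; split them into flat tags.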
try:
btags = [x.text for x in subject(entry)]
tags = []
for t in btags:
tags.extend([y.strip() for y in t.split('/')])
tags = sorted(set(tags))
except:
report(verbose)
tags = []
return [x.replace(',', ';') for x in tags]
def get_publisher(self, entry, verbose):
try:
pub = publisher(entry)[0].text
except:
pub = None
return pub
def get_date(self, entry, verbose):
try:
d = date(entry)
if d:
default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default)
else:
d = None
except:
report(verbose)
d = None
return d
def populate(self, entries, browser, verbose=False):
for x in entries:
try:
id_url = entry_id(x)[0].text
title = self.get_title(x)
except:
report(verbose)
continue
mi = MetaInformation(title, self.get_authors(x))
try:
raw = browser.open(id_url).read()
feed = etree.fromstring(raw)
x = entry(feed)[0]
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
mi.author_sort = self.get_author_sort(x, verbose)
mi.comments = self.get_description(x, verbose)
self.get_identifiers(x, mi)
mi.tags = self.get_tags(x, verbose)
mi.publisher = self.get_publisher(x, verbose)
mi.pubdate = self.get_date(x, verbose)
mi.language = self.get_language(x, verbose)
self.append(mi)
def search(title=None, author=None, publisher=None, isbn=None,
min_viewability='none', verbose=False, max_results=40):
br = browser()
br.set_handle_gzip(True)
start, entries = 1, []
while start > 0 and len(entries) <= max_results:
new, start = Query(title=title, author=author, publisher=publisher,
isbn=isbn, min_viewability=min_viewability, start_index=start)(br, verbose)
if not new:
break
entries.extend(new)
entries = entries[:max_results]
ans = ResultList()
ans.populate(entries, br, verbose)
return ans
def option_parser():
parser = OptionParser(textwrap.dedent(
'''\
%prog [options]
Fetch book metadata from Google. You must specify one of title, author,
publisher or ISBN. If you specify an ISBN, the others are ignored. Will
fetch a maximum of 100 matches, so you should make your query as
specific as possible.
'''
))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-i', '--isbn', help='Book ISBN')
parser.add_option('-m', '--max-results', default=10,
help='Maximum number of results to fetch')
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, opts.publisher, opts.isbn,
verbose=opts.verbose, max_results=opts.max_results)
except AssertionError:
report(True)
parser.print_help()
return 1
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,159 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Interface to isbndb.com. My key HLLXQX2A.
'''
import sys, re
from urllib import quote
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre import browser
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'
class ISBNDBError(Exception):
pass
def fetch_metadata(url, max=3, timeout=5.):
books = []
page_number = 1
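# placeholder so the loop below runs at least once; overwritten by the
# real total parsed from the first response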
total_results = 31
br = browser()
while len(books) < total_results and max > 0:
try:
raw = br.open(url, timeout=timeout).read()
except Exception as err:
raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
soup = BeautifulStoneSoup(raw,
convertEntities=BeautifulStoneSoup.XML_ENTITIES)
book_list = soup.find('booklist')
if book_list is None:
errmsg = soup.find('errormessage').string
raise ISBNDBError('Error fetching metadata: '+errmsg)
total_results = int(book_list['total_results'])
page_number += 1
np = '&page_number=%s&'%page_number
url = re.sub(r'\&page_number=\d+\&', np, url)
books.extend(book_list.findAll('bookdata'))
max -= 1
return books
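# Usage sketch (hypothetical access key; build_combined is defined below):
# url = build_combined(BASE_URL % dict(key='YOUR_KEY'), opts)
# books = fetch_metadata(url)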
class ISBNDBMetadata(Metadata):
def __init__(self, book):
Metadata.__init__(self, None)
def tostring(e):
if not hasattr(e, 'string'):
return None
ans = e.string
if ans is not None:
ans = unicode(ans).strip()
if not ans:
ans = None
return ans
self.isbn = unicode(book.get('isbn13', book.get('isbn')))
title = tostring(book.find('titlelong'))
if not title:
title = tostring(book.find('title'))
self.title = title
authors = []
au = tostring(book.find('authorstext'))
if au:
au = au.strip()
temp = au.split(',')
for au in temp:
if not au: continue
authors.extend([a.strip() for a in au.split('&amp;')])
if authors:
self.authors = authors
try:
self.author_sort = tostring(book.find('authors').find('person'))
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
except:
pass
self.publisher = tostring(book.find('publishertext'))
summ = tostring(book.find('summary'))
if summ:
self.comments = 'SUMMARY:\n'+summ
def build_isbn(base_url, opts):
return base_url + 'index1=isbn&value1='+opts.isbn
def build_combined(base_url, opts):
query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
if e is not None ])
query = query.strip()
if len(query) == 0:
raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
query = re.sub(r'\s+', '+', query)
if isinstance(query, unicode):
query = query.encode('utf-8')
return base_url+'index1=combined&value1='+quote(query, '+')
def option_parser():
parser = OptionParser(usage=\
_('''
%prog [options] key
Fetch metadata for books from isbndb.com. You can specify either the
book's ISBN or its title and author. If you specify the title and author,
then more than one book may be returned.
key is the account key you generate after signing up for a free account at isbndb.com.
'''))
parser.add_option('-i', '--isbn', default=None, dest='isbn',
help=_('The ISBN ID of the book you want metadata for.'))
parser.add_option('-a', '--author', dest='author',
default=None, help=_('The author whose book to search for.'))
parser.add_option('-t', '--title', dest='title',
default=None, help=_('The title of the book to search for.'))
parser.add_option('-p', '--publisher', default=None, dest='publisher',
help=_('The publisher of the book to search for.'))
parser.add_option('-v', '--verbose', default=False,
action='store_true', help=_('Verbose processing'))
return parser
def create_books(opts, args, timeout=5.):
base_url = BASE_URL%dict(key=args[1])
if opts.isbn is not None:
url = build_isbn(base_url, opts)
else:
url = build_combined(base_url, opts)
if opts.verbose:
print ('ISBNDB query: '+url)
tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
# remove duplicate ISBNs
return list(dict((book.isbn, book) for book in tans).values())
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print ('You must supply the isbndb.com key')
return 1
for book in create_books(opts, args):
print unicode(book).encode('utf-8')
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -400,7 +400,8 @@ class MetadataUpdater(object):
if getattr(self, 'exth', None) is None:
raise MobiError('No existing EXTH record. Cannot update metadata.')
self.record0[92:96] = iana2mobi(mi.language)
if not mi.is_null('language'):
self.record0[92:96] = iana2mobi(mi.language)
self.create_exth(exth=exth, new_title=mi.title)
# Fetch updated timestamp, cover_record, thumbnail_record

View File

@ -1,411 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
from urllib import urlencode
from math import ceil
from copy import deepcopy
from lxml.html import soupparser
from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.metadata.covers import CoverDownload
from calibre.utils.config import OptionParser
class NiceBooks(MetadataSource):
name = 'Nicebooks'
description = _('Downloads metadata from French Nicebooks')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class NiceBooksCovers(CoverDownload):
name = 'Nicebooks covers'
description = _('Downloads covers from French Nicebooks')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
type = _('Cover download')
version = (1, 0, 0)
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
br = browser()
try:
entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
if Covers(mi.isbn)(entry).check_cover():
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception as e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
br = browser()
try:
entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
cover_data, ext = Covers(mi.isbn)(entry).get_cover(br, timeout)
if not ext:
ext = 'jpg'
result_queue.put((True, cover_data, ext, self.name))
except Exception as e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
class NiceBooksError(Exception):
pass
class ISBNNotFound(NiceBooksError):
pass
def report(verbose):
if verbose:
traceback.print_exc()
class Query(object):
BASE_URL = 'http://fr.nicebooks.com/'
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None, max_results=20):
assert not(title is None and author is None and publisher is None \
and isbn is None and keywords is None)
assert (max_results < 21)
self.max_results = int(max_results)
if isbn is not None:
q = isbn
else:
q = ' '.join([i for i in (title, author, publisher, keywords) \
if i is not None])
if isinstance(q, unicode):
q = q.encode('utf-8')
self.urldata = 'search?' + urlencode({'q':q,'s':'Rechercher'})
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
print _('Query: %s') % (self.BASE_URL + self.urldata)
try:
raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise NiceBooksError(_('Nicebooks encountered an error.'))
if '<title>404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
# remove invalid ASCII chars
feed = soupparser.fromstring(clean_ascii_chars(raw))
except:
return None
# number of pages to fetch
try:
nbresults = int(feed.xpath("//div[@id='topbar']/b")[0].text)
except:
#direct hit
return [feed]
nbpagetoquery = int(ceil(float(min(nbresults, self.max_results))/10))
pages = [feed]
if nbpagetoquery > 1:
for i in xrange(2, nbpagetoquery + 1):
try:
urldata = self.urldata + '&p=' + str(i)
raw = browser.open_novisit(self.BASE_URL+urldata, timeout=timeout).read()
except Exception as e:
continue
if '<title>404 - ' in raw:
continue
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
# remove invalid ASCII chars
feed = soupparser.fromstring(clean_ascii_chars(raw))
except:
continue
pages.append(feed)
results = []
for x in pages:
results.extend([i.find_class('title')[0].get('href') \
for i in x.xpath("//ul[@id='results']/li")])
return results[:self.max_results]
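# e.g. 35 hits with max_results == 20 gives nbpagetoquery == 2: the
# first page plus one more, before truncating to max_results links.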
class ResultList(list):
BASE_URL = 'http://fr.nicebooks.com'
def __init__(self):
self.repub = re.compile(u'\s*.diteur\s*', re.I)
self.reauteur = re.compile(u'\s*auteur.*', re.I)
self.reautclean = re.compile(u'\s*\(.*\)\s*')
def get_title(self, entry):
title = deepcopy(entry)
title.remove(title.find("dl[@title='Informations sur le livre']"))
title = ' '.join([i.text_content() for i in title.iterchildren()])
return unicode(title.replace('\n', ''))
def get_authors(self, entry):
author = entry.find("dl[@title='Informations sur le livre']")
authortext = []
for x in author.getiterator('dt'):
if self.reauteur.match(x.text):
elt = x.getnext()
while elt.tag == 'dd':
authortext.append(unicode(elt.text_content()))
elt = elt.getnext()
break
if len(authortext) == 1:
authortext = [self.reautclean.sub('', authortext[0])]
return authortext
def get_description(self, entry, verbose):
try:
return u'RESUME:\n' + unicode(entry.getparent().xpath("//p[@id='book-description']")[0].text)
except:
report(verbose)
return None
def get_book_info(self, entry, mi, verbose):
entry = entry.find("dl[@title='Informations sur le livre']")
for x in entry.getiterator('dt'):
if x.text == 'ISBN':
isbntext = x.getnext().text_content().replace('-', '')
if check_isbn(isbntext):
mi.isbn = unicode(isbntext)
elif self.repub.match(x.text):
mi.publisher = unicode(x.getnext().text_content())
elif x.text == 'Langue':
mi.language = unicode(x.getnext().text_content())
elif x.text == 'Date de parution':
d = x.getnext().text_content()
try:
default = utcnow().replace(day=15)
d = replace_months(d, 'fr')
d = parse_date(d, assume_utc=True, default=default)
mi.pubdate = d
except:
report(verbose)
return mi
def fill_MI(self, entry, title, authors, verbose):
mi = MetaInformation(title, authors)
mi.author_sort = authors_to_sort_string(authors)
mi.comments = self.get_description(entry, verbose)
return self.get_book_info(entry, mi, verbose)
def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(self.BASE_URL + linkdata).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise NiceBooksError(_('Nicebooks encountered an error.'))
if '<title>404 - ' in raw:
report(verbose)
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
# remove invalid ASCII chars
feed = soupparser.fromstring(clean_ascii_chars(raw))
except:
return None
# get results
return feed.xpath("//div[@id='container']")[0]
def populate(self, entries, browser, verbose=False):
#single entry
if len(entries) == 1 and not isinstance(entries[0], str):
try:
entry = entries[0].xpath("//div[@id='container']")[0]
entry = entry.find("div[@id='book-info']")
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
return
self.append(self.fill_MI(entry, title, authors, verbose))
else:
#multiple entries
for x in entries:
try:
entry = self.get_individual_metadata(browser, x, verbose)
entry = entry.find("div[@id='book-info']")
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
continue
self.append(self.fill_MI(entry, title, authors, verbose))
class Covers(object):
def __init__(self, isbn = None):
assert isbn is not None
self.urlimg = ''
self.isbn = isbn
self.isbnf = False
def __call__(self, entry = None):
try:
self.urlimg = entry.xpath("//div[@id='book-picture']/a")[0].get('href')
except:
return self
isbno = entry.get_element_by_id('book-info').find("dl[@title='Informations sur le livre']")
for x in isbno.getiterator('dt'):
if x.text == 'ISBN' and check_isbn(x.getnext().text_content()):
self.isbnf = True
break
return self
def check_cover(self):
return bool(self.urlimg)
def get_cover(self, browser, timeout = 5.):
try:
cover, ext = browser.open_novisit(self.urlimg, timeout=timeout).read(), \
self.urlimg.rpartition('.')[-1]
return cover, ext if ext else 'jpg'
except Exception as err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
if not len(self.urlimg):
if not self.isbnf:
raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
raise NiceBooksError(_('An error occurred with the Nicebooks cover fetcher'))
def search(title=None, author=None, publisher=None, isbn=None,
max_results=5, verbose=False, keywords=None):
br = browser()
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.)
if entries is None or len(entries) == 0:
return None
# list of entries
ans = ResultList()
ans.populate(entries, br, verbose)
return ans
def check_for_cover(isbn):
br = browser()
entry = Query(isbn=isbn, max_results=1)(br, False)[0]
return Covers(isbn)(entry).check_cover()
def cover_from_isbn(isbn, timeout = 5.):
br = browser()
entry = Query(isbn=isbn, max_results=1)(br, False, timeout)[0]
return Covers(isbn)(entry).get_cover(br, timeout)
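# Usage sketch (hypothetical ISBN):
# cover_data, ext = cover_from_isbn('9782070360024')
# open('cover.' + ext, 'wb').write(cover_data)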
def option_parser():
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
Fetch book metadata from Nicebooks. You must specify one of title, author,
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
so you should make your query as specific as possible.
It can also download covers if the covers option is set.
''')
))
parser.add_option('-t', '--title', help=_('Book title'))
parser.add_option('-a', '--author', help=_('Book author(s)'))
parser.add_option('-p', '--publisher', help=_('Book publisher'))
parser.add_option('-i', '--isbn', help=_('Book ISBN'))
parser.add_option('-k', '--keywords', help=_('Keywords'))
parser.add_option('-c', '--covers', default=0,
help=_('Covers: 1 = check, 2 = download'))
parser.add_option('--coverspath', default='',
help=_('Directory to save covers to'))
parser.add_option('-m', '--max-results', default=20,
help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
import os
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
except AssertionError:
report(True)
parser.print_help()
return 1
if results is None or len(results) == 0:
print _('No results found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
covact = int(opts.covers)
if covact == 1:
textcover = _('No cover found!')
if check_for_cover(result.isbn):
textcover = _('A cover was found for this book')
print textcover
elif covact == 2:
cover_data, ext = cover_from_isbn(result.isbn)
cpath = result.isbn
if len(opts.coverspath):
cpath = os.path.join(opts.coverspath, result.isbn)
oname = os.path.abspath(cpath+'.'+ext)
open(oname, 'wb').write(cover_data)
print _('Cover saved to file'), oname
print
if __name__ == '__main__':
sys.exit(main())

View File

@ -966,7 +966,9 @@ class OPF(object): # {{{
cover_id = covers[0].get('content')
for item in self.itermanifest():
if item.get('id', None) == cover_id:
return item.get('href', None)
mt = item.get('media-type', '')
if 'xml' not in mt:
return item.get('href', None)
@dynamic_property
def cover(self):

View File

@ -301,7 +301,7 @@ class Amazon(Source):
if asin is None:
asin = identifiers.get('asin', None)
if asin:
return 'http://amzn.com/%s'%asin
return ('amazon', asin, 'http://amzn.com/%s'%asin)
# }}}
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{

View File

@ -56,7 +56,8 @@ class InternalMetadataCompareKeyGen(object):
'''
Generate a sort key for comparison of the relevance of Metadata objects,
given a search query.
given a search query. This is used only to compare results from the same
metadata source, not across different sources.
The sort key ensures that an ascending order sort is a sort by order of
decreasing relevance.
@ -306,7 +307,7 @@ class Source(Plugin):
title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
[
# Remove things like: (2010) (Omnibus) etc.
(r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|mass\s*market|edition|ed\.)[\])}]', ''),
(r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|turtleback|mass\s*market|edition|ed\.)[\])}]', ''),
# Remove any strings that contain the substring edition inside
# parentheses
(r'(?i)[({\[].*?(edition|ed.).*?[\]})]', ''),
@ -374,7 +375,11 @@ class Source(Plugin):
def get_book_url(self, identifiers):
'''
Return the URL for the book identified by identifiers at this source.
Return a 3-tuple or None. The 3-tuple is of the form:
(identifier_type, identifier_value, URL).
The URL is the URL for the book identified by identifiers at this
source. identifier_type, identifier_value specify the identifier
corresponding to the URL.
This URL must be browsable by a human using a browser. It is meant
to provide a clickable link for the user to easily visit the book's page
at this source.
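# A sketch of the new contract (hypothetical source and URL):
#
# def get_book_url(self, identifiers):
# isbn = identifiers.get('isbn', None)
# if isbn:
# return ('isbn', isbn, 'http://example.com/book/' + isbn)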

View File

@ -19,13 +19,8 @@ from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.metadata.sources.base import create_log
from calibre.ebooks.metadata.sources.identify import identify
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.utils.config import test_eight_code
def option_parser():
if not test_eight_code:
from calibre.ebooks.metadata.fetch import option_parser
return option_parser()
parser = OptionParser(textwrap.dedent(
'''\
%prog [options]
@ -48,9 +43,6 @@ def option_parser():
return parser
def main(args=sys.argv):
if not test_eight_code:
from calibre.ebooks.metadata.fetch import main
return main(args)
parser = option_parser()
opts, args = parser.parse_args(args)

View File

@ -173,7 +173,7 @@ class GoogleBooks(Source):
def get_book_url(self, identifiers): # {{{
goog = identifiers.get('google', None)
if goog is not None:
return 'http://books.google.com/books?id=%s'%goog
return ('google', goog, 'http://books.google.com/books?id=%s'%goog)
# }}}
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{

View File

@ -13,6 +13,7 @@ from Queue import Queue, Empty
from threading import Thread
from io import BytesIO
from operator import attrgetter
from urlparse import urlparse
from calibre.customize.ui import metadata_plugins, all_metadata_plugins
from calibre.ebooks.metadata.sources.base import create_log, msprefs
@ -400,6 +401,9 @@ def identify(log, abort, # {{{
and plugin.get_cached_cover_url(result.identifiers) is not
None)
result.identify_plugin = plugin
if msprefs['txt_comments']:
if plugin.has_html_comments and result.comments:
result.comments = html2text(result.comments)
log('The identify phase took %.2f seconds'%(time.time() - start_time))
log('The longest time (%f) was taken by:'%longest, lp)
@ -410,10 +414,6 @@ def identify(log, abort, # {{{
log('We have %d merged results, merging took: %.2f seconds' %
(len(results), time.time() - start_time))
if msprefs['txt_comments']:
for r in results:
if r.identify_plugin.has_html_comments and r.comments:
r.comments = html2text(r.comments)
max_tags = msprefs['max_tags']
for r in results:
@ -435,18 +435,38 @@ def identify(log, abort, # {{{
# }}}
def urls_from_identifiers(identifiers): # {{{
identifiers = dict([(k.lower(), v) for k, v in identifiers.iteritems()])
ans = []
for plugin in all_metadata_plugins():
try:
url = plugin.get_book_url(identifiers)
if url is not None:
ans.append((plugin.name, url))
id_type, id_val, url = plugin.get_book_url(identifiers)
ans.append((plugin.name, id_type, id_val, url))
except:
pass
isbn = identifiers.get('isbn', None)
if isbn:
ans.append((isbn,
'http://www.worldcat.org/search?q=bn%%3A%s&qt=advanced'%isbn))
ans.append((isbn, 'isbn', isbn,
'http://www.worldcat.org/isbn/'+isbn))
doi = identifiers.get('doi', None)
if doi:
ans.append(('DOI', 'doi', doi,
'http://dx.doi.org/'+doi))
arxiv = identifiers.get('arxiv', None)
if arxiv:
ans.append(('arXiv', 'arxiv', arxiv,
'http://arxiv.org/abs/'+arxiv))
oclc = identifiers.get('oclc', None)
if oclc:
ans.append(('OCLC', 'oclc', oclc,
'http://www.worldcat.org/oclc/'+oclc))
url = identifiers.get('uri', None)
if url is None:
url = identifiers.get('url', None)
if url and url.startswith('http'):
url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
parts = urlparse(url)
name = parts.netloc
ans.append((name, 'url', url, url))
return ans
# }}}
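# Sketch of the '|' un-escaping above (hypothetical value): ':' and ','
# cannot appear in stored identifiers, so they are saved as '|';
# 'http|//example.com/a|b' is restored to 'http://example.com/a,b'.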

View File

@ -81,7 +81,7 @@ class ISBNDB(Source):
author_tokens = self.get_author_tokens(authors,
only_first_author=True)
tokens += author_tokens
tokens = [quote(t) for t in tokens]
tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t) for t in tokens]
q = '+'.join(tokens)
q = 'index1=combined&value1='+q
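# e.g. quote(u'caf\xe9'.encode('utf-8')) -> 'caf%C3%A9'; without the
# encode, quote() would raise on non-ASCII unicode tokens.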

View File

@ -41,7 +41,7 @@ class OverDrive(Source):
cached_cover_url_is_reliable = True
options = (
Option('get_full_metadata', 'bool', False,
Option('get_full_metadata', 'bool', True,
_('Download all metadata (slow)'),
_('Enable this option to gather all metadata available from Overdrive.')),
)
@ -265,7 +265,7 @@ class OverDrive(Source):
if creators:
creators = creators.split(', ')
# if an exact match in a preferred format occurs
if ((author and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
if ((author and creators and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
return self.format_results(reserveid, od_title, subtitle, series, publisher,
creators, thumbimage, worldcatlink, formatid)
else:
@ -291,7 +291,7 @@ class OverDrive(Source):
close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
else:
close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
elif close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))

View File

@ -222,7 +222,7 @@ class SaveWorker(Thread):
if isbytestring(fpath):
fpath = fpath.decode(filesystem_encoding)
formats[fmt.lower()] = fpath
data[i] = [opf, cpath, formats]
data[i] = [opf, cpath, formats, mi.last_modified.isoformat()]
return data
def run(self):

View File

@ -253,6 +253,8 @@ class MobiReader(object):
.italic { font-style: italic }
.underline { text-decoration: underline }
.mbp_pagebreak {
page-break-after: always; margin: 0; display: block
}
@ -601,6 +603,9 @@ class MobiReader(object):
elif tag.tag == 'i':
tag.tag = 'span'
tag.attrib['class'] = 'italic'
elif tag.tag == 'u':
tag.tag = 'span'
tag.attrib['class'] = 'underline'
elif tag.tag == 'b':
tag.tag = 'span'
tag.attrib['class'] = 'bold'

View File

@ -7,6 +7,8 @@ __docformat__ = 'restructuredtext en'
Convert an ODT file into an Open Ebook
'''
import os
from lxml import etree
from odf.odf2xhtml import ODF2XHTML
from calibre import CurrentDir, walk
@ -23,7 +25,51 @@ class Extract(ODF2XHTML):
with open(name, 'wb') as f:
f.write(data)
def __call__(self, stream, odir):
def filter_css(self, html, log):
root = etree.fromstring(html)
style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
if style:
style = style[0]
css = style.text
if css:
style.text, sel_map = self.do_filter_css(css)
for x in root.xpath('//*[@class]'):
extra = []
orig = x.get('class')
for cls in orig.split():
extra.extend(sel_map.get(cls, []))
if extra:
x.set('class', orig + ' ' + ' '.join(extra))
html = etree.tostring(root, encoding='utf-8',
xml_declaration=True)
return html
def do_filter_css(self, css):
from cssutils import parseString
from cssutils.css import CSSRule
sheet = parseString(css)
rules = list(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
sel_map = {}
count = 0
for r in rules:
# Check if we have only class selectors for this rule
nc = [x for x in r.selectorList if not
x.selectorText.startswith('.')]
if len(r.selectorList) > 1 and not nc:
# Replace all the class selectors with a single class selector
# This will be added to the class attribute of all elements
# that have one of these selectors.
replace_name = 'c_odt%d'%count
count += 1
for sel in r.selectorList:
s = sel.selectorText[1:]
if s not in sel_map:
sel_map[s] = []
sel_map[s].append(replace_name)
r.selectorText = '.'+replace_name
return sheet.cssText, sel_map
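# e.g. (sketch) '.a, .b { font-weight: bold }' becomes
# '.c_odt0 { font-weight: bold }' with sel_map == {'a': ['c_odt0'],
# 'b': ['c_odt0']}; filter_css() above then rewrites class="a" to
# class="a c_odt0" on matching elements.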
def __call__(self, stream, odir, log):
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
@ -32,13 +78,17 @@ class Extract(ODF2XHTML):
if not os.path.exists(odir):
os.makedirs(odir)
with CurrentDir(odir):
print 'Extracting ODT file...'
log('Extracting ODT file...')
html = self.odf2xhtml(stream)
# A blanket img specification like this causes problems
# with EPUB output as the contaiing element often has
# with EPUB output as the containing element often has
# an absolute height and width set that is larger than
# the available screen real estate
html = html.replace('img { width: 100%; height: 100%; }', '')
try:
html = self.filter_css(html, log)
except:
log.exception('Failed to filter CSS, conversion may be slow')
with open('index.xhtml', 'wb') as f:
f.write(html.encode('utf-8'))
zf = ZipFile(stream, 'r')
@ -67,7 +117,7 @@ class ODTInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
return Extract()(stream, '.')
return Extract()(stream, '.', log)
def postprocess_book(self, oeb, opts, log):
# Fix <p><div> constructs as the asinine epubchecker complains

View File

@ -16,7 +16,7 @@ from urllib import unquote as urlunquote
from lxml import etree, html
from calibre.constants import filesystem_encoding, __version__
from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
from calibre import isbytestring, as_unicode, get_types_map
@ -446,22 +446,23 @@ class NullContainer(object):
class DirContainer(object):
"""Filesystem directory container."""
def __init__(self, path, log):
def __init__(self, path, log, ignore_opf=False):
self.log = log
if isbytestring(path):
path = path.decode(filesystem_encoding)
self.opfname = None
ext = os.path.splitext(path)[1].lower()
if ext == '.opf':
self.opfname = os.path.basename(path)
self.rootdir = os.path.dirname(path)
return
self.rootdir = path
for path in self.namelist():
ext = os.path.splitext(path)[1].lower()
if ext == '.opf':
self.opfname = path
return
self.opfname = None
if not ignore_opf:
for path in self.namelist():
ext = os.path.splitext(path)[1].lower()
if ext == '.opf':
self.opfname = path
return
def read(self, path):
if path is None:
@ -852,6 +853,7 @@ class Manifest(object):
self.oeb.log.debug('Parsing', self.href, '...')
# Convert to Unicode and normalize line endings
data = self.oeb.decode(data)
data = strip_encoding_declarations(data)
data = self.oeb.html_preprocessor(data)
# There could be null bytes in data if it had &#0; entities in it
data = data.replace('\0', '')
@ -1047,8 +1049,8 @@ class Manifest(object):
# Remove hyperlinks with no content as they cause rendering
# artifacts in browser based renderers
# Also remove empty <b> and <i> tags
for a in xpath(data, '//h:a[@href]|//h:i|//h:b'):
# Also remove empty <b>, <u> and <i> tags
for a in xpath(data, '//h:a[@href]|//h:i|//h:b|//h:u'):
if a.get('id', None) is None and a.get('name', None) is None \
and len(a) == 0 and not a.text:
remove_elem(a)

View File

@ -125,7 +125,19 @@ class Stylizer(object):
def __init__(self, tree, path, oeb, opts, profile=None,
extra_css='', user_css=''):
self.oeb, self.opts = oeb, opts
self.profile = opts.input_profile
self.profile = profile
if self.profile is None:
# Use the default profile. This should really be using
# opts.output_profile, but I don't want to risk changing it, as
# doing so might well have hard to debug font size effects.
from calibre.customize.ui import output_profiles
for x in output_profiles():
if x.short_name == 'default':
self.profile = x
break
if self.profile is None:
# Just in case the default profile is removed in the future :)
self.profile = opts.output_profile
self.logger = oeb.logger
item = oeb.manifest.hrefs[path]
basename = os.path.basename(path)

View File

@ -36,7 +36,7 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
m.clear('description')
m.add('description', mi.comments)
elif override_input_metadata:
m.clear('description')
m.clear('description')
if not mi.is_null('publisher'):
m.clear('publisher')
m.add('publisher', mi.publisher)

View File

@ -16,6 +16,7 @@ from calibre import CurrentDir
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ptempfile import TemporaryFile
from calibre.utils.magick import Image, create_canvas
from calibre.ebooks.compression.palmdoc import decompress_doc
DATATYPE_PHTML = 0
DATATYPE_PHTML_COMPRESSED = 1
@ -359,7 +360,7 @@ class Reader(FormatReader):
# plugin assemble the order based on hyperlinks.
with CurrentDir(output_dir):
for uid, num in self.uid_text_secion_number.items():
self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid)))
self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
with open('%s.html' % uid, 'wb') as htmlf:
html = u'<html><body>'
section_header, section_data = self.sections[num]
@ -465,7 +466,7 @@ class Reader(FormatReader):
if not home_html:
home_html = self.uid_text_secion_number.items()[0][0]
except:
raise Exception(_('Could not determine home.html'))
raise Exception('Could not determine home.html')
# Generate oeb from html conversion.
oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
self.options.debug_pipeline = odi

View File

@ -32,10 +32,11 @@ class PDFInput(InputFormatPlugin):
def convert_new(self, stream, accelerators):
from calibre.ebooks.pdf.reflow import PDFDocument
from calibre.utils.cleantext import clean_ascii_chars
if pdfreflow_err:
raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
pdfreflow.reflow(stream.read(), 1, -1)
xml = open('index.xml', 'rb').read()
xml = clean_ascii_chars(open('index.xml', 'rb').read())
PDFDocument(xml, self.opts, self.log)
return os.path.join(os.getcwd(), 'metadata.opf')

View File

@ -15,7 +15,6 @@ import cStringIO
from lxml import etree
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.filenames import ascii_text
from calibre.utils.magick.draw import save_cover_data_to, identify_data
TAGS = {
@ -79,8 +78,7 @@ def txt2rtf(text):
elif val <= 127:
buf.write(x)
else:
repl = ascii_text(x)
c = r'\uc{2}\u{0:d}{1}'.format(val, repl, len(repl))
c = r'\u{0:d}?'.format(val)
buf.write(c)
return buf.getvalue()

View File

@ -34,7 +34,7 @@ if isosx:
)
gprefs.defaults['action-layout-toolbar'] = (
'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
'Choose Library', 'Donate', None, 'Fetch News', 'Store', 'Save To Disk',
'Connect Share', None, 'Remove Books',
)
gprefs.defaults['action-layout-toolbar-device'] = (
@ -48,7 +48,7 @@ else:
gprefs.defaults['action-layout-menubar-device'] = ()
gprefs.defaults['action-layout-toolbar'] = (
'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
'Choose Library', 'Donate', None, 'Fetch News', 'Store', 'Save To Disk',
'Connect Share', None, 'Remove Books', None, 'Help', 'Preferences',
)
gprefs.defaults['action-layout-toolbar-device'] = (
@ -739,12 +739,6 @@ def build_forms(srcdir, info=None):
dat = dat.replace('from QtWebKit.QWebView import QWebView',
'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')
if form.endswith('viewer%smain.ui'%os.sep):
info('\t\tPromoting WebView')
dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
dat = dat.replace('self.view = QWebView(', 'self.view = DocumentView(')
dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
open(compiled_form, 'wb').write(dat)
_df = os.environ.get('CALIBRE_DEVELOP_FROM', None)

View File

@ -20,9 +20,8 @@ from calibre.ebooks import BOOK_EXTENSIONS
from calibre.utils.filenames import ascii_filename
from calibre.constants import preferred_encoding, filesystem_encoding
from calibre.gui2.actions import InterfaceAction
from calibre.gui2 import config, question_dialog
from calibre.gui2 import question_dialog
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import test_eight_code
from calibre.ebooks.metadata.sources.base import msprefs
def get_filters():
@ -180,26 +179,17 @@ class AddAction(InterfaceAction):
except IndexError:
self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
self.isbn_add_dialog.accept()
if test_eight_code:
orig = msprefs['ignore_fields']
new = list(orig)
for x in ('title', 'authors'):
if x in new:
new.remove(x)
msprefs['ignore_fields'] = new
try:
self.gui.iactions['Edit Metadata'].download_metadata(
ids=self.add_by_isbn_ids)
finally:
msprefs['ignore_fields'] = orig
else:
orig = config['overwrite_author_title_metadata']
config['overwrite_author_title_metadata'] = True
try:
self.gui.iactions['Edit Metadata'].do_download_metadata(
self.add_by_isbn_ids)
finally:
config['overwrite_author_title_metadata'] = orig
orig = msprefs['ignore_fields']
new = list(orig)
for x in ('title', 'authors'):
if x in new:
new.remove(x)
msprefs['ignore_fields'] = new
try:
self.gui.iactions['Edit Metadata'].download_metadata(
ids=self.add_by_isbn_ids)
finally:
msprefs['ignore_fields'] = orig
return

View File

@ -246,7 +246,8 @@ class ChooseLibraryAction(InterfaceAction):
def delete_requested(self, name, location):
loc = location.replace('/', os.sep)
if not question_dialog(self.gui, _('Are you sure?'), '<p>'+
_('All files from %s will be '
_('<b style="color: red">All files</b> (not just ebooks) '
'from <br><br><b>%s</b><br><br> will be '
'<b>permanently deleted</b>. Are you sure?') % loc,
show_copy_button=False):
return

View File

@ -10,15 +10,13 @@ from functools import partial
from PyQt4.Qt import Qt, QMenu, QModelIndex, QTimer
from calibre.gui2 import error_dialog, config, Dispatcher, question_dialog
from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
from calibre.gui2 import error_dialog, Dispatcher, question_dialog
from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.tag_list_editor import TagListEditor
from calibre.gui2.actions import InterfaceAction
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.icu import sort_key
from calibre.utils.config import test_eight_code
class EditMetadataAction(InterfaceAction):
@ -36,22 +34,8 @@ class EditMetadataAction(InterfaceAction):
md.addAction(_('Edit metadata in bulk'),
partial(self.edit_metadata, False, bulk=True))
md.addSeparator()
if test_eight_code:
dall = self.download_metadata
else:
dall = partial(self.download_metadata_old, False, covers=True)
dident = partial(self.download_metadata_old, False, covers=False)
dcovers = partial(self.download_metadata_old, False, covers=True,
set_metadata=False, set_social_metadata=False)
md.addAction(_('Download metadata and covers'), dall,
md.addAction(_('Download metadata and covers'), self.download_metadata,
Qt.ControlModifier+Qt.Key_D)
if not test_eight_code:
md.addAction(_('Download only metadata'), dident)
md.addAction(_('Download only covers'), dcovers)
md.addAction(_('Download only social metadata'),
partial(self.download_metadata_old, False, covers=False,
set_metadata=False, set_social_metadata=True))
self.metadata_menu = md
mb = QMenu()
@ -88,7 +72,7 @@ class EditMetadataAction(InterfaceAction):
_('No books selected'), show=True)
db = self.gui.library_view.model().db
ids = [db.id(row.row()) for row in rows]
from calibre.gui2.metadata.bulk_download2 import start_download
from calibre.gui2.metadata.bulk_download import start_download
start_download(self.gui, ids,
Dispatcher(self.metadata_downloaded))
@ -96,7 +80,7 @@ class EditMetadataAction(InterfaceAction):
if job.failed:
self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
return
from calibre.gui2.metadata.bulk_download2 import get_job_details
from calibre.gui2.metadata.bulk_download import get_job_details
id_map, failed_ids, failed_covers, all_failed, det_msg = \
get_job_details(job)
if all_failed:
@ -112,8 +96,9 @@ class EditMetadataAction(InterfaceAction):
show_copy_button = False
if failed_ids or failed_covers:
show_copy_button = True
num = len(failed_ids.union(failed_covers))
msg += '<p>'+_('Could not download metadata and/or covers for %d of the books. Click'
' "Show details" to see which books.')%len(failed_ids)
' "Show details" to see which books.')%num
payload = (id_map, failed_ids, failed_covers)
from calibre.gui2.dialogs.message_box import ProceedNotification
@ -158,49 +143,6 @@ class EditMetadataAction(InterfaceAction):
self.apply_metadata_changes(id_map)
def download_metadata_old(self, checked, covers=True, set_metadata=True,
set_social_metadata=None):
rows = self.gui.library_view.selectionModel().selectedRows()
if not rows or len(rows) == 0:
d = error_dialog(self.gui, _('Cannot download metadata'),
_('No books selected'))
d.exec_()
return
db = self.gui.library_view.model().db
ids = [db.id(row.row()) for row in rows]
self.do_download_metadata(ids, covers=covers,
set_metadata=set_metadata,
set_social_metadata=set_social_metadata)
def do_download_metadata(self, ids, covers=True, set_metadata=True,
set_social_metadata=None):
m = self.gui.library_view.model()
db = m.db
if set_social_metadata is None:
get_social_metadata = config['get_social_metadata']
else:
get_social_metadata = set_social_metadata
from calibre.gui2.metadata.bulk_download import DoDownload
if set_social_metadata is not None and set_social_metadata:
x = _('social metadata')
else:
x = _('covers') if covers and not set_metadata else _('metadata')
title = _('Downloading {0} for {1} book(s)').format(x, len(ids))
self._download_book_metadata = DoDownload(self.gui, title, db, ids,
get_covers=covers, set_metadata=set_metadata,
get_social_metadata=get_social_metadata)
m.stop_metadata_backup()
try:
self._download_book_metadata.exec_()
finally:
m.start_metadata_backup()
cr = self.gui.library_view.currentIndex().row()
x = self._download_book_metadata
if x.updated:
self.gui.library_view.model().refresh_ids(
x.updated, cr)
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
# }}}
def edit_metadata(self, checked, bulk=None):
@ -227,9 +169,7 @@ class EditMetadataAction(InterfaceAction):
list(range(self.gui.library_view.model().rowCount(QModelIndex())))
current_row = row_list.index(cr)
func = (self.do_edit_metadata if test_eight_code else
self.do_edit_metadata_old)
changed, rows_to_refresh = func(row_list, current_row)
changed, rows_to_refresh = self.do_edit_metadata(row_list, current_row)
m = self.gui.library_view.model()
@ -244,36 +184,6 @@ class EditMetadataAction(InterfaceAction):
m.current_changed(current, previous)
self.gui.tags_view.recount()
def do_edit_metadata_old(self, row_list, current_row):
changed = set([])
db = self.gui.library_view.model().db
while True:
prev = next_ = None
if current_row > 0:
prev = db.title(row_list[current_row-1])
if current_row < len(row_list) - 1:
next_ = db.title(row_list[current_row+1])
d = MetadataSingleDialog(self.gui, row_list[current_row], db,
prev=prev, next_=next_)
d.view_format.connect(lambda
fmt:self.gui.iactions['View'].view_format(row_list[current_row],
fmt))
ret = d.exec_()
d.break_cycles()
if ret != d.Accepted:
break
changed.add(d.id)
self.gui.library_view.model().refresh_ids(list(d.books_to_refresh))
if d.row_delta == 0:
break
current_row += d.row_delta
self.gui.library_view.set_current_row(current_row)
self.gui.library_view.scroll_to_row(current_row)
return changed, set()
def do_edit_metadata(self, row_list, current_row):
from calibre.gui2.metadata.single import edit_metadata
db = self.gui.library_view.model().db
@ -613,6 +523,7 @@ class EditMetadataAction(InterfaceAction):
self.applied_ids, cr)
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
self.gui.tags_view.recount()
self.apply_id_map = []
self.apply_pd = None

View File

@ -10,7 +10,7 @@ from PyQt4.Qt import QIcon, QMenu, Qt
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.preferences.main import Preferences
from calibre.gui2 import error_dialog
from calibre.constants import DEBUG
from calibre.constants import DEBUG, isosx
class PreferencesAction(InterfaceAction):
@ -19,7 +19,8 @@ class PreferencesAction(InterfaceAction):
def genesis(self):
pm = QMenu()
pm.addAction(QIcon(I('config.png')), _('Preferences'), self.do_config)
acname = _('Change calibre behavior') if isosx else _('Preferences')
pm.addAction(QIcon(I('config.png')), acname, self.do_config)
pm.addAction(QIcon(I('wizard.png')), _('Run welcome wizard'),
self.gui.run_wizard)
if not DEBUG:

View File

@ -60,7 +60,7 @@ class ViewAction(InterfaceAction):
def build_menus(self, db):
self.view_menu.clear()
self.view_menu.addAction(self.qaction)
self.view_menu.addAction(self.view_action)
self.view_menu.addAction(self.view_specific_action)
self.view_menu.addSeparator()
self.view_menu.addAction(self.action_pick_random)

Some files were not shown because too many files have changed in this diff.