mirror of https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00

commit 7bd9cd20fe: Sync with trunk. Revision 9165
@@ -30,3 +30,4 @@ nbproject/
 .project
 .pydevproject
 .settings/
+*.DS_Store
100  Changelog.yaml
@@ -19,6 +19,106 @@
 # new recipes:
 #  - title:
 
+- version: 0.8.0
+  date: 2011-05-06
+
+  new features:
+    - title: "Go to http://calibre-ebook.com/new-in/eight to see what's new in 0.8.0"
+      type: major
+
+- version: 0.7.59
+  date: 2011-04-30
+
+  bug fixes:
+    - title: "Fixes a bug in 0.7.58 that caused too small fonts when converting to MOBI for the Kindle. Apologies."
+
+    - title: "Apple driver: Handle invalid EPUBs that do not contain an OPF file"
+
+  new recipes:
+    - title: The Big Picture and Auto industry news
+      author: welovelucy
+
+    - title: Gazeta Prawna
+      author: Vroo
+
+    - title: Various Czech news sources
+      author: Tomas Latal
+
+    - title: Diario de Ibiza
+      author: Joan Tur
+
+- version: 0.7.58
+  date: 2011-04-29
+
+  new features:
+    - title: "Support for converting and reading metadata from Plucker format PDB files"
+      type: major
+
+    - title: "The metadata that is displayed in the book details panel on the right is now completely configurable via Preferences->Look & Feel"
+
+    - title: "Add a column that shows the date when the metadata of a book record was last modified in calibre. To see the column, right click on the column headers in calibre and select Show column->Modified. Note that the dates may be incorrect for books added with older versions of calibre."
+
+    - title: "Add a command line option to shut down a running calibre"
+
+    - title: "CHM Input: Store extracted files in the input/ sub dir for easy debugging when --debug-pipeline is specified"
+
+    - title: "Add a popup menu to the 'Create saved search' button to allow easy deleting of saved searches"
+
+  bug fixes:
+    - title: "Fix regression that broke converting to LIT in 0.7.57"
+      tickets: [769334]
+
+    - title: "Conversion pipeline: Remove encoding declarations from input HTML documents to guarantee that there is only a single encoding declaration in the output HTML."
+      tickets: [773337]
+
+    - title: "Correctly parenthesize searches that are used to make search restrictions"
+
+    - title: "Fix ratings in save to disk templates not being divided by 2"
+
+    - title: "TXT to EPUB: Underlined words (following quotes?) fail to become italics"
+      tickets: [772267]
+
+    - title: "Fix template function source code unavailable when not running calibre from source"
+
+    - title: "Fix adding HTML books from the top of a deep folder hierarchy being very slow"
+
+    - title: "Only set language in MOBI metadata if it is not null"
+
+    - title: "Fix 'count-of' searches (e.g., tags:#>3)."
+      tickets: [771175]
+
+    - title: "Fix regression that broke connection to iTunes in some cases"
+      tickets: [771164]
+
+    - title: "Fix buggy regex that made converting PDFs with the string ****************** very slow"
+      tickets: [770534]
+
+    - title: "Fix Ctrl+L shortcut to lookup word not working in ebook viewer"
+      tickets: [769492]
+
+    - title: "Fix regression that broke searching on boolean columns"
+
+  improved recipes:
+    - HBR Blogs
+    - The Marker
+    - Financial Times
+    - Clarin
+    - Honolulu Star Advertiser
+
+  new recipes:
+    - title: Novi Standard
+      author: Darko Miletic
+
+    - title: Autobild.ro and Social Diva
+      author: Silviu Cotoara
+
+    - title: Novinky
+      author: Tomas Latal
+
+    - title: "De Volkskrant (subscriber version)"
+      author: Selcal
+
 - version: 0.7.57
   date: 2011-04-22
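(As an aside on the "ratings not being divided by 2" fix listed above: a worked example of the halving involved, under the assumption, from calibre's usual data model, that ratings are stored on a 0-10 scale but displayed as 0-5 stars. The value below is invented for illustration.)

    stored = 8        # hypothetical database rating value, assumed 0-10
    print(stored // 2)  # -> 4, the star value a save-to-disk template should emit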
16  recipes/auto_blog.recipe  Normal file
@@ -0,0 +1,16 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AutoBlog(BasicNewsRecipe):
+    title = u'Auto Blog'
+    __author__ = 'Welovelucy'
+    language = 'en'
+    description = 'Auto industry news'
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    feeds = [(u'AutoBlog', u'http://www.autoblog.com/rss.xml')]
+
+    def print_version(self, url):
+        return url + 'print/'
+
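(A minimal sketch of the print_version hook this new recipe defines: calibre passes each article URL through it and downloads whatever it returns, so appending 'print/' fetches the printer-friendly page. The sample URL below is invented for illustration, not taken from the site.)

    def print_version(url):
        # mirrors the hook above, outside the class for a standalone demo
        return url + 'print/'

    print(print_version('http://www.autoblog.com/2011/04/30/sample-story/'))
    # -> http://www.autoblog.com/2011/04/30/sample-story/print/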
55  recipes/autobild.recipe  Normal file
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+auto-bild.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AutoBild(BasicNewsRecipe):
+    title = u'Auto Bild'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = 'Auto'
+    publisher = 'Auto Bild'
+    oldest_article = 50
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,Auto'
+    encoding = 'utf-8'
+    cover_url = 'http://www.auto-bild.ro/images/autobild.gif'
+
+    conversion_options = {
+        'comments'  : description
+        ,'tags'      : category
+        ,'language'  : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'box_2 articol clearfix'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['detail']})
+        , dict(name='a', attrs={'id':['zoom_link']})
+        , dict(name='div', attrs={'class':['icons clearfix']})
+        , dict(name='div', attrs={'class':['pub_articol clearfix']})
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':['pub_articol clearfix']})
+    ]
+
+    feeds = [
+        (u'Feeds', u'http://www.auto-bild.ro/rss/toate')
+    ]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
12  recipes/big_picture.recipe  Normal file
@@ -0,0 +1,12 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BigPicture(BasicNewsRecipe):
+    title = u'The Big Picture'
+    __author__ = 'Welovelucy'
+    description = ('Macro perspective on capital markets, economy, technology'
+                   ' and digital media')
+    language = 'en'
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    feeds = [(u'Big Picture', u'http://feeds.feedburner.com/TheBigPicture')]
@@ -3,7 +3,8 @@
 
 __license__ = 'GPL v3'
 __copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
-__version__ = '0.98' # 2011-04-10
+__version__ = '0.98'
+
 ''' http://brandeins.de - Wirtschaftsmagazin '''
 import re
 import string
@@ -13,8 +14,8 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 class BrandEins(BasicNewsRecipe):
 
     title = u'brand eins'
-    __author__ = 'Constantin Hofstetter; Steffen Siebert'
-    description = u'Wirtschaftsmagazin: Gets the last full issue on default. Set a integer value for the username-field to get older issues: 1 -> the newest (but not complete) issue, 2 -> the last complete issue (default), 3 -> the issue before 2 etc.'
+    __author__ = 'Constantin Hofstetter'
+    description = u'Wirtschaftsmagazin'
     publisher ='brandeins.de'
     category = 'politics, business, wirtschaft, Germany'
     use_embedded_content = False
@@ -105,10 +106,11 @@ class BrandEins(BasicNewsRecipe):
         keys = issue_map.keys()
         keys.sort()
         keys.reverse()
-        selected_issue = issue_map[keys[issue-1]]
+        selected_issue_key = keys[issue - 1]
+        selected_issue = issue_map[selected_issue_key]
         url = selected_issue.get('href', False)
         # Get the title for the magazin - build it out of the title of the cover - take the issue and year;
-        self.title = "brand eins "+ re.search(r"(?P<date>\d\d\/\d\d\d\d)", selected_issue.find('img').get('title', False)).group('date')
+        self.title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
         url = 'http://brandeins.de/'+url
 
         # url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
@@ -161,3 +163,4 @@ class BrandEins(BasicNewsRecipe):
                 current_articles.append({'title': title, 'url': url, 'description': description, 'date':''})
             titles_and_articles.append([chapter_title, current_articles])
         return titles_and_articles
+
55  recipes/diario_ibiza.recipe  Normal file
@@ -0,0 +1,55 @@
+__license__ = 'GPL v3'
+__author__ = 'Joan Tur, based on El Pais version by Jordi Balcells & elargentino.com version by Darko Miletic'
+description = 'Principal periodico de las islas Pitiusas, Ibiza y Formentera (Espanya) - v1.06 (29/04/2011)'
+__docformat__ = 'restructuredtext en'
+
+'''
+diariodeibiza.es
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DiarioDeIbiza(BasicNewsRecipe):
+    __author__ = 'Joan Tur, cullet'
+    description = 'Principal periodico de las islas Pitiusas, Ibiza y Formentera (Espanya) - v1.06 (29/04/2011)'
+
+    cover_url = 'http://estaticos01.diariodeibiza.es//elementosWeb/mediaweb/images/logo.jpg'
+    title = u'Diario de Ibiza digital'
+    publisher = u'Editorial Prensa Iberica'
+    category = 'News, politics, culture, economy, general interest'
+    language = 'es'
+
+    encoding = 'iso-8859-1'
+
+    timefmt = '[%a, %d %b, %Y]'
+
+    oldest_article = 2
+    max_articles_per_feed = 20
+
+    use_embedded_content = False
+    recursion = 5
+
+    remove_javascript = True
+    no_stylesheets = True
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':['noticia_titular','epigrafe','subtitulo','actualizada','noticia_fecha','noticia_texto']}),
+        dict(name='font', attrs={'class':['actualizada']})
+    ]
+
+    feeds = [
+        (u'Portada de Ibiza', u'http://www.diariodeibiza.es/elementosInt/rss/1'),
+        (u'Pitiuses i Balears', u'http://www.diariodeibiza.es/elementosInt/rss/2'),
+        (u'Opini\xf3n', u'http://www.diariodeibiza.es/elementosInt/rss/3'),
+        (u'Nacional', u'http://www.diariodeibiza.es/elementosInt/rss/4'),
+        (u'Internacional', u'http://www.diariodeibiza.es/elementosInt/rss/5'),
+        (u'Econom\xeda', u'http://www.diariodeibiza.es/elementosInt/rss/6'),
+        (u'Deportes', u'http://www.diariodeibiza.es/elementosInt/rss/7'),
+        (u'Sociedad', u'http://www.diariodeibiza.es/elementosInt/rss/8'),
+        (u'Ciencia', u'http://www.diariodeibiza.es/elementosInt/rss/11'),
+        (u'Tecnolog\xeda', u'http://www.diariodeibiza.es/elementosInt/rss/12'),
+        (u'Gente', u'http://www.diariodeibiza.es/elementosInt/rss/13'),
+        (u'Sucesos', u'http://www.diariodeibiza.es/elementosInt/rss/15'),
+        (u'Cultura', u'http://www.diariodeibiza.es/elementosInt/rss/16Piti')
+    ]
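(For readers new to these recipes, a standalone sketch of the keep_only_tags idea used above: only markup inside the named containers survives into the e-book. bs4 stands in here for calibre's bundled BeautifulSoup 3, and the sample HTML is invented; 'noticia_texto' is one of the real classes the recipe keeps.)

    from bs4 import BeautifulSoup  # assumption: bs4 as a stand-in parser

    html = '<div class="noticia_texto">story body</div><div class="menu">nav</div>'
    soup = BeautifulSoup(html, 'html.parser')
    kept = soup.find('div', attrs={'class': 'noticia_texto'})  # what keep_only_tags matches
    print(kept.get_text())  # -> story body; the nav div is discarded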
37  recipes/digizone.recipe  Normal file
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DigiZoneCZ(BasicNewsRecipe):
+    title = 'DigiZone'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.0'
+    __date__ = '30 April 2011'
+    description = u'Aktuality a \u010dl\xe1nky z DigiZone.cz'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    encoding = 'iso-8859-2'
+    publisher = 'Internet Info s.r.o.'
+    category = 'digitalni vysilani, televize, CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
+                 p.perex img {display:none;} \
+                 .urs p {margin: 0 0 0.8em 0;}'
+
+    feeds = [
+        (u'Aktuality', u'http://rss.digizone.cz/aktuality'),
+        (u'\u010cl\xe1nky', u'http://rss.digizone.cz/clanky')
+    ]
+
+    remove_tags_before = dict(id=['p-article','p-actuality'])
+
+    remove_tags_after = dict(id=['p-article','p-actuality'])
+
+    remove_tags = [
+        dict(attrs={'class':['path','mth','lbtr','serial','enquiry','links','dp-n','side','op-ab','op-view','op-sub','op-list',]}),
+        dict(id=['opinions','discussionList','similarItems','sidebar','footer','opl','promo-box'])
+    ]
@@ -12,7 +12,6 @@ class AdvancedUserRecipe1301860159(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    language = 'en_EN'
    remove_javascript = True
     keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})]
     remove_tags = [dict(name='a'),dict(name='hr')]
@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 foxnews.com
 '''
@@ -23,6 +23,7 @@ class FoxNews(BasicNewsRecipe):
     extra_css = """
         body{font-family: Arial,sans-serif }
         .caption{font-size: x-small}
+        .author,.dateline{font-size: small}
         """
 
     conversion_options = {
@@ -34,12 +35,12 @@ class FoxNews(BasicNewsRecipe):
 
     remove_attributes = ['xmlns','lang']
 
-    remove_tags = [
-        dict(name=['object','embed','link','script','iframe','meta','base'])
-        ,dict(attrs={'class':['user-control','url-description','ad-context']})
-    ]
+    remove_tags=[
+        dict(attrs={'class':['user-control','logo','ad-300x250','url-description']})
+        ,dict(name=['meta','base','link','iframe','object','embed'])
+    ]
 
-    remove_tags_before=dict(name='h1')
+    keep_only_tags=[dict(attrs={'id':'article-print'})]
     remove_tags_after =dict(attrs={'class':'url-description'})
 
     feeds = [
@@ -55,3 +56,24 @@ class FoxNews(BasicNewsRecipe):
 
     def print_version(self, url):
         return url + 'print'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
+
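(The preprocess_html added above flattens anchors so the converted book carries no live links: link text is kept, image-bearing links become plain divs. A minimal standalone sketch of the same idea follows; bs4 is used here purely for illustration, whereas the recipe runs against calibre's bundled BeautifulSoup 3, so method names differ slightly.)

    from bs4 import BeautifulSoup  # assumption: bs4 stand-in, sample HTML invented

    soup = BeautifulSoup('<p><a href="/x">read more</a> here</p>', 'html.parser')
    for a in soup.find_all('a'):
        a.replace_with(a.get_text())  # keep the text, drop the link wrapper
    print(soup)  # -> <p>read more here</p>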
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 __license__ = 'GPL v3'
-__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
+__copyright__ = u'2010-2011, Tomasz Dlugosz <tomek3d@gmail.com>'
 '''
 frazpc.pl
 '''
@@ -19,17 +19,20 @@ class FrazPC(BasicNewsRecipe):
     use_embedded_content = False
     no_stylesheets = True
 
-    feeds = [(u'Aktualno\u015bci', u'http://www.frazpc.pl/feed'), (u'Recenzje', u'http://www.frazpc.pl/kat/recenzje-2/feed') ]
-
-    keep_only_tags = [dict(name='div', attrs={'id':'FRAZ_CONTENT'})]
-
-    remove_tags = [dict(name='p', attrs={'class':'gray tagsP fs11'})]
-
-    preprocess_regexps = [
-        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-        [(r'<div id="post-[0-9]*"', lambda match: '<div id="FRAZ_CONTENT"'),
-        (r'href="/f/news/', lambda match: 'href="http://www.frazpc.pl/f/news/'),
-        (r' <a href="http://www.frazpc.pl/[^>]*?">(Skomentuj|Komentarz(e)?\([0-9]*\))</a> \|', lambda match: '')]
-    ]
+    feeds = [
+        (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),
+        (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly')
+    ]
+
+    keep_only_tags = [dict(name='div', attrs={'class':'article'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':'title-wrapper'}),
+        dict(name='p', attrs={'class':'tags'}),
+        dict(name='p', attrs={'class':'article-links'}),
+        dict(name='div', attrs={'class':'comments_box'})
+    ]
+
+    preprocess_regexps = [(re.compile(r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'), lambda match: '')]
 
     remove_attributes = [ 'width', 'height' ]
53  recipes/gazeta-prawna-calibre-v1.recipe  Normal file
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Vroo <vroobelek@iq.pl>'
+__author__ = u'Vroo'
+'''
+gazetaprawna.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class gazetaprawna(BasicNewsRecipe):
+    version = 1
+    title = u'Gazeta Prawna'
+    __author__ = u'Vroo'
+    publisher = u'Infor Biznes'
+    oldest_article = 7
+    max_articles_per_feed = 20
+    no_stylesheets = True
+    remove_javascript = True
+    description = 'Polski dziennik gospodarczy'
+    language = 'pl'
+    encoding = 'utf-8'
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':['data-art']})
+    ]
+    remove_tags = [
+        dict(name='div', attrs={'class':['dodatki_artykulu','data-art']})
+    ]
+
+    feeds = [
+        (u'Wiadomo\u015bci - najwa\u017cniejsze', u'http://www.gazetaprawna.pl/wiadomosci/najwazniejsze/rss.xml'),
+        (u'Biznes i prawo gospodarcze', u'http://biznes.gazetaprawna.pl/rss.xml'),
+        (u'Prawo i wymiar sprawiedliwo\u015bci', u'http://prawo.gazetaprawna.pl/rss.xml'),
+        (u'Praca i ubezpieczenia', u'http://praca.gazetaprawna.pl/rss.xml'),
+        (u'Podatki i rachunkowo\u015b\u0107', u'http://podatki.gazetaprawna.pl/rss.xml')
+    ]
+
+    def print_version(self, url):
+        url = url.replace('wiadomosci/artykuly', 'drukowanie')
+        url = url.replace('artykuly', 'drukowanie')
+        url = url.replace('porady', 'drukowanie')
+        url = url.replace('wywiady', 'drukowanie')
+        url = url.replace('orzeczenia', 'drukowanie')
+        url = url.replace('galeria', 'drukowanie')
+        url = url.replace('komentarze', 'drukowanie')
+        url = url.replace('biznes.gazetaprawna', 'www.gazetaprawna')
+        url = url.replace('podatki.gazetaprawna', 'www.gazetaprawna')
+        url = url.replace('prawo.gazetaprawna', 'www.gazetaprawna')
+        url = url.replace('praca.gazetaprawna', 'www.gazetaprawna')
+        return url
@@ -1,9 +1,6 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 
-# Needed for BLOGs
-from calibre.web.feeds import Feed
-
 class HBR(BasicNewsRecipe):
 
     title = 'Harvard Business Review Blogs'
@@ -32,6 +29,7 @@ class HBR(BasicNewsRecipe):
         feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
         oldest_article = 30
        max_articles_per_feed = 100
+        use_embedded_content = False
     else:
         timefmt = ' [%B %Y]'
 
@@ -59,9 +57,9 @@ class HBR(BasicNewsRecipe):
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
         br.open(self.LOGIN_URL)
-        br.select_form(name='signInForm')
-        br['signInForm:username'] = self.username
-        br['signInForm:password'] = self.password
+        br.select_form(name='signin-form')
+        br['signin-form:username'] = self.username
+        br['signin-form:password'] = self.password
         raw = br.submit().read()
         if 'My Account' not in raw:
             raise Exception('Failed to login, are you sure your username and password are correct?')
@@ -161,27 +159,13 @@ class HBR(BasicNewsRecipe):
         return startDate, endDate
 
    #-------------------------------------------------------------------------------------------------
-    def hbr_parse_blogs(self, feeds):
-        # Do the "official" parse_feeds first
-        rssFeeds = Feed()
-
-        # Use the PARSE_FEEDS method to get a Feeds object of the articles
-        rssFeeds = BasicNewsRecipe.parse_feeds(self)
-
-        # Create a new feed of the right configuration and append to existing afeeds
-        self.feed_to_index_append(rssFeeds[:], feeds)
-
-    #-------------------------------------------------------------------------------------------------
     def parse_index(self):
         if self.INCLUDE_ARTICLES == True:
             soup = self.hbr_get_toc()
             feeds = self.hbr_parse_toc(soup)
         else:
-            feeds = []
-
-        # blog stuff
-        if self.INCLUDE_BLOGS == True:
-            self.hbr_parse_blogs(feeds)
+            return BasicNewsRecipe.parse_index(self)
 
         return feeds
     #-------------------------------------------------------------------------------------------------
BIN  recipes/icons/autobild.png  Normal file  (binary, 614 B, not shown)
BIN  recipes/icons/novistandard.png  Normal file  (binary, 1.1 KiB, not shown)
BIN  recipes/icons/socialdiva.png  Normal file  (binary, 1.0 KiB, not shown)
@@ -16,7 +16,7 @@ class Jezebel(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'utf-8'
-    use_embedded_content = False
+    use_embedded_content = True
     language = 'en'
     masthead_url = 'http://cache.gawkerassets.com/assets/jezebel.com/img/logo.png'
     extra_css = '''
@@ -32,13 +32,12 @@ class Jezebel(BasicNewsRecipe):
                          , 'language' : language
                          }
 
-    remove_attributes = ['width','height']
-    keep_only_tags = [dict(attrs={'class':'content permalink'})]
-    remove_tags_before = dict(name='h1')
-    remove_tags = [dict(attrs={'class':'contactinfo'})]
-    remove_tags_after = dict(attrs={'class':'contactinfo'})
-
-    feeds = [(u'Articles', u'http://feeds.gawker.com/jezebel/full')]
+    feeds = [(u'Articles', u'http://feeds.gawker.com/jezebel/vip?format=xml')]
+
+    remove_tags = [
+        {'class': 'feedflare'},
+    ]
 
     def preprocess_html(self, soup):
         return self.adeify_images(soup)
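(The flip to use_embedded_content = True above changes where article HTML comes from: the body embedded in the feed entry is rendered instead of downloading the linked page. A sketch of the distinction follows; feedparser is used here purely to illustrate, is not part of the recipe, and the feed snippet is invented.)

    import feedparser  # assumption: illustration only

    rss = ('<rss version="2.0"><channel><item><title>t</title>'
           '<description>&lt;p&gt;Full body ships inside the feed.&lt;/p&gt;</description>'
           '</item></channel></rss>')
    entry = feedparser.parse(rss).entries[0]
    # With use_embedded_content = True, calibre renders this embedded HTML:
    print(entry.description)  # -> <p>Full body ships inside the feed.</p>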
36  recipes/korea_herald.recipe  Normal file
@@ -0,0 +1,36 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
+'''
+Profile to download KoreaHerald
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class KoreaHerald(BasicNewsRecipe):
+    title = u'KoreaHerald'
+    language = 'en'
+    description = u'Korea Herald News articles'
+    __author__ = 'Seongkyoun Yoo'
+    oldest_article = 10
+    recursions = 3
+    max_articles_per_feed = 10
+    no_stylesheets = True
+    keep_only_tags = [
+        dict(id=['contentLeft', '_article'])
+    ]
+
+    remove_tags = [
+        dict(name='iframe'),
+        dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
+        dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
+    ]
+
+    feeds = [
+        ('All News','http://www.koreaherald.com/rss/020000000000.xml'),
+        ('National','http://www.koreaherald.com/rss/020100000000.xml'),
+        ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
+        ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
+        ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
+        ('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
+        ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
+        ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
+    ]
@@ -16,7 +16,7 @@ class Kotaku(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'utf-8'
-    use_embedded_content = False
+    use_embedded_content = True
     language = 'en'
     masthead_url = 'http://cache.gawkerassets.com/assets/kotaku.com/img/logo.png'
     extra_css = '''
@@ -31,13 +31,12 @@ class Kotaku(BasicNewsRecipe):
                          , 'language' : language
                          }
 
-    remove_attributes = ['width','height']
-    keep_only_tags = [dict(attrs={'class':'content permalink'})]
-    remove_tags_before = dict(name='h1')
-    remove_tags = [dict(attrs={'class':'contactinfo'})]
-    remove_tags_after = dict(attrs={'class':'contactinfo'})
-
-    feeds = [(u'Articles', u'http://feeds.gawker.com/kotaku/full')]
+    feeds = [(u'Articles', u'http://feeds.gawker.com/kotaku/vip?format=xml')]
+
+    remove_tags = [
+        {'class': 'feedflare'},
+    ]
 
     def preprocess_html(self, soup):
         return self.adeify_images(soup)
@@ -48,7 +48,7 @@ class LeMonde(BasicNewsRecipe):
             if alink.string is not None:
                 tstr = alink.string
                 alink.replaceWith(tstr)
-        return soup
+        return self.adeify_images(soup)
 
     preprocess_regexps = [
         (re.compile(r'([0-9])%'), lambda m: m.group(1) + ' %'),
37  recipes/lupa.recipe  Normal file
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LupaCZ(BasicNewsRecipe):
+    title = 'Lupa'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.0'
+    __date__ = '30 April 2011'
+    description = u'Zpr\xe1vi\u010dky a \u010dl\xe1nky z Lupa.cz'
+    oldest_article = 2
+    max_articles_per_feed = 10
+    encoding = 'utf8'
+    publisher = 'Internet Info s.r.o.'
+    category = 'IT,news,CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
+                 p.perex img {display:none;} \
+                 .urs p {margin: 0 0 0.8em 0;}'
+
+    feeds = [
+        (u'Zpr\xe1vi\u010dky', u'http://rss.lupa.cz/zpravicky'),
+        (u'\u010cl\xe1nky', u'http://rss.lupa.cz/clanky')
+    ]
+
+    remove_tags_before = dict(id='main')
+
+    remove_tags_after = [dict(id='main')]
+
+    remove_tags = [
+        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
+        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
+    ]
37  recipes/mesec.recipe  Normal file
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MesecCZ(BasicNewsRecipe):
+    title = u'M\u011b\u0161ec'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.0'
+    __date__ = '30 April 2011'
+    description = u'Zpr\xe1vi\u010dky a \u010dl\xe1nky z Mesec.cz'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    encoding = 'utf8'
+    publisher = 'Internet Info s.r.o.'
+    category = 'finance,CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
+                 p.perex img {display:none;} \
+                 .urs p {margin: 0 0 0.8em 0;}'
+
+    feeds = [
+        (u'Aktuality', u'http://www.mesec.cz/rss/aktuality/'),
+        (u'\u010cl\xe1nky', u'http://www.mesec.cz/rss/clanky/')
+    ]
+
+    remove_tags_before = dict(id='main')
+
+    remove_tags_after = [dict(id='main')]
+
+    remove_tags = [
+        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
+        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
+    ]
43  recipes/novinky.recipe  Normal file
@@ -0,0 +1,43 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NovinkyCZ(BasicNewsRecipe):
+    title = 'Novinky'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.1'
+    __date__ = '30 April 2011'
+    description = 'News from server Novinky.cz'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    encoding = 'utf8'
+    publisher = 'Novinky'
+    category = 'news, CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    cover_url = 'http://img193.imageshack.us/img193/3039/novinkycover.jpg'
+    extra_css = 'p.acmDescription{font-style:italic;} p.acmAuthor{font-size:0.8em; color:#707070}'
+
+    feeds = [
+        (u'Dom\xe1c\xed', u'http://www.novinky.cz/rss/domaci/'),
+        (u'Zahrani\u010d\xed', u'http://www.novinky.cz/rss/zahranicni/'),
+        (u'Krimi', u'http://www.novinky.cz/rss/krimi/'),
+        (u'Ekonomika', u'http://www.novinky.cz/rss/ekonomika/'),
+        (u'Finance', u'http://www.novinky.cz/rss/finance/'),
+        (u'Kultura', u'http://www.novinky.cz/rss/kultura/'),
+        (u'Koktejl', u'http://www.novinky.cz/rss/koktejl/'),
+        (u'Internet a PC', u'http://www.novinky.cz/rss/internet-a-pc/'),
+        (u'Auto-moto', u'http://www.novinky.cz/rss/auto/'),
+    ]
+
+    remove_tags_before = dict(id='articleContent')
+
+    remove_tags_after = [dict(id='movedArticleAuthors')]
+
+    remove_tags = [
+        dict(name='div', attrs={'id':['articleColumnInfo','pictureInnerBox']}),
+        dict(name='p', attrs={'id':['articleDate']})
+    ]
100  recipes/novistandard.recipe  Normal file
@@ -0,0 +1,100 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.standard.rs
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NoviStandard(BasicNewsRecipe):
+    title = 'Novi Standard'
+    __author__ = 'Darko Miletic'
+    description = 'NoviStandard - energija je neunistiva!'
+    publisher = 'Novi Standard'
+    category = 'news, politics, Serbia'
+    no_stylesheets = True
+    delay = 1
+    oldest_article = 15
+    encoding = 'utf-8'
+    publication_type = 'magazine'
+    needs_subscription = 'optional'
+    remove_empty_feeds = True
+    INDEX = 'http://www.standard.rs/'
+    use_embedded_content = False
+    language = 'sr'
+    publication_type = 'magazine'
+    masthead_url = 'http://www.standard.rs/templates/ja_opal/images/red/logo.png'
+    extra_css = """
+        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+        body{font-family: Arial,"Segoe UI","Trebuchet MS",Helvetica,sans1,sans-serif}
+        .dropcap{font-family: Georgia,Times,serif1,serif; display:inline}
+        .dropcap:first-letter{display: inline; font-size: xx-large; font-weight: bold}
+        .contentheading{color: gray; font-size: x-large}
+        .article-meta, .createdby{color: red}
+        img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
+        """
+
+    conversion_options = {
+        'comment'    : description
+        , 'tags'     : category
+        , 'publisher': publisher
+        , 'language' : language
+    }
+
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        br.open(self.INDEX)
+        if self.username is not None and self.password is not None:
+            br.select_form(name='login')
+            br['username'] = self.username
+            br['passwd' ] = self.password
+            br.submit()
+        return br
+
+    keep_only_tags = [dict(attrs={'class':['contentheading','article-meta','article-content']})]
+    remove_tags_after = dict(attrs={'class':'extravote-container'})
+    remove_tags = [
+        dict(name=['object','link','iframe','meta','base'])
+        ,dict(attrs={'class':'extravote-container'})
+    ]
+    remove_attributes = ['border','background','height','width','align','valign','lang']
+    feeds = [
+        (u'Naslovna', u'http://www.standard.rs/index.php?format=feed&type=rss')
+        ,(u'Politika', u'http://www.standard.rs/vesti/36-politika.html?format=feed&type=rss')
+        ,(u'Cvijanovic preporucuje', u'http://www.standard.rs/-cvijanovi-vam-preporuuje.html?format=feed&type=rss')
+        ,(u'Kolumne', u'http://www.standard.rs/vesti/49-kolumne.html?format=feed&type=rss')
+        ,(u'Kultura', u'http://www.standard.rs/vesti/40-kultura.html?format=feed&type=rss')
+        ,(u'Lifestyle', u'http://www.standard.rs/vesti/39-lifestyle.html?format=feed&type=rss')
+        ,(u'Svet', u'http://www.standard.rs/vesti/41-svet.html?format=feed&type=rss')
+        ,(u'Ekonomija', u'http://www.standard.rs/vesti/37-ekonomija.html?format=feed&type=rss')
+        ,(u'Sport', u'http://www.standard.rs/vesti/38-sport.html?format=feed&type=rss')
+    ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('div'):
+            if len(item.contents) == 0:
+                item.extract()
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
37  recipes/podnikatel.recipe  Normal file
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class PodnikatelCZ(BasicNewsRecipe):
+    title = 'Podnikatel'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.0'
+    __date__ = '30 April 2011'
+    description = u'Aktuality a \u010dl\xe1nky z Podnikatel.cz'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    encoding = 'utf8'
+    publisher = 'Internet Info s.r.o.'
+    category = 'podnikani, bussiness, CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
+                 p.perex img {display:none;} \
+                 .urs p {margin: 0 0 0.8em 0;}'
+
+    feeds = [
+        (u'Aktuality', u'http://rss.podnikatel.cz/aktuality'),
+        (u'\u010cl\xe1nky', u'http://rss.podnikatel.cz/clanky')
+    ]
+
+    remove_tags_before = dict(id='art-content')
+
+    remove_tags_after = [dict(id='art-content')]
+
+    remove_tags = [
+        dict(attrs={'class':['socialshare','box-blue','author clear','labels-terms','box diskuze','ad','page-nav right','infobox','box zpravy','s-clanky']}),
+        dict(id=['path','article-tools','discussionList','similarItems','promo-box'])
+    ]
54  recipes/socialdiva.recipe  Normal file
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011'
+'''
+socialdiva.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SocialDiva(BasicNewsRecipe):
+    title = u'Social Diva'
+    __author__ = u'Silviu Cotoara'
+    description = u'When in doubt, wear red'
+    publisher = 'Social Diva'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,Femei'
+    encoding = 'utf-8'
+    cover_url = 'http://www.socialdiva.ro/images/logo.png'
+
+    conversion_options = {
+        'comments'  : description
+        ,'tags'      : category
+        ,'language'  : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'col-alpha mt5 content_articol'}),
+        dict(name='div', attrs={'class':'mt5'})
+    ]
+
+    remove_tags = [
+        dict(name='a', attrs={'class':['comments float-left scroll mt5']}),
+        dict(name='a', attrs={'class':['comments float-left scroll']}),
+        dict(name='div', attrs={'class':['rating-container relative float-left']}),
+        dict(name='div', attrs={'class':['float-right social_articol']})
+    ]
+
+    remove_tags_after = [
+        dict(name='a', attrs={'class':['comments float-left scroll mt5']})
+    ]
+
+    feeds = [
+        (u'Feeds', u'http://www.socialdiva.ro/rss.html')
+    ]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
@@ -18,7 +18,7 @@ class TelepolisNews(BasicNewsRecipe):
     recursion = 0
     no_stylesheets = True
     encoding = "utf-8"
-    language = 'de_AT'
+    language = 'de'
 
     use_embedded_content =False
     remove_empty_feeds = True
@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class AdvancedUserRecipe1283848012(BasicNewsRecipe):
     description = 'TheMarker Financial News in Hebrew'
-    __author__ = 'TonyTheBookworm, Marbs'
+    __author__ = 'Marbs'
     cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
     title = u'TheMarker'
     language = 'he'
@@ -11,42 +11,38 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
     remove_javascript = True
     timefmt = '[%a, %d %b, %Y]'
     oldest_article = 1
-    remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']}) ]
-    max_articles_per_feed = 10
+    keep_only_tags =dict(name='div', attrs={'id':'content'})
+    remove_attributes = ['width','float','margin-left']
+    no_stylesheets = True
+    remove_tags = [dict(name='div', attrs={'class':['social-nav article-social-nav','prsnlArticleEnvelope','cb']}) ,
+                   dict(name='a', attrs={'href':['/misc/mobile']}) ,
+                   dict(name='span', attrs={'class':['post-summ']}) ]
+    max_articles_per_feed = 100
     extra_css='body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
-    feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
-             (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
-             (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
-             (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
-             (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
-             (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
-             (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
-             (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
-             (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
-             (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
-             (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')]
+    feeds = [(u'Head Lines', u'http://www.themarker.com/cmlink/1.144'),
+             (u'TA Market', u'http://www.themarker.com/cmlink/1.243'),
+             (u'Real Estate', u'http://www.themarker.com/cmlink/1.605656'),
+             (u'Global', u'http://www.themarker.com/cmlink/1.605658'),
+             (u'Wall Street', u'http://www.themarker.com/cmlink/1.613713'),
+             (u'SmartPhone', u'http://www.themarker.com/cmlink/1.605661'),
+             (u'Law', u'http://www.themarker.com/cmlink/1.605664'),
+             (u'Media', u'http://www.themarker.com/cmlink/1.605660'),
+             (u'Consumer', u'http://www.themarker.com/cmlink/1.605662'),
+             (u'Career', u'http://www.themarker.com/cmlink/1.605665'),
+             (u'Car', u'http://www.themarker.com/cmlink/1.605663'),
+             (u'High Tech', u'http://www.themarker.com/cmlink/1.605659'),
+             (u'Small Business', u'http://www.themarker.com/cmlink/1.605666')]
 
     def print_version(self, url):
-        split1 = url.split("=")
-        weblinks = url
-
-        if weblinks is not None:
-            for link in weblinks:
-                #---------------------------------------------------------
-                #here we need some help with some regexpressions
-                #we are trying to find it.themarker.com in a url
-                #-----------------------------------------------------------
-                re1='.*?' # Non-greedy match on filler
-                re2='(it\\.themarker\\.com)' # Fully Qualified Domain Name 1
-                rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
-                m = rg.search(url)
-
-                if m:
-                    split2 = url.split("article/")
-                    print_url = 'http://it.themarker.com/tmit/PrintArticle/' + split2[1]
-                else:
-                    print_url = 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + split1[1]+'.xml'
-
-        return print_url
+        #split1 = url.split("/")
+        #print_url='http://www.themarker.com/misc/article-print-page/'+split1[-1]
+        txt=url
+
+        re1='.*?' # Non-greedy match on filler
+        re2='(tv)' # Word 1
+
+        rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
+        m = rg.search(txt)
+        if m:
+            #print 'bad link'
+            return 1
@@ -10,6 +10,8 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Time(BasicNewsRecipe):
+    recipe_disabled = ('This recipe has been disabled as TIME no longer'
+                       ' publish complete articles on the web.')
     title = u'Time'
     __author__ = 'Kovid Goyal and Sujata Raman'
     description = 'Weekly magazine'
@ -7,13 +7,11 @@ usatoday.com
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag
|
|
||||||
import re
|
|
||||||
|
|
||||||
class USAToday(BasicNewsRecipe):
|
class USAToday(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'USA Today'
|
title = 'USA Today'
|
||||||
__author__ = 'GRiker'
|
__author__ = 'Kovid Goyal'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
timefmt = ''
|
timefmt = ''
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 20
|
||||||
@ -31,7 +29,6 @@ class USAToday(BasicNewsRecipe):
|
|||||||
margin-bottom: 0em; \
|
margin-bottom: 0em; \
|
||||||
font-size: smaller;}\n \
|
font-size: smaller;}\n \
|
||||||
.articleBody {text-align: left;}\n '
|
.articleBody {text-align: left;}\n '
|
||||||
conversion_options = { 'linearize_tables' : True }
|
|
||||||
#simultaneous_downloads = 1
|
#simultaneous_downloads = 1
|
||||||
feeds = [
|
feeds = [
|
||||||
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
|
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
|
||||||
@ -47,63 +44,26 @@ class USAToday(BasicNewsRecipe):
|
|||||||
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
|
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
|
||||||
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
|
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
|
||||||
]
|
]
|
||||||
keep_only_tags = [dict(attrs={'class':[
|
keep_only_tags = [dict(attrs={'class':'story'})]
|
||||||
'byLine',
|
remove_tags = [
|
||||||
'inside-copy',
|
dict(attrs={'class':[
|
||||||
'inside-head',
|
'share',
|
||||||
'inside-head2',
|
'reprints',
|
||||||
'item',
|
'inline-h3',
|
||||||
'item-block',
|
'info-extras',
|
||||||
'photo-container',
|
'ppy-outer',
|
||||||
]}),
|
'ppy-caption',
|
||||||
dict(id=[
|
'comments',
|
||||||
'applyMainStoryPhoto',
|
'jump',
|
||||||
'permalink',
|
'pagetools',
|
||||||
])]
|
'post-attributes',
|
||||||
|
'tags',
|
||||||
|
'bottom-tools',
|
||||||
|
'sponsoredlinks',
|
||||||
|
]}),
|
||||||
|
dict(id=['pluck']),
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = [dict(attrs={'class':[
|
|
||||||
'comments',
|
|
||||||
'jump',
|
|
||||||
'pagetools',
|
|
||||||
'post-attributes',
|
|
||||||
'tags',
|
|
||||||
]}),
|
|
||||||
dict(id=[])]
|
|
||||||
|
|
||||||
#feeds = [('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles')]
|
|
||||||
|
|
||||||
def dump_hex(self, src, length=16):
|
|
||||||
''' Diagnostic '''
|
|
||||||
FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
|
|
||||||
N=0; result=''
|
|
||||||
while src:
|
|
||||||
s,src = src[:length],src[length:]
|
|
||||||
hexa = ' '.join(["%02X"%ord(x) for x in s])
|
|
||||||
s = s.translate(FILTER)
|
|
||||||
result += "%04X %-*s %s\n" % (N, length*3, hexa, s)
|
|
||||||
N+=length
|
|
||||||
print result
|
|
||||||
|
|
||||||
def fixChars(self,string):
|
|
||||||
# Replace lsquo (\x91)
|
|
||||||
fixed = re.sub("\x91","‘",string)
|
|
||||||
|
|
||||||
# Replace rsquo (\x92)
|
|
||||||
fixed = re.sub("\x92","’",fixed)
|
|
||||||
|
|
||||||
# Replace ldquo (\x93)
|
|
||||||
fixed = re.sub("\x93","“",fixed)
|
|
||||||
|
|
||||||
# Replace rdquo (\x94)
|
|
||||||
fixed = re.sub("\x94","”",fixed)
|
|
||||||
|
|
||||||
# Replace ndash (\x96)
|
|
||||||
fixed = re.sub("\x96","–",fixed)
|
|
||||||
|
|
||||||
# Replace mdash (\x97)
|
|
||||||
fixed = re.sub("\x97","—",fixed)
|
|
||||||
|
|
||||||
return fixed
|
|
||||||
|
|
||||||
def get_masthead_url(self):
|
def get_masthead_url(self):
|
||||||
masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'
|
masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'
|
||||||
@ -115,321 +75,4 @@ class USAToday(BasicNewsRecipe):
|
|||||||
masthead = None
|
masthead = None
|
||||||
return masthead
|
return masthead
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
|
||||||
# Kindle TOC descriptions won't render certain characters
|
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&","&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def parse_feeds(self, *args, **kwargs):
|
|
||||||
parsed_feeds = BasicNewsRecipe.parse_feeds(self, *args, **kwargs)
|
|
||||||
# Count articles for progress dialog
|
|
||||||
article_count = 0
|
|
||||||
for feed in parsed_feeds:
|
|
||||||
article_count += len(feed)
|
|
||||||
self.log( "Queued %d articles" % article_count)
|
|
||||||
return parsed_feeds
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
soup = self.strip_anchors(soup)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
-    def postprocess_html(self, soup, first_fetch):
-
-        # Remove navLinks <div class="inside-copy" style="padding-bottom:3px">
-        navLinks = soup.find(True,{'style':'padding-bottom:3px'})
-        if navLinks:
-            navLinks.extract()
-
-        # Remove <div class="inside-copy" style="margin-bottom:10px">
-        gibberish = soup.find(True,{'style':'margin-bottom:10px'})
-        if gibberish:
-            gibberish.extract()
-
-        # Change <inside-head> to <h2>
-        headline = soup.find(True, {'class':['inside-head','inside-head2']})
-        if not headline:
-            headline = soup.find('h3')
-        if headline:
-            tag = Tag(soup, "h2")
-            tag['class'] = "headline"
-            tag.insert(0, headline.contents[0])
-            headline.replaceWith(tag)
-        else:
-            print "unable to find headline:\n%s\n" % soup
-
-        # Change byLine to byline, change commas to middot
-        # Kindle renders commas in byline as '&'
-        byline = soup.find(True, {'class':'byLine'})
-        if byline:
-            byline['class'] = 'byline'
-            # Replace comma with middot
-            byline.contents[0].replaceWith(re.sub(","," ·", byline.renderContents()))
-
-        jumpout_punc_list = [':','?']
-        # Remove the inline jumpouts in <div class="inside-copy">
-        paras = soup.findAll(True, {'class':'inside-copy'})
-        for para in paras:
-            if re.match("<b>[\w\W]+ ",para.renderContents()):
-                p = para.find('b')
-                for punc in jumpout_punc_list:
-                    punc_offset = p.contents[0].find(punc)
-                    if punc_offset == -1:
-                        continue
-                    if punc_offset > 1:
-                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
-                            #print "extracting \n%s\n" % para.prettify()
-                            para.extract()
-
-        # Reset class for remaining
-        paras = soup.findAll(True, {'class':'inside-copy'})
-        for para in paras:
-            para['class'] = 'articleBody'
-
-        # Remove inline jumpouts in <p>
-        paras = soup.findAll(['p'])
-        for p in paras:
-            if hasattr(p,'contents') and len(p.contents):
-                for punc in jumpout_punc_list:
-                    punc_offset = p.contents[0].find(punc)
-                    if punc_offset == -1:
-                        continue
-                    if punc_offset > 2 and hasattr(p,'a') and len(p.contents):
-                        #print "evaluating %s\n" % p.contents[0][:punc_offset+1]
-                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
-                            #print "extracting \n%s\n" % p.prettify()
-                            p.extract()
-
-        # Capture the first img, insert after headline
-        imgs = soup.findAll('img')
-        print "postprocess_html(): %d images" % len(imgs)
-        if imgs:
-            divTag = Tag(soup, 'div')
-            divTag['class'] = 'image'
-            body = soup.find('body')
-            img = imgs[0]
-            #print "img: \n%s\n" % img.prettify()
-
-            # Table for photo and credit
-            tableTag = Tag(soup,'table')
-
-            # Photo
-            trimgTag = Tag(soup, 'tr')
-            tdimgTag = Tag(soup, 'td')
-            tdimgTag.insert(0,img)
-            trimgTag.insert(0,tdimgTag)
-            tableTag.insert(0,trimgTag)
-
-            # Credit
-            trcreditTag = Tag(soup, 'tr')
-
-            tdcreditTag = Tag(soup, 'td')
-            tdcreditTag['class'] = 'credit'
-            credit = soup.find('td',{'class':'photoCredit'})
-            if credit:
-                tdcreditTag.insert(0,NavigableString(credit.renderContents()))
-            else:
-                credit = img['credit']
-                if credit:
-                    tdcreditTag.insert(0,NavigableString(credit))
-                else:
-                    tdcreditTag.insert(0,NavigableString(''))
-
-            trcreditTag.insert(0,tdcreditTag)
-            tableTag.insert(1,trcreditTag)
-            dtc = 0
-            divTag.insert(dtc,tableTag)
-            dtc += 1
-
-            if False:
-                # Add the caption in the table
-                tableCaptionTag = Tag(soup,'caption')
-                tableCaptionTag.insert(0,soup.find('td',{'class':'photoCredit'}).renderContents())
-                tableTag.insert(1,tableCaptionTag)
-                divTag.insert(dtc,tableTag)
-                dtc += 1
-                body.insert(1,divTag)
-            else:
-                # Add the caption below the table
-                #print "Looking for caption in this soup:\n%s" % img.prettify()
-                captionTag = Tag(soup,'p')
-                captionTag['class'] = 'caption'
-                if hasattr(img,'alt') and img['alt']:
-                    captionTag.insert(0,NavigableString('<blockquote>%s</blockquote>' % img['alt']))
-                    divTag.insert(dtc, captionTag)
-                    dtc += 1
-                else:
-                    try:
-                        captionTag.insert(0,NavigableString('<blockquote>%s</blockquote>' % img['cutline']))
-                        divTag.insert(dtc, captionTag)
-                        dtc += 1
-                    except:
-                        pass
-
-                hrTag = Tag(soup, 'hr')
-                divTag.insert(dtc, hrTag)
-                dtc += 1
-
-            # Delete <div id="applyMainStoryPhoto">
-            photoJunk = soup.find('div',{'id':'applyMainStoryPhoto'})
-            if photoJunk:
-                photoJunk.extract()
-
-            # Insert img after headline
-            tag = body.find(True)
-            insertLoc = 0
-            headline_found = False
-            while True:
-                # Scan the top-level tags
-                insertLoc += 1
-                if hasattr(tag,'class') and tag['class'] == 'headline':
-                    headline_found = True
-                    body.insert(insertLoc,divTag)
-                    break
-                tag = tag.nextSibling
-                if not tag:
-                    break
-
-            if not headline_found:
-                # Monolithic <div> - restructure
-                tag = body.find(True)
-                while True:
-                    insertLoc += 1
-                    try:
-                        if hasattr(tag,'class') and tag['class'] == 'headline':
-                            headline_found = True
-                            tag.insert(insertLoc,divTag)
-                            break
-                    except:
-                        pass
-                    tag = tag.next
-                    if not tag:
-                        break
-
-            # Yank out headline, img and caption
-            headline = body.find('h2','headline')
-            img = body.find('div','image')
-            caption = body.find('p','caption')
-
-            # body(0) is calibre_navbar
-            # body(1) is <div class="item">
-
-            btc = 1
-            headline.extract()
-            body.insert(1, headline)
-            btc += 1
-            if img:
-                img.extract()
-                body.insert(btc, img)
-                btc += 1
-            if caption:
-                caption.extract()
-                body.insert(btc, caption)
-                btc += 1
-
-            if len(imgs) > 1:
-                if True:
-                    [img.extract() for img in imgs[1:]]
-                else:
-                    # Format the remaining images
-                    # This doesn't work yet
-                    for img in imgs[1:]:
-                        print "img:\n%s\n" % img.prettify()
-                        divTag = Tag(soup, 'div')
-                        divTag['class'] = 'image'
-
-                        # Table for photo and credit
-                        tableTag = Tag(soup,'table')
-
-                        # Photo
-                        trimgTag = Tag(soup, 'tr')
-                        tdimgTag = Tag(soup, 'td')
-                        tdimgTag.insert(0,img)
-                        trimgTag.insert(0,tdimgTag)
-                        tableTag.insert(0,trimgTag)
-
-                        # Credit
-                        trcreditTag = Tag(soup, 'tr')
-
-                        tdcreditTag = Tag(soup, 'td')
-                        tdcreditTag['class'] = 'credit'
-                        try:
-                            tdcreditTag.insert(0,NavigableString(img['credit']))
-                        except:
-                            tdcreditTag.insert(0,NavigableString(''))
-                        trcreditTag.insert(0,tdcreditTag)
-                        tableTag.insert(1,trcreditTag)
-                        divTag.insert(0,tableTag)
-                        soup.img.replaceWith(divTag)
-
-        return soup
-    def postprocess_book(self, oeb, opts, log) :
-
-        def extract_byline(href) :
-            # <meta name="byline" content=
-            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
-            byline = soup.find('div',attrs={'class':'byline'})
-            if byline:
-                byline['class'] = 'byline'
-                # Replace comma with middot
-                byline.contents[0].replaceWith(re.sub(u",", u" ·",
-                    byline.renderContents(encoding=None)))
-                return byline.renderContents(encoding=None)
-            else :
-                paras = soup.findAll(text=True)
-                for para in paras:
-                    if para.startswith("Copyright"):
-                        return para[len('Copyright xxxx '):para.find('.')]
-                return None
-
-        def extract_description(href) :
-            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
-            description = soup.find('meta',attrs={'name':'description'})
-            if description :
-                return self.massageNCXText(description['content'])
-            else:
-                # Take first paragraph of article
-                articleBody = soup.find('div',attrs={'id':['articleBody','item']})
-                if articleBody:
-                    paras = articleBody.findAll('p')
-                    for p in paras:
-                        if p.renderContents() > '' :
-                            return self.massageNCXText(self.tag_to_string(p,use_alt=False))
-                else:
-                    print "Didn't find <div id='articleBody'> in this soup:\n%s" % soup.prettify()
-                return None
-
-        # Method entry point here
-        # Single section toc looks different than multi-section tocs
-        if oeb.toc.depth() == 2 :
-            for article in oeb.toc :
-                if article.author is None :
-                    article.author = extract_byline(article.href)
-                if article.description is None :
-                    article.description = extract_description(article.href)
-        elif oeb.toc.depth() == 3 :
-            for section in oeb.toc :
-                for article in section :
-                    article.author = extract_byline(article.href)
-                    '''
-                    if article.author is None :
-                        article.author = self.massageNCXText(extract_byline(article.href))
-                    else:
-                        article.author = self.massageNCXText(article.author)
-                    '''
-                    if article.description is None :
-                        article.description = extract_description(article.href)
-    def strip_anchors(self,soup):
-        paras = soup.findAll(True)
-        for para in paras:
-            aTags = para.findAll('a')
-            for a in aTags:
-                if a.img is None:
-                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
-        return soup
39  recipes/vitalia.recipe  Normal file
@@ -0,0 +1,39 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class VitaliaCZ(BasicNewsRecipe):
    title = 'Vitalia'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Aktuality a \u010dl\xe1nky z Vitalia.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'zdravi, vztahy, wellness, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0; line-height: 1.4; padding: 0 0 10px 0; font-weight: bold;} \
                 p.perex img {display:none;} \
                 span.author {font-size:0.8em; font-style:italic} \
                 .urs div.rs-tip-major {padding:0.5em; background: #e0e0e0 none repeat scroll 0 0;border: 1px solid #909090;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', 'http://www.vitalia.cz/rss/aktuality/'),
        (u'\u010cl\xe1nky', u'http://www.vitalia.cz/rss/clanky/'),
    ]

    remove_tags_before = dict(id='main')

    remove_tags_after = [dict(id='main')]

    remove_tags = [
        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]
115  recipes/volksrant_sub.recipe  Normal file
@@ -0,0 +1,115 @@
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Volkskrant_full(BasicNewsRecipe):
    # This recipe will download the Volkskrant newspaper,
    # from the subscribers site. It requires a password.
    # Known issues are: articles that are spread out over
    # multiple pages will appear multiple times. Pages
    # that contain only adverts will appear, but empty.
    # The supplement 'Volkskrant Magazine' on saturday
    # is currently not downloaded.
    # You can set a manual date, to download an archived
    # newspaper. Volkskrant stores over a month at the
    # moment of writing. To do so I suggest you unmark
    # the date on the line below, and insert it in the title. Then
    # follow the instructions marked further below.

    title = 'De Volkskrant (subscription)' # [za, 13 nov 2010]'
    __author__ = u'Selcal'
    description = u"Volkskrant"
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'nl'
    use_embedded_content = False
    simultaneous_downloads = 1
    delay = 1
    needs_subscription = True
    # Set RETRIEVEDATE to 'yyyymmdd' to load an older
    # edition. Otherwise keep '%Y%m%d'
    # When setting a manual date, unmark and add the date
    # to the title above, and unmark the timefmt line to stop
    # Calibre from adding today's date in addition.

    # timefmt = ''
    RETRIEVEDATE = strftime('%Y%m%d')
    INDEX_MAIN = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/#text'
    INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/'
    LOGIN = 'http://www.volkskrant.nl/vk/user/loggedIn.do'
    remove_tags = [dict(name='address')]
    cover_url = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/page.jpg'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()

        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(nr = 0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br
    def parse_index(self):
        krant = []
        def strip_title(_title):
            i = 0
            # Check the bound before indexing, so a title without a
            # colon does not run past the end of the string
            while ((i < len(_title)) and (_title[i] <> ":")):
                i = i + 1
            return(_title[0:i])
        for temp in range (5):
            try:
                soup = self.index_to_soup(self.INDEX_MAIN)
                break
            except:
                #print '(Retrying main index load)'
                continue
        mainsoup = soup.find('td', attrs={'id': 'select_page_top'})
        for option in mainsoup.findAll('option'):
            articles = []
            _INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text'
            _INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/'
            #print ''
            #print '<------- Processing section: ' + _INDEX + ' ------------------------->'
            for temp in range (5):
                try:
                    soup = self.index_to_soup(_INDEX)
                    break
                except:
                    #print '(Retrying index load)'
                    continue
            for item in soup.findAll('area'):
                art_nr = item['class']
                attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:len(art_nr)]
                #print '==> Found: ' + attrname;
                index_title = soup.find('div', attrs={'class': attrname})
                get_title = index_title['title'];
                _ARTICLE = _INDEX_ARTICLE + attrname + '.html#text'
                title = get_title;
                #print '--> Title: ' + title;
                #print '--> URL: ' + _ARTICLE;
                for temp in range (5):
                    try:
                        souparticle = self.index_to_soup(_ARTICLE);
                        break
                    except:
                        print '(Retrying URL load)'
                        continue
                headerurl = souparticle.findAll('frame')[0]['src'];
                #print '--> Read frame name for header: ' + headerurl;
                url = _INDEX_ARTICLE + headerurl[0:len(headerurl)-12] + '_text.html';
                #print '--> Corrected URL: ' + url;
                if (get_title <> ''):
                    title = strip_title(get_title)
                    date = strftime(' %B %Y')
                if (title <> ''):
                    articles.append({
                        'title'       :title
                        ,'date'       :date
                        ,'url'        :url
                        ,'description':''
                        })
            krant.append( (option.string, articles))
        return krant
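For the archived-edition mode the comments in this recipe describe, a minimal sketch of the three edits involved (the date shown is invented for illustration and is not part of the commit):

    # Hypothetical example of pinning the recipe to an archived edition:
    title = 'De Volkskrant (subscription) [di, 26 apr 2011]'
    timefmt = ''                # unmarked so calibre does not also append today's date
    RETRIEVEDATE = '20110426'   # fixed 'yyyymmdd' instead of strftime('%Y%m%d')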
@@ -118,6 +118,7 @@ sort_columns_at_startup = None
 # timestamp default if not set: dd MMM yyyy
 gui_pubdate_display_format = 'MMM yyyy'
 gui_timestamp_display_format = 'dd MMM yyyy'
+gui_last_modified_display_format = 'dd MMM yyyy'

 #: Control sorting of titles and series in the library display
 # Control title and series sorting in the library view. If set to
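The display formats above appear to follow Qt-style date tokens (dd = day, MMM = abbreviated month name, yyyy = year); a sketch of overriding the new tweak with an ISO-style date, as an illustration only:

    # Illustrative value, not from the commit: show the last-modified
    # column as e.g. 2011-04-30 instead of 30 Apr 2011.
    gui_last_modified_display_format = 'yyyy-MM-dd'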
@@ -7,17 +7,30 @@ CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
                     title TEXT NOT NULL DEFAULT 'Unknown' COLLATE NOCASE,
                     sort TEXT COLLATE NOCASE,
                     timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                    uri TEXT,
+                    pubdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                    series_index INTEGER NOT NULL DEFAULT 1,
+                    series_index REAL NOT NULL DEFAULT 1.0,
                     author_sort TEXT COLLATE NOCASE,
                     isbn TEXT DEFAULT "" COLLATE NOCASE,
-                    path TEXT NOT NULL DEFAULT ""
-);
+                    lccn TEXT DEFAULT "" COLLATE NOCASE,
+                    path TEXT NOT NULL DEFAULT "",
+                    flags INTEGER NOT NULL DEFAULT 1
+                    , uuid TEXT, has_cover BOOL DEFAULT 0, last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00");
 CREATE TABLE books_authors_link ( id INTEGER PRIMARY KEY,
                                   book INTEGER NOT NULL,
                                   author INTEGER NOT NULL,
                                   UNIQUE(book, author)
                                 );
+CREATE TABLE books_languages_link ( id INTEGER PRIMARY KEY,
+                                    book INTEGER NOT NULL,
+                                    lang_code INTEGER NOT NULL,
+                                    item_order INTEGER NOT NULL DEFAULT 0,
+                                    UNIQUE(book, lang_code)
+                                  );
+CREATE TABLE books_plugin_data(id INTEGER PRIMARY KEY,
+                               book INTEGER NON NULL,
+                               name TEXT NON NULL,
+                               val TEXT NON NULL,
+                               UNIQUE(book,name));
 CREATE TABLE books_publishers_link ( id INTEGER PRIMARY KEY,
                                      book INTEGER NOT NULL,
                                      publisher INTEGER NOT NULL,
@@ -49,11 +62,51 @@ CREATE TABLE conversion_options ( id INTEGER PRIMARY KEY,
                                   data BLOB NOT NULL,
                                   UNIQUE(format,book)
                                 );
+CREATE TABLE custom_columns (
+                    id       INTEGER PRIMARY KEY AUTOINCREMENT,
+                    label    TEXT NOT NULL,
+                    name     TEXT NOT NULL,
+                    datatype TEXT NOT NULL,
+                    mark_for_delete BOOL DEFAULT 0 NOT NULL,
+                    editable BOOL DEFAULT 1 NOT NULL,
+                    display  TEXT DEFAULT "{}" NOT NULL,
+                    is_multiple BOOL DEFAULT 0 NOT NULL,
+                    normalized BOOL NOT NULL,
+                    UNIQUE(label)
+                );
+CREATE TABLE data ( id     INTEGER PRIMARY KEY,
+                    book   INTEGER NON NULL,
+                    format TEXT NON NULL COLLATE NOCASE,
+                    uncompressed_size INTEGER NON NULL,
+                    name TEXT NON NULL,
+                    UNIQUE(book, format)
+);
 CREATE TABLE feeds ( id INTEGER PRIMARY KEY,
                      title TEXT NOT NULL,
                      script TEXT NOT NULL,
                      UNIQUE(title)
                    );
+CREATE TABLE identifiers ( id   INTEGER PRIMARY KEY,
+                           book INTEGER NON NULL,
+                           type TEXT NON NULL DEFAULT "isbn" COLLATE NOCASE,
+                           val  TEXT NON NULL COLLATE NOCASE,
+                           UNIQUE(book, type)
+);
+CREATE TABLE languages ( id        INTEGER PRIMARY KEY,
+                         lang_code TEXT NON NULL COLLATE NOCASE,
+                         UNIQUE(lang_code)
+);
+CREATE TABLE library_id ( id   INTEGER PRIMARY KEY,
+                          uuid TEXT NOT NULL,
+                          UNIQUE(uuid)
+);
+CREATE TABLE metadata_dirtied(id INTEGER PRIMARY KEY,
+                              book INTEGER NOT NULL,
+                              UNIQUE(book));
+CREATE TABLE preferences(id INTEGER PRIMARY KEY,
+                         key TEXT NON NULL,
+                         val TEXT NON NULL,
+                         UNIQUE(key));
 CREATE TABLE publishers ( id INTEGER PRIMARY KEY,
                           name TEXT NOT NULL COLLATE NOCASE,
                           sort TEXT COLLATE NOCASE,
@@ -72,34 +125,143 @@ CREATE TABLE tags ( id INTEGER PRIMARY KEY,
                    name TEXT NOT NULL COLLATE NOCASE,
                    UNIQUE (name)
                  );
-CREATE TABLE data ( id     INTEGER PRIMARY KEY,
-                    book   INTEGER NON NULL,
-                    format TEXT NON NULL COLLATE NOCASE,
-                    uncompressed_size INTEGER NON NULL,
-                    name TEXT NON NULL,
-                    UNIQUE(book, format)
-);

 CREATE VIEW meta AS
        SELECT id, title,
-              (SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors,
+              (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
               (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
               (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
               timestamp,
               (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
               (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
               (SELECT text FROM comments WHERE book=books.id) comments,
               (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
               series_index,
               sort,
               author_sort,
               (SELECT concat(format) FROM data WHERE data.book=books.id) formats,
-              isbn
-        FROM books;
+              isbn,
+              path,
+              lccn,
+              pubdate,
+              flags,
+              uuid
+        FROM books;
+CREATE VIEW tag_browser_authors AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.author=authors.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    sort AS sort
+                FROM authors;
+CREATE VIEW tag_browser_filtered_authors AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(books_authors_link.id) FROM books_authors_link WHERE
+                        author=authors.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.author=authors.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    sort AS sort
+                FROM authors;
+CREATE VIEW tag_browser_filtered_publishers AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(books_publishers_link.id) FROM books_publishers_link WHERE
+                        publisher=publishers.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    name AS sort
+                FROM publishers;
+CREATE VIEW tag_browser_filtered_ratings AS SELECT
+                    id,
+                    rating,
+                    (SELECT COUNT(books_ratings_link.id) FROM books_ratings_link WHERE
+                        rating=ratings.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.rating=ratings.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    rating AS sort
+                FROM ratings;
+CREATE VIEW tag_browser_filtered_series AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(books_series_link.id) FROM books_series_link WHERE
+                        series=series.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_series_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.series=series.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    (title_sort(name)) AS sort
+                FROM series;
+CREATE VIEW tag_browser_filtered_tags AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(books_tags_link.id) FROM books_tags_link WHERE
+                        tag=tags.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.tag=tags.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    name AS sort
+                FROM tags;
+CREATE VIEW tag_browser_publishers AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    name AS sort
+                FROM publishers;
+CREATE VIEW tag_browser_ratings AS SELECT
+                    id,
+                    rating,
+                    (SELECT COUNT(id) FROM books_ratings_link WHERE rating=ratings.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.rating=ratings.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    rating AS sort
+                FROM ratings;
+CREATE VIEW tag_browser_series AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(id) FROM books_series_link WHERE series=series.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_series_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.series=series.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    (title_sort(name)) AS sort
+                FROM series;
+CREATE VIEW tag_browser_tags AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.tag=tags.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    name AS sort
+                FROM tags;
 CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE);
 CREATE INDEX books_authors_link_aidx ON books_authors_link (author);
 CREATE INDEX books_authors_link_bidx ON books_authors_link (book);
 CREATE INDEX books_idx ON books (sort COLLATE NOCASE);
+CREATE INDEX books_languages_link_aidx ON books_languages_link (lang_code);
+CREATE INDEX books_languages_link_bidx ON books_languages_link (book);
 CREATE INDEX books_publishers_link_aidx ON books_publishers_link (publisher);
 CREATE INDEX books_publishers_link_bidx ON books_publishers_link (book);
 CREATE INDEX books_ratings_link_aidx ON books_ratings_link (rating);
@@ -111,32 +273,38 @@ CREATE INDEX books_tags_link_bidx ON books_tags_link (book);
 CREATE INDEX comments_idx ON comments (book);
 CREATE INDEX conversion_options_idx_a ON conversion_options (format COLLATE NOCASE);
 CREATE INDEX conversion_options_idx_b ON conversion_options (book);
+CREATE INDEX custom_columns_idx ON custom_columns (label);
 CREATE INDEX data_idx ON data (book);
+CREATE INDEX formats_idx ON data (format);
+CREATE INDEX languages_idx ON languages (lang_code COLLATE NOCASE);
 CREATE INDEX publishers_idx ON publishers (name COLLATE NOCASE);
-CREATE INDEX series_idx ON series (sort COLLATE NOCASE);
+CREATE INDEX series_idx ON series (name COLLATE NOCASE);
 CREATE INDEX tags_idx ON tags (name COLLATE NOCASE);
 CREATE TRIGGER books_delete_trg
     AFTER DELETE ON books
     BEGIN
         DELETE FROM books_authors_link WHERE book=OLD.id;
         DELETE FROM books_publishers_link WHERE book=OLD.id;
         DELETE FROM books_ratings_link WHERE book=OLD.id;
         DELETE FROM books_series_link WHERE book=OLD.id;
         DELETE FROM books_tags_link WHERE book=OLD.id;
+        DELETE FROM books_languages_link WHERE book=OLD.id;
         DELETE FROM data WHERE book=OLD.id;
         DELETE FROM comments WHERE book=OLD.id;
         DELETE FROM conversion_options WHERE book=OLD.id;
+        DELETE FROM books_plugin_data WHERE book=OLD.id;
+        DELETE FROM identifiers WHERE book=OLD.id;
     END;
-CREATE TRIGGER books_insert_trg
-        AFTER INSERT ON books
+CREATE TRIGGER books_insert_trg AFTER INSERT ON books
         BEGIN
-            UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
+            UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() WHERE id=NEW.id;
         END;
 CREATE TRIGGER books_update_trg
         AFTER UPDATE ON books
         BEGIN
-            UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
-        END;
+            UPDATE books SET sort=title_sort(NEW.title)
+                         WHERE id=NEW.id AND OLD.title <> NEW.title;
+        END;
 CREATE TRIGGER fkc_comments_insert
         BEFORE INSERT ON comments
         BEGIN
@@ -169,23 +337,41 @@ CREATE TRIGGER fkc_data_update
                THEN RAISE(ABORT, 'Foreign key violation: book not in books')
            END;
        END;
-CREATE TRIGGER fkc_delete_books_authors_link
+CREATE TRIGGER fkc_delete_on_authors
        BEFORE DELETE ON authors
        BEGIN
            SELECT CASE
-               WHEN (SELECT COUNT(id) FROM books_authors_link WHERE book=OLD.book) > 0
-               THEN RAISE(ABORT, 'Foreign key violation: author is still referenced')
+               WHEN (SELECT COUNT(id) FROM books_authors_link WHERE author=OLD.id) > 0
+               THEN RAISE(ABORT, 'Foreign key violation: authors is still referenced')
            END;
        END;
-CREATE TRIGGER fkc_delete_books_publishers_link
+CREATE TRIGGER fkc_delete_on_languages
+       BEFORE DELETE ON languages
+       BEGIN
+           SELECT CASE
+               WHEN (SELECT COUNT(id) FROM books_languages_link WHERE lang_code=OLD.id) > 0
+               THEN RAISE(ABORT, 'Foreign key violation: language is still referenced')
+           END;
+       END;
+CREATE TRIGGER fkc_delete_on_languages_link
+       BEFORE INSERT ON books_languages_link
+       BEGIN
+           SELECT CASE
+               WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
+               THEN RAISE(ABORT, 'Foreign key violation: book not in books')
+               WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
+               THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
+           END;
+       END;
+CREATE TRIGGER fkc_delete_on_publishers
        BEFORE DELETE ON publishers
        BEGIN
            SELECT CASE
-               WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE book=OLD.book) > 0
-               THEN RAISE(ABORT, 'Foreign key violation: publisher is still referenced')
+               WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=OLD.id) > 0
+               THEN RAISE(ABORT, 'Foreign key violation: publishers is still referenced')
            END;
        END;
-CREATE TRIGGER fkc_delete_books_series_link
+CREATE TRIGGER fkc_delete_on_series
        BEFORE DELETE ON series
        BEGIN
            SELECT CASE
@@ -193,12 +379,12 @@ CREATE TRIGGER fkc_delete_books_series_link
                THEN RAISE(ABORT, 'Foreign key violation: series is still referenced')
            END;
        END;
-CREATE TRIGGER fkc_delete_books_tags_link
+CREATE TRIGGER fkc_delete_on_tags
        BEFORE DELETE ON tags
        BEGIN
            SELECT CASE
                WHEN (SELECT COUNT(id) FROM books_tags_link WHERE tag=OLD.id) > 0
-               THEN RAISE(ABORT, 'Foreign key violation: tag is still referenced')
+               THEN RAISE(ABORT, 'Foreign key violation: tags is still referenced')
            END;
        END;
 CREATE TRIGGER fkc_insert_books_authors_link
@@ -267,6 +453,22 @@ CREATE TRIGGER fkc_update_books_authors_link_b
                THEN RAISE(ABORT, 'Foreign key violation: author not in authors')
            END;
        END;
+CREATE TRIGGER fkc_update_books_languages_link_a
+       BEFORE UPDATE OF book ON books_languages_link
+       BEGIN
+           SELECT CASE
+               WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
+               THEN RAISE(ABORT, 'Foreign key violation: book not in books')
+           END;
+       END;
+CREATE TRIGGER fkc_update_books_languages_link_b
+       BEFORE UPDATE OF lang_code ON books_languages_link
+       BEGIN
+           SELECT CASE
+               WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
+               THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
+           END;
+       END;
 CREATE TRIGGER fkc_update_books_publishers_link_a
        BEFORE UPDATE OF book ON books_publishers_link
        BEGIN
@@ -341,3 +543,4 @@ CREATE TRIGGER series_update_trg
        BEGIN
            UPDATE series SET sort=NEW.name WHERE id=NEW.id;
        END;
+pragma user_version=20;
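A small verification sketch (not part of the commit): the dump above ends by declaring schema version 20, so an upgraded library's metadata.db should report that version and contain the new tables. The database path is an example:

    import sqlite3

    conn = sqlite3.connect('metadata.db')
    # Expect 20 after the migration this schema corresponds to
    print 'user_version:', conn.execute('PRAGMA user_version').fetchone()[0]
    for (name,) in conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name IN "
            "('languages', 'books_languages_link', 'identifiers', 'preferences')"):
        print 'found new table:', name
    conn.close()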
Binary file not shown.
@@ -2,6 +2,11 @@ a {
     text-decoration: none;
     color: blue
 }
+
+a:hover {
+    color: red
+}
+
 .comments {
     margin-top: 0;
     padding-top: 0;
@@ -23,6 +23,9 @@ wWinMain(HINSTANCE Inst, HINSTANCE PrevInst,
     ret = execute_python_entrypoint(BASENAME, MODULE, FUNCTION,
             stdout_redirect, stderr_redirect);

+    if (stdout != NULL) fclose(stdout);
+    if (stderr != NULL) fclose(stderr);
+
     DeleteFile(stdout_redirect);
     DeleteFile(stderr_redirect);
@@ -69,7 +69,24 @@ nmake -f ms\ntdll.mak install
 Qt
 --------

-Extract Qt sourcecode to C:\Qt\4.x.x. Run configure and make::
+Extract Qt sourcecode to C:\Qt\4.x.x.
+
+Qt uses its own routine to locate and load "system libraries" including the openssl libraries needed for "Get Books". This means that we have to apply the following patch to have Qt load the openssl libraries bundled with calibre:
+
+
+--- src/corelib/plugin/qsystemlibrary.cpp	2011-02-22 05:04:00.000000000 -0700
++++ src/corelib/plugin/qsystemlibrary.cpp	2011-04-25 20:53:13.635247466 -0600
+@@ -110,7 +110,7 @@ HINSTANCE QSystemLibrary::load(const wch
+
+ #if !defined(QT_BOOTSTRAPPED)
+     if (!onlySystemDirectory)
+-        searchOrder << QFileInfo(qAppFileName()).path();
++        searchOrder << (QFileInfo(qAppFileName()).path().replace(QLatin1Char('/'), QLatin1Char('\\')) + QString::fromLatin1("\\DLLs\\"));
+ #endif
+     searchOrder << qSystemDirectory();
+
+
+Now, run configure and make::

 configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake
@@ -11,7 +11,10 @@
                 SummaryCodepage='1252' />

     <Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
+    <!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
+         after you update Qt beyond 4.7.2. 'emus' means re-install even if version is the same not just if it is older. -->
+    <Property Id='REINSTALLMODE' Value='emus'/>

     <Upgrade Id="{upgrade_code}">
         <UpgradeVersion Maximum="{version}"
                         IncludeMaximum="yes"
@@ -347,9 +347,10 @@ class UploadUserManual(Command): # {{{
         with NamedTemporaryFile(suffix='.zip') as f:
             os.fchmod(f.fileno(),
                 stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH|stat.S_IWRITE)
-            with CurrentDir(self.d(path)):
+            with CurrentDir(path):
                 with ZipFile(f, 'w') as zf:
                     for x in os.listdir('.'):
+                        if x.endswith('.swp'): continue
                         zf.write(x)
                         if os.path.isdir(x):
                             for y in os.listdir(x):
@@ -388,7 +388,11 @@ class CurrentDir(object):
         return self.cwd

     def __exit__(self, *args):
-        os.chdir(self.cwd)
+        try:
+            os.chdir(self.cwd)
+        except:
+            # The previous CWD no longer exists
+            pass


 class StreamReadWrapper(object):
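A sketch of the failure mode the new try/except guards against, assuming CurrentDir is importable from the calibre package (paths are throwaway temp directories):

    import os, shutil, tempfile
    from calibre import CurrentDir

    start = tempfile.mkdtemp()
    work = tempfile.mkdtemp()
    os.chdir(start)
    with CurrentDir(work):
        shutil.rmtree(start)   # the directory __exit__ would chdir back to is gone
    # before this change the exit raised OSError; now it passes silently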
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 7, 57)
+numeric_version = (0, 8, 0)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"
@@ -449,7 +449,7 @@ class CatalogPlugin(Plugin): # {{{
             ['author_sort','authors','comments','cover','formats',
              'id','isbn','ondevice','pubdate','publisher','rating',
              'series_index','series','size','tags','timestamp',
-             'title','uuid'])
+             'title_sort','title','uuid'])
         all_custom_fields = set(db.custom_field_keys())
         all_fields = all_std_fields.union(all_custom_fields)
@@ -607,6 +607,7 @@ class StoreBase(Plugin): # {{{
     supported_platforms = ['windows', 'osx', 'linux']
     author = 'John Schember'
     type = _('Store')
+    minimum_calibre_version = (0, 8, 0)

     actual_plugin = None
@@ -9,7 +9,6 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
-from calibre.utils.config import test_eight_code

 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
@@ -596,6 +595,7 @@ from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
 from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
 from calibre.devices.nook.driver import NOOK, NOOK_COLOR
 from calibre.devices.prs505.driver import PRS505
+from calibre.devices.user_defined.driver import USER_DEFINED
 from calibre.devices.android.driver import ANDROID, S60
 from calibre.devices.nokia.driver import N770, N810, E71X, E52
 from calibre.devices.eslick.driver import ESLICK, EBK52
@@ -613,6 +613,7 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
+from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX

 from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
@@ -621,29 +622,16 @@ from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
         Epubcheck, ]

-if test_eight_code:
 # New metadata download plugins {{{
 from calibre.ebooks.metadata.sources.google import GoogleBooks
 from calibre.ebooks.metadata.sources.amazon import Amazon
 from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
 from calibre.ebooks.metadata.sources.isbndb import ISBNDB
 from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban

-    plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
+plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]

 # }}}
-else:
-    from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
-            KentDistrictLibrary
-    from calibre.ebooks.metadata.douban import DoubanBooks
-    from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
-    from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
-            AmazonCovers, DoubanCovers
-
-    plugins += [GoogleBooks, ISBNDB, Amazon,
-            OpenLibraryCovers, AmazonCovers, DoubanCovers,
-            NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]

 plugins += [
     ComicInput,
@@ -756,6 +744,9 @@ plugins += [
     EEEREADER,
     NEXTBOOK,
     ITUNES,
+    BOEYE_BEX,
+    BOEYE_BDX,
+    USER_DEFINED,
 ]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                         x.__name__.endswith('MetadataReader')]
@@ -868,10 +859,7 @@ plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
         ActionRestart, ActionOpenFolder, ActionConnectShare,
         ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
         ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
-        ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch]
+        ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore]

-if test_eight_code:
-    plugins += [ActionStore]
-
 # }}}
@@ -1097,10 +1085,8 @@ class Misc(PreferencesPlugin):

 plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
         CommonOptions, OutputOptions, Adding, Saving, Sending, Plugboard,
-        Email, Server, Plugins, Tweaks, Misc, TemplateFunctions]
+        Email, Server, Plugins, Tweaks, Misc, TemplateFunctions,
+        MetadataSources]

-if test_eight_code:
-    plugins.append(MetadataSources)
-
 #}}}
@@ -1110,6 +1096,11 @@ class StoreAmazonKindleStore(StoreBase):
     description = _('Kindle books from Amazon')
     actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'

+class StoreAmazonUKKindleStore(StoreBase):
+    name = 'Amazon UK Kindle'
+    description = _('Kindle books from Amazon.uk')
+    actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
+
 class StoreBaenWebScriptionStore(StoreBase):
     name = 'Baen WebScription'
     description = _('Ebooks for readers.')
@@ -1175,10 +1166,27 @@ class StoreSmashwordsStore(StoreBase):
     description = _('Your ebook. Your way.')
     actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'

-plugins += [StoreAmazonKindleStore, StoreBaenWebScriptionStore, StoreBNStore,
+class StoreWaterstonesUKStore(StoreBase):
+    name = 'Waterstones UK'
+    description = _('Feel every word')
+    actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
+
+class StoreFoylesUKStore(StoreBase):
+    name = 'Foyles UK'
+    description = _('Foyles of London, online')
+    actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
+
+class AmazonDEKindleStore(StoreBase):
+    name = 'Amazon DE Kindle'
+    description = _('Kindle eBooks')
+    actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
+
+plugins += [StoreAmazonKindleStore, AmazonDEKindleStore, StoreAmazonUKKindleStore,
+        StoreBaenWebScriptionStore, StoreBNStore,
         StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
-        StoreEHarlequinStoretore,
-        StoreFeedbooksStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
-        StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore]
+        StoreEHarlequinStoretore, StoreFeedbooksStore,
+        StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
+        StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
+        StoreWaterstonesUKStore]

 # }}}
|
@ -15,12 +15,11 @@ from calibre.customize.profiles import InputProfile, OutputProfile
|
|||||||
from calibre.customize.builtins import plugins as builtin_plugins
|
from calibre.customize.builtins import plugins as builtin_plugins
|
||||||
from calibre.devices.interface import DevicePlugin
|
from calibre.devices.interface import DevicePlugin
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.metadata.covers import CoverDownload
|
from calibre.utils.config import (make_config_dir, Config, ConfigProxy,
|
||||||
from calibre.ebooks.metadata.fetch import MetadataSource
|
plugin_dir, OptionParser)
|
||||||
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
|
|
||||||
plugin_dir, OptionParser, prefs
|
|
||||||
from calibre.ebooks.epub.fix import ePubFixer
|
from calibre.ebooks.epub.fix import ePubFixer
|
||||||
from calibre.ebooks.metadata.sources.base import Source
|
from calibre.ebooks.metadata.sources.base import Source
|
||||||
|
from calibre.constants import DEBUG
|
||||||
|
|
||||||
builtin_names = frozenset([p.name for p in builtin_plugins])
|
builtin_names = frozenset([p.name for p in builtin_plugins])
|
||||||
|
|
||||||
@@ -93,8 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
     config['enabled_plugins'] = ep

 default_disabled_plugins = set([
-    'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
-    'Kent District Library'
+    'Overdrive',
 ])

 def is_disabled(plugin):
@@ -190,44 +188,6 @@ def output_profiles():
         yield plugin
 # }}}

-# Metadata sources {{{
-def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
-    for plugin in _initialized_plugins:
-        if isinstance(plugin, MetadataSource) and \
-                plugin.metadata_type == metadata_type:
-            if is_disabled(plugin):
-                continue
-            if customize:
-                customization = config['plugin_customization']
-                plugin.site_customization = customization.get(plugin.name, None)
-            if plugin.name == 'IsbnDB' and isbndb_key is not None:
-                plugin.site_customization = isbndb_key
-            yield plugin
-
-def get_isbndb_key():
-    return config['plugin_customization'].get('IsbnDB', None)
-
-def set_isbndb_key(key):
-    for plugin in _initialized_plugins:
-        if plugin.name == 'IsbnDB':
-            return customize_plugin(plugin, key)
-
-def migrate_isbndb_key():
-    key = prefs['isbndb_com_key']
-    if key:
-        prefs.set('isbndb_com_key', '')
-        set_isbndb_key(key)
-
-def cover_sources():
-    customization = config['plugin_customization']
-    for plugin in _initialized_plugins:
-        if isinstance(plugin, CoverDownload):
-            if not is_disabled(plugin):
-                plugin.site_customization = customization.get(plugin.name, '')
-                yield plugin
-
-# }}}
-
 # Interface Actions # {{{

 def interface_actions():
@@ -527,8 +487,9 @@ def initialize_plugins():
             plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
             _initialized_plugins.append(plugin)
         except:
-            print 'Failed to initialize plugin...'
-            traceback.print_exc()
+            print 'Failed to initialize plugin:', repr(zfp)
+            if DEBUG:
+                traceback.print_exc()
     _initialized_plugins.sort(cmp=lambda x,y:cmp(x.priority, y.priority), reverse=True)
     reread_filetype_plugins()
     reread_metadata_plugins()
@@ -156,3 +156,60 @@ def debug(ioreg_to_tmp=False, buf=None):
         sys.stdout = oldo
         sys.stderr = olde

+def device_info(ioreg_to_tmp=False, buf=None):
+    from calibre.devices.scanner import DeviceScanner, win_pnp_drives
+    from calibre.constants import iswindows
+    import re
+
+    res = {}
+    device_details = {}
+    device_set = set()
+    drive_details = {}
+    drive_set = set()
+    res['device_set'] = device_set
+    res['device_details'] = device_details
+    res['drive_details'] = drive_details
+    res['drive_set'] = drive_set
+
+    try:
+        s = DeviceScanner()
+        s.scan()
+        devices = (s.devices)
+        if not iswindows:
+            devices = [list(x) for x in devices]
+            for dev in devices:
+                for i in range(3):
+                    dev[i] = hex(dev[i])
+                d = dev[0] + dev[1] + dev[2]
+                device_set.add(d)
+                device_details[d] = dev[0:3]
+        else:
+            for dev in devices:
+                vid = re.search('vid_([0-9a-f]*)&', dev)
+                if vid:
+                    vid = vid.group(1)
+                pid = re.search('pid_([0-9a-f]*)&', dev)
+                if pid:
+                    pid = pid.group(1)
+                rev = re.search('rev_([0-9a-f]*)$', dev)
+                if rev:
+                    rev = rev.group(1)
+                d = vid+pid+rev
+                device_set.add(d)
+                device_details[d] = (vid, pid, rev)
+
+            drives = win_pnp_drives(debug=False)
+            for drive,details in drives.iteritems():
+                order = 'ORD_' + str(drive.order)
+                ven = re.search('VEN_([^&]*)&', details)
+                if ven:
+                    ven = ven.group(1)
+                prod = re.search('PROD_([^&]*)&', details)
+                if prod:
+                    prod = prod.group(1)
+                d = (order, ven, prod)
+                drive_details[drive] = d
+                drive_set.add(drive)
+    finally:
+        pass
+    return res
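For reference, the non-Windows branch of the new device_info() helper keys each device by concatenating the hex() forms of its vendor id, product id and BCD revision. A minimal standalone sketch of that keying scheme, with made-up example values (0x2080/0x0001/0x0100 are illustrative, not taken from the commit):

    # Illustrative values only; the real triples come from DeviceScanner().scan()
    dev = [0x2080, 0x0001, 0x0100]   # vendor id, product id, BCD revision
    dev = [hex(x) for x in dev]      # ['0x2080', '0x1', '0x100']
    key = dev[0] + dev[1] + dev[2]   # '0x20800x10x100'
    print key                        # this string is what lands in device_set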
@@ -62,7 +62,7 @@ class ANDROID(USBMS):
             0x502 : { 0x3203 : [0x0100]},

             # Dell
-            0x413c : { 0xb007 : [0x0100, 0x0224]},
+            0x413c : { 0xb007 : [0x0100, 0x0224, 0x0226]},

             # LG
             0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
@@ -109,10 +109,10 @@ class ANDROID(USBMS):
             'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
             'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
             '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
-            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE']
+            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
-            'A70S', 'A101IT', '7', 'INCREDIBLE']
+            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']

     OSX_MAIN_MEM = 'Android Device Main Memory'

@@ -163,6 +163,8 @@ class ITUNES(DriverBase):
         settings()
         set_progress_reporter()
         upload_books()
+        _get_fpath()
+        _update_epub_metadata()
         add_books_to_metadata()
         use_plugboard_ext()
         set_plugboard()
|
|||||||
|
|
||||||
cached_books[this_book.path] = {
|
cached_books[this_book.path] = {
|
||||||
'title':book.Name,
|
'title':book.Name,
|
||||||
'author':book.artist().split(' & '),
|
'author':book.Artist.split(' & '),
|
||||||
'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
|
'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
|
||||||
'uuid': book.Composer,
|
'uuid': book.Composer,
|
||||||
'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub'
|
'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub'
|
||||||
@@ -504,7 +506,7 @@ class ITUNES(DriverBase):
         if self.iTunes:
             # Check for connected book-capable device
             self.sources = self._get_sources()
-            if 'iPod' in self.sources:
+            if 'iPod' in self.sources and not self.ejected:
                 #if DEBUG:
                     #sys.stdout.write('.')
                     #sys.stdout.flush()
@@ -2034,16 +2036,17 @@ class ITUNES(DriverBase):
         if 'iPod' in self.sources:
             connected_device = self.sources['iPod']
             device = self.iTunes.sources[connected_device]
+            dev_books = None
             for pl in device.playlists():
                 if pl.special_kind() == appscript.k.Books:
                     if DEBUG:
                         self.log.info(" Book playlist: '%s'" % (pl.name()))
-                    books = pl.file_tracks()
+                    dev_books = pl.file_tracks()
                     break
             else:
                 self.log.error(" book_playlist not found")

-            for book in books:
+            for book in dev_books:
                 # This may need additional entries for international iTunes users
                 if book.kind() in self.Audiobooks:
                     if DEBUG:
@@ -2621,42 +2624,42 @@ class ITUNES(DriverBase):
         # Touch the OPF timestamp
         try:
             zf_opf = ZipFile(fpath,'r')
+            fnames = zf_opf.namelist()
+            opf = [x for x in fnames if '.opf' in x][0]
         except:
             raise UserFeedback("'%s' is not a valid EPUB" % metadata.title,
                                None,
                                level=UserFeedback.WARN)
-        fnames = zf_opf.namelist()
-        opf = [x for x in fnames if '.opf' in x][0]
-        if opf:
-            opf_tree = etree.fromstring(zf_opf.read(opf))
-            md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
-            if md_els:
-                ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
-                if ts is not None:
-                    timestamp = ts.get('content')
-                    old_ts = parse_date(timestamp)
-                    metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
-                        old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
-                    if DEBUG:
-                        self.log.info(" existing timestamp: %s" % metadata.timestamp)
-                else:
-                    metadata.timestamp = now()
-                    if DEBUG:
-                        self.log.info(" add timestamp: %s" % metadata.timestamp)
-            else:
-                metadata.timestamp = now()
-                if DEBUG:
-                    self.log.warning(" missing <metadata> block in OPF file")
-                    self.log.info(" add timestamp: %s" % metadata.timestamp)
+        opf_tree = etree.fromstring(zf_opf.read(opf))
+        md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
+        if md_els:
+            ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
+            if ts is not None:
+                timestamp = ts.get('content')
+                old_ts = parse_date(timestamp)
+                metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
+                    old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
+                if DEBUG:
+                    self.log.info(" existing timestamp: %s" % metadata.timestamp)
+            else:
+                metadata.timestamp = now()
+                if DEBUG:
+                    self.log.info(" add timestamp: %s" % metadata.timestamp)
+        else:
+            metadata.timestamp = now()
+            if DEBUG:
+                self.log.warning(" missing <metadata> block in OPF file")
+                self.log.info(" add timestamp: %s" % metadata.timestamp)

         # Force the language declaration for iBooks 1.1
         #metadata.language = get_lang().replace('_', '-')

         # Updates from metadata plugboard (ignoring publisher)
         metadata.language = metadata_x.language

         if DEBUG:
             if metadata.language != metadata_x.language:
                 self.log.info(" rewriting language: <dc:language>%s</dc:language>" % metadata.language)

         zf_opf.close()

 src/calibre/devices/boeye/__init__.py |  0  (new file)
 src/calibre/devices/boeye/driver.py   | 56  (new file)
@@ -0,0 +1,56 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Ken <ken at szboeye.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Device driver for BOEYE serial readers
+'''
+
+from calibre.devices.usbms.driver import USBMS
+
+class BOEYE_BEX(USBMS):
+    name = 'BOEYE BEX reader driver'
+    gui_name = 'BOEYE BEX'
+    description = _('Communicate with BOEYE BEX Serial eBook readers.')
+    author = 'szboeye'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    FORMATS = ['epub', 'mobi', 'fb2', 'lit', 'prc', 'pdf', 'rtf', 'txt', 'djvu', 'doc', 'chm', 'html', 'zip', 'pdb']
+
+    VENDOR_ID = [0x0085]
+    PRODUCT_ID = [0x600]
+
+    VENDOR_NAME = 'LINUX'
+    WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
+    OSX_MAIN_MEM = 'Linux File-Stor Gadget Media'
+
+    MAIN_MEMORY_VOLUME_LABEL = 'BOEYE BEX Storage Card'
+
+    EBOOK_DIR_MAIN = 'Documents'
+    SUPPORTS_SUB_DIRS = True
+
+class BOEYE_BDX(USBMS):
+    name = 'BOEYE BDX reader driver'
+    gui_name = 'BOEYE BDX'
+    description = _('Communicate with BOEYE BDX serial eBook readers.')
+    author = 'szboeye'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    FORMATS = ['epub', 'mobi', 'fb2', 'lit', 'prc', 'pdf', 'rtf', 'txt', 'djvu', 'doc', 'chm', 'html', 'zip', 'pdb']
+
+    VENDOR_ID = [0x0085]
+    PRODUCT_ID = [0x800]
+
+    VENDOR_NAME = 'LINUX'
+    WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
+    WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
+
+    OSX_MAIN_MEM = 'Linux File-Stor Gadget Media'
+    OSX_CARD_A_MEM = 'Linux File-Stor Gadget Media'
+
+    MAIN_MEMORY_VOLUME_LABEL = 'BOEYE BDX Internal Memory'
+    STORAGE_CARD_VOLUME_LABEL = 'BOEYE BDX Storage Card'
+
+    EBOOK_DIR_MAIN = 'Documents'
+    EBOOK_DIR_CARD_A = 'Documents'
+    SUPPORTS_SUB_DIRS = True
@@ -64,7 +64,7 @@ class HANLINV3(USBMS):
         return names

     def linux_swap_drives(self, drives):
-        if len(drives) < 2: return drives
+        if len(drives) < 2 or not drives[1] or not drives[2]: return drives
         drives = list(drives)
         t = drives[0]
         drives[0] = drives[1]
@@ -95,7 +95,6 @@ class HANLINV5(HANLINV3):
     gui_name = 'Hanlin V5'
     description = _('Communicate with Hanlin V5 eBook readers.')

-
     VENDOR_ID = [0x0492]
     PRODUCT_ID = [0x8813]
     BCD = [0x319]
@@ -164,7 +164,7 @@ class APNXBuilder(object):
                 if c == '/':
                     closing = True
                     continue
-                elif c in ('d', 'p'):
+                elif c == 'p':
                     if closing:
                         in_p = False
                     else:
@@ -187,7 +187,7 @@ class LUMIREAD(USBMS):
                 cfilepath = cfilepath.replace(os.sep+'books'+os.sep,
                         os.sep+'covers'+os.sep, 1)
                 pdir = os.path.dirname(cfilepath)
-                if not os.exists(pdir):
+                if not os.path.exists(pdir):
                     os.makedirs(pdir)
                 with open(cfilepath+'.jpg', 'wb') as f:
                     f.write(metadata.thumbnail[-1])
@@ -94,6 +94,9 @@ class DeviceConfig(object):
         if isinstance(cls.EXTRA_CUSTOMIZATION_MESSAGE, list):
             ec = []
             for i in range(0, len(cls.EXTRA_CUSTOMIZATION_MESSAGE)):
+                if config_widget.opt_extra_customization[i] is None:
+                    ec.append(None)
+                    continue
                 if hasattr(config_widget.opt_extra_customization[i], 'isChecked'):
                     ec.append(config_widget.opt_extra_customization[i].isChecked())
                 else:
 src/calibre/devices/user_defined/__init__.py |   0  (new file)
 src/calibre/devices/user_defined/driver.py   | 110  (new file)
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.devices.usbms.driver import USBMS
+
+class USER_DEFINED(USBMS):
+
+    name = 'User Defined USB driver'
+    gui_name = 'User Defined USB Device'
+    author = 'Kovid Goyal'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    # Ordered list of supported formats
+    FORMATS = ['epub', 'mobi', 'pdf']
+
+    VENDOR_ID = 0xFFFF
+    PRODUCT_ID = 0xFFFF
+    BCD = None
+
+    EBOOK_DIR_MAIN = ''
+    EBOOK_DIR_CARD_A = ''
+
+    VENDOR_NAME = []
+    WINDOWS_MAIN_MEM = ''
+    WINDOWS_CARD_A_MEM = ''
+
+    OSX_MAIN_MEM = 'Device Main Memory'
+
+    MAIN_MEMORY_VOLUME_LABEL = 'Device Main Memory'
+
+    SUPPORTS_SUB_DIRS = True
+
+    EXTRA_CUSTOMIZATION_MESSAGE = [
+        _('USB Vendor ID (in hex)') + ':::<p>' +
+            _('Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('USB Product ID (in hex)')+ ':::<p>' +
+            _('Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('USB Revision ID (in hex)')+ ':::<p>' +
+            _('Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        '',
+        _('Windows main memory vendor string') + ':::<p>' +
+            _('This field is used only on windows. '
+              'Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('Windows main memory ID string') + ':::<p>' +
+            _('This field is used only on windows. '
+              'Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('Windows card A vendor string') + ':::<p>' +
+            _('This field is used only on windows. '
+              'Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('Windows card A ID string') + ':::<p>' +
+            _('This field is used only on windows. '
+              'Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('Main memory folder') + ':::<p>' +
+            _('Enter the folder where the books are to be stored. This folder '
+              'is prepended to any send_to_device template') + '</p>',
+        _('Card A folder') + ':::<p>' +
+            _('Enter the folder where the books are to be stored. This folder '
+              'is prepended to any send_to_device template') + '</p>',
+    ]
+    EXTRA_CUSTOMIZATION_DEFAULT = [
+        '0xffff',
+        '0xffff',
+        '0xffff',
+        None,
+        '',
+        '',
+        '',
+        '',
+        '',
+        '',
+    ]
+    OPT_USB_VENDOR_ID = 0
+    OPT_USB_PRODUCT_ID = 1
+    OPT_USB_REVISION_ID = 2
+    OPT_USB_WINDOWS_MM_VEN_ID = 4
+    OPT_USB_WINDOWS_MM_ID = 5
+    OPT_USB_WINDOWS_CA_VEN_ID = 6
+    OPT_USB_WINDOWS_CA_ID = 7
+    OPT_MAIN_MEM_FOLDER = 8
+    OPT_CARD_A_FOLDER = 9
+
+    def initialize(self):
+        try:
+            e = self.settings().extra_customization
+            self.VENDOR_ID = int(e[self.OPT_USB_VENDOR_ID], 16)
+            self.PRODUCT_ID = int(e[self.OPT_USB_PRODUCT_ID], 16)
+            self.BCD = [int(e[self.OPT_USB_REVISION_ID], 16)]
+            if e[self.OPT_USB_WINDOWS_MM_VEN_ID]:
+                self.VENDOR_NAME.append(e[self.OPT_USB_WINDOWS_MM_VEN_ID])
+            if e[self.OPT_USB_WINDOWS_CA_VEN_ID] and \
+                    e[self.OPT_USB_WINDOWS_CA_VEN_ID] not in self.VENDOR_NAME:
+                self.VENDOR_NAME.append(e[self.OPT_USB_WINDOWS_CA_VEN_ID])
+            self.WINDOWS_MAIN_MEM = e[self.OPT_USB_WINDOWS_MM_ID] + '&'
+            self.WINDOWS_CARD_A_MEM = e[self.OPT_USB_WINDOWS_CA_ID] + '&'
+            self.EBOOK_DIR_MAIN = e[self.OPT_MAIN_MEM_FOLDER]
+            self.EBOOK_DIR_CARD_A = e[self.OPT_CARD_A_FOLDER]
+        except:
+            import traceback
+            traceback.print_exc()
+        USBMS.initialize(self)
@@ -19,12 +19,12 @@ class CHMInput(InputFormatPlugin):
     description = 'Convert CHM files to OEB'
     file_types = set(['chm'])

-    def _chmtohtml(self, output_dir, chm_path, no_images, log):
+    def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
         from calibre.ebooks.chm.reader import CHMReader
         log.debug('Opening CHM file')
         rdr = CHMReader(chm_path, log, self.opts)
         log.debug('Extracting CHM to %s' % output_dir)
-        rdr.extract_content(output_dir)
+        rdr.extract_content(output_dir, debug_dump=debug_dump)
         self._chm_reader = rdr
         return rdr.hhc_path

@@ -47,7 +47,12 @@ class CHMInput(InputFormatPlugin):
         stream.close()
         log.debug('tdir=%s' % tdir)
         log.debug('stream.name=%s' % stream.name)
-        mainname = self._chmtohtml(tdir, chm_name, no_images, log)
+        debug_dump = False
+        odi = options.debug_pipeline
+        if odi:
+            debug_dump = os.path.join(odi, 'input')
+        mainname = self._chmtohtml(tdir, chm_name, no_images, log,
+                debug_dump=debug_dump)
         mainpath = os.path.join(tdir, mainname)

         metadata = get_metadata_from_reader(self._chm_reader)
@@ -56,7 +61,6 @@ class CHMInput(InputFormatPlugin):
         #from calibre import ipython
         #ipython()

-        odi = options.debug_pipeline
         options.debug_pipeline = None
         options.input_encoding = 'utf-8'
         # try a custom conversion:
@@ -97,7 +97,7 @@ class CHMReader(CHMFile):
             raise CHMError("'%s' is zero bytes in length!"%(path,))
         return data

-    def ExtractFiles(self, output_dir=os.getcwdu()):
+    def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
         html_files = set([])
         for path in self.Contents():
             lpath = os.path.join(output_dir, path)
@@ -123,6 +123,9 @@ class CHMReader(CHMFile):
                     self.log.warn('%r filename too long, skipping'%path)
                     continue
                 raise
+        if debug_dump:
+            import shutil
+            shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
         for lpath in html_files:
             with open(lpath, 'r+b') as f:
                 data = f.read()
@@ -249,8 +252,8 @@ class CHMReader(CHMFile):
         if not os.path.isdir(dir):
             os.makedirs(dir)

-    def extract_content(self, output_dir=os.getcwdu()):
-        self.ExtractFiles(output_dir=output_dir)
+    def extract_content(self, output_dir=os.getcwdu(), debug_dump=False):
+        self.ExtractFiles(output_dir=output_dir, debug_dump=debug_dump)

|
@ -854,7 +854,8 @@ OptionRecommendation(name='sr3_replace',
|
|||||||
if isinstance(ret, basestring):
|
if isinstance(ret, basestring):
|
||||||
shutil.copytree(output_dir, out_dir)
|
shutil.copytree(output_dir, out_dir)
|
||||||
else:
|
else:
|
||||||
os.makedirs(out_dir)
|
if not os.path.exists(out_dir):
|
||||||
|
os.makedirs(out_dir)
|
||||||
self.dump_oeb(ret, out_dir)
|
self.dump_oeb(ret, out_dir)
|
||||||
if self.input_fmt == 'recipe':
|
if self.input_fmt == 'recipe':
|
||||||
zf = ZipFile(os.path.join(self.opts.debug_pipeline,
|
zf = ZipFile(os.path.join(self.opts.debug_pipeline,
|
||||||
|
@@ -402,7 +402,7 @@ class HTMLPreProcessor(object):
                   (re.compile(r'((?<=</a>)\s*file:/{2,4}[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),

                   # Center separator lines
-                  (re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
+                  (re.compile(u'<br>\s*(?P<break>([*#•✦=] *){3,})\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group('break') + '</p>'),

                   # Remove page links
                   (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
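The rewritten separator pattern requires a run of at least three break characters between the <br> tags, and the replacement now names the group it re-emits; the old pattern matched any run, so a lone pair of asterisks in ordinary text could be centered as a scene break. A standalone check of the difference (the sample line is made up for illustration):

    # -*- coding: utf-8 -*-
    import re
    old = re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>')
    new = re.compile(u'<br>\s*(?P<break>([*#•✦=] *){3,})\s*<br>')
    line = u'<br> * * <br>'                # only two break characters
    print old.search(line) is not None     # True: the old pattern over-matches
    print new.search(line) is not None     # False: the new one wants three or more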
@@ -156,17 +156,17 @@ class HeuristicProcessor(object):
         ]

         ITALICIZE_STYLE_PATS = [
-            r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_',
-            r'(?msu)(?<=[\s>])/(?P<words>[^/\*>]+)/',
-            r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)~~',
-            r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)\*',
-            r'(?msu)(?<=[\s>])~(?P<words>[^~]+)~',
-            r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)/_',
-            r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)\*_',
-            r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)/\*',
-            r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)/\*_',
-            r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+):/',
-            r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+):\|',
+            ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_',
+            ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*>]+)/',
+            ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~',
+            ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
+            ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
+            ur'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_',
+            ur'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_',
+            ur'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*',
+            ur'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_',
+            ur'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/',
+            ur'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|',
         ]

         for word in ITALICIZE_WORDS:
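The new patterns are unicode literals with a widened lookbehind, so an emphasis marker that directly follows an opening quote is now recognised; under the old (?<=[\s>]) lookbehind, _word_ right after a double quote never matched. A standalone sketch of the first pattern (wrapping the match in <i> is illustrative here; the processor's actual substitution is defined elsewhere):

    # -*- coding: utf-8 -*-
    import re
    pat = re.compile(ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_')
    print pat.sub(ur'<i>\g<words></i>', u'He said "_really_"')
    # prints: He said "<i>really</i>"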
@@ -518,13 +518,13 @@ class HeuristicProcessor(object):
         if re.findall('(<|>)', replacement_break):
             if re.match('^<hr', replacement_break):
                 if replacement_break.find('width') != -1:
                     width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
                     replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
                     divpercent = (100 - width) / 2
                     hr_open = re.sub('45', str(divpercent), hr_open)
                     scene_break = hr_open+replacement_break+'</div>'
                 else:
                     scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
             elif re.match('^<img', replacement_break):
                 scene_break = self.scene_break_open+replacement_break+'</p>'
             else:
@@ -584,10 +584,10 @@ class HeuristicProcessor(object):
             #print "styles for this line are: "+str(styles)
             split_styles = []
             for style in styles:
                 #print "style is: "+str(style)
                 newstyle = style.split(':')
                 #print "newstyle is: "+str(newstyle)
                 split_styles.append(newstyle)
             styles = split_styles
             for style, setting in styles:
                 if style == 'text-align' and setting != 'left':
@@ -309,9 +309,9 @@ class HTMLInput(InputFormatPlugin):

     def create_oebbook(self, htmlpath, basedir, opts, log, mi):
         from calibre.ebooks.conversion.plumber import create_oebbook
-        from calibre.ebooks.oeb.base import DirContainer, \
-            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, \
-            xpath
+        from calibre.ebooks.oeb.base import (DirContainer,
+            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
+            xpath)
         from calibre import guess_type
         from calibre.ebooks.oeb.transforms.metadata import \
             meta_info_to_oeb_metadata
@@ -345,7 +345,8 @@ class HTMLInput(InputFormatPlugin):
         htmlfile_map = {}
         for f in filelist:
             path = f.path
-            oeb.container = DirContainer(os.path.dirname(path), log)
+            oeb.container = DirContainer(os.path.dirname(path), log,
+                    ignore_opf=True)
             bname = os.path.basename(path)
             id, href = oeb.manifest.generate(id='html',
                     href=ascii_filename(bname))
@@ -369,7 +370,7 @@ class HTMLInput(InputFormatPlugin):
         for f in filelist:
             path = f.path
             dpath = os.path.dirname(path)
-            oeb.container = DirContainer(dpath, log)
+            oeb.container = DirContainer(dpath, log, ignore_opf=True)
             item = oeb.manifest.hrefs[htmlfile_map[path]]
             rewrite_links(item.data, partial(self.resource_adder, base=dpath))

@@ -409,7 +410,7 @@ class HTMLInput(InputFormatPlugin):
             if not item.linear: continue
             toc.add(title, item.href)

-        oeb.container = DirContainer(os.getcwdu(), oeb.log)
+        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
         return oeb

     def link_to_local_path(self, link_, base=None):
@@ -456,7 +457,7 @@ class HTMLInput(InputFormatPlugin):
                 href=bhref)
         self.oeb.log.debug('Added', link)
         self.oeb.container = self.DirContainer(os.path.dirname(link),
-                self.oeb.log)
+                self.oeb.log, ignore_opf=True)
         # Load into memory
         guessed = self.guess_type(href)[0]
         media_type = guessed or self.BINARY_MIME
@@ -7,10 +7,12 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 import os
+import posixpath

-from calibre import walk
+from calibre import guess_type, walk
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata.opf2 import OPF
 from calibre.utils.zipfile import ZipFile

 class HTMLZInput(InputFormatPlugin):
@@ -27,7 +29,7 @@ class HTMLZInput(InputFormatPlugin):

         # Extract content from zip archive.
         zf = ZipFile(stream)
-        zf.extractall('.')
+        zf.extractall()

         for x in walk('.'):
             if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
@@ -70,5 +72,24 @@ class HTMLZInput(InputFormatPlugin):
             from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
             mi = get_file_type_metadata(stream, file_ext)
             meta_info_to_oeb_metadata(mi, oeb.metadata, log)

+        # Get the cover path from the OPF.
+        cover_href = None
+        opf = None
+        for x in walk('.'):
+            if os.path.splitext(x)[1].lower() in ('.opf'):
+                opf = x
+                break
+        if opf:
+            opf = OPF(opf)
+            cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
+        # Set the cover.
+        if cover_href:
+            cdata = None
+            with open(cover_href, 'rb') as cf:
+                cdata = cf.read()
+            id, href = oeb.manifest.generate('cover', cover_href)
+            oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
+            oeb.guide.add('cover', 'Cover', href)
+
         return oeb
@@ -7,11 +7,13 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 import os
+from cStringIO import StringIO

 from lxml import etree

 from calibre.customize.conversion import OutputFormatPlugin, \
     OptionRecommendation
+from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile

@@ -79,10 +81,31 @@ class HTMLZOutput(OutputFormatPlugin):
                     fname = os.path.join(tdir, 'images', images[item.href])
                     with open(fname, 'wb') as img:
                         img.write(data)

+            # Cover
+            cover_path = None
+            try:
+                cover_data = None
+                if oeb_book.metadata.cover:
+                    term = oeb_book.metadata.cover[0].term
+                    cover_data = oeb_book.guide[term].item.data
+                if cover_data:
+                    from calibre.utils.magick.draw import save_cover_data_to
+                    cover_path = os.path.join(tdir, 'cover.jpg')
+                    with open(cover_path, 'w') as cf:
+                        cf.write('')
+                    save_cover_data_to(cover_data, cover_path)
+            except:
+                import traceback
+                traceback.print_exc()
+
             # Metadata
             with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
-                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
+                opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1())))
+                mi = opf.to_book_metadata()
+                if cover_path:
+                    mi.cover = 'cover.jpg'
+                mdataf.write(metadata_to_opf(mi))

             htmlz = ZipFile(output_path, 'w')
             htmlz.add_dir(tdir)
@@ -274,6 +274,9 @@ def check_isbn(isbn):
     if not isbn:
         return None
     isbn = re.sub(r'[^0-9X]', '', isbn.upper())
+    all_same = re.match(r'(\d)\1{9,12}$', isbn)
+    if all_same is not None:
+        return None
     if len(isbn) == 10:
         return check_isbn10(isbn)
     if len(isbn) == 13:
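The added guard rejects "ISBNs" that are a single digit repeated ten to thirteen times before any checksum is computed; a string of ten 1s, for example, would otherwise pass the ISBN-10 check (its weighted sum is 55, which is divisible by 11). A standalone sketch of just the guard:

    import re
    for candidate in ('1111111111', '9999999999999', '0306406152'):
        all_same = re.match(r'(\d)\1{9,12}$', candidate)
        print candidate, 'rejected' if all_same else 'passed on to checksum validation'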
@@ -1,224 +0,0 @@
-#!/usr/bin/env python
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Fetch metadata using Amazon AWS
-'''
-import sys, re
-from threading import RLock
-
-from lxml import html
-from lxml.html import soupparser
-
-from calibre import browser
-from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.book.base import Metadata
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.library.comments import sanitize_comments_html
-
-asin_cache = {}
-cover_url_cache = {}
-cache_lock = RLock()
-
-def find_asin(br, isbn):
-    q = 'http://www.amazon.com/s/?search-alias=aps&field-keywords='+isbn
-    res = br.open_novisit(q)
-    raw = res.read()
-    raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            resolve_entities=True)[0]
-    root = html.fromstring(raw)
-    revs = root.xpath('//*[@class="asinReviewsSummary" and @name]')
-    revs = [x.get('name') for x in revs]
-    if revs:
-        return revs[0]
-
-def to_asin(br, isbn):
-    with cache_lock:
-        ans = asin_cache.get(isbn, None)
-    if ans:
-        return ans
-    if ans is False:
-        return None
-    if len(isbn) == 13:
-        try:
-            asin = find_asin(br, isbn)
-        except:
-            import traceback
-            traceback.print_exc()
-            asin = None
-    else:
-        asin = isbn
-    with cache_lock:
-        asin_cache[isbn] = asin if asin else False
-    return asin
-
-
-def get_social_metadata(title, authors, publisher, isbn):
-    mi = Metadata(title, authors)
-    if not isbn:
-        return mi
-    isbn = check_isbn(isbn)
-    if not isbn:
-        return mi
-    br = browser()
-    asin = to_asin(br, isbn)
-    if asin and get_metadata(br, asin, mi):
-        return mi
-    from calibre.ebooks.metadata.xisbn import xisbn
-    for i in xisbn.get_associated_isbns(isbn):
-        asin = to_asin(br, i)
-        if asin and get_metadata(br, asin, mi):
-            return mi
-    return mi
-
-def get_cover_url(isbn, br):
-    isbn = check_isbn(isbn)
-    if not isbn:
-        return None
-    with cache_lock:
-        ans = cover_url_cache.get(isbn, None)
-    if ans:
-        return ans
-    if ans is False:
-        return None
-    asin = to_asin(br, isbn)
-    if asin:
-        ans = _get_cover_url(br, asin)
-        if ans:
-            with cache_lock:
-                cover_url_cache[isbn] = ans
-            return ans
-    from calibre.ebooks.metadata.xisbn import xisbn
-    for i in xisbn.get_associated_isbns(isbn):
-        asin = to_asin(br, i)
-        if asin:
-            ans = _get_cover_url(br, asin)
-            if ans:
-                with cache_lock:
-                    cover_url_cache[isbn] = ans
-                    cover_url_cache[i] = ans
-                return ans
-    with cache_lock:
-        cover_url_cache[isbn] = False
-    return None
-
-def _get_cover_url(br, asin):
-    q = 'http://amzn.com/'+asin
-    try:
-        raw = br.open_novisit(q).read()
-    except Exception as e:
-        if callable(getattr(e, 'getcode', None)) and \
-                e.getcode() == 404:
-            return None
-        raise
-    if '<title>404 - ' in raw:
-        return None
-    raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            resolve_entities=True)[0]
-    try:
-        root = soupparser.fromstring(raw)
-    except:
-        return False
-
-    imgs = root.xpath('//img[@id="prodImage" and @src]')
-    if imgs:
-        src = imgs[0].get('src')
-        parts = src.split('/')
-        if len(parts) > 3:
-            bn = parts[-1]
-            sparts = bn.split('_')
-            if len(sparts) > 2:
-                bn = sparts[0] + sparts[-1]
-            return ('/'.join(parts[:-1]))+'/'+bn
-    return None
-
-
-def get_metadata(br, asin, mi):
-    q = 'http://amzn.com/'+asin
-    try:
-        raw = br.open_novisit(q).read()
-    except Exception as e:
-        if callable(getattr(e, 'getcode', None)) and \
-                e.getcode() == 404:
-            return False
-        raise
-    if '<title>404 - ' in raw:
-        return False
-    raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            resolve_entities=True)[0]
-    try:
-        root = soupparser.fromstring(raw)
-    except:
-        return False
-    if root.xpath('//*[@id="errorMessage"]'):
-        return False
-
-    ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
-    pat = re.compile(r'([0-9.]+) out of (\d+) stars')
-    if ratings:
-        for elem in ratings[0].xpath('descendant::*[@title]'):
-            t = elem.get('title').strip()
-            m = pat.match(t)
-            if m is not None:
-                try:
-                    mi.rating = float(m.group(1))/float(m.group(2)) * 5
-                except:
-                    pass
-
-    desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
-    if desc:
-        desc = desc[0]
-        for c in desc.xpath('descendant::*[@class="seeAll" or'
-                ' @class="emptyClear" or @href]'):
-            c.getparent().remove(c)
-        desc = html.tostring(desc, method='html', encoding=unicode).strip()
-        # remove all attributes from tags
-        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
-        # Collapse whitespace
-        #desc = re.sub('\n+', '\n', desc)
-        #desc = re.sub(' +', ' ', desc)
-        # Remove the notice about text referring to out of print editions
-        desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
-        # Remove comments
-        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
-        mi.comments = sanitize_comments_html(desc)
-
-    return True
-
-
-def main(args=sys.argv):
-    import tempfile, os
-    tdir = tempfile.gettempdir()
-    br = browser()
-    for title, isbn in [
-            ('The Heroes', '9780316044981'), # Test find_asin
-            ('Learning Python', '8324616489'), # Test xisbn
-            ('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
-            # Random tests
-            ('Star Trek: Destiny: Mere Mortals', '9781416551720'),
-            ('The Great Gatsby', '0743273567'),
-            ]:
-        cpath = os.path.join(tdir, title+'.jpg')
-        curl = get_cover_url(isbn, br)
-        if curl is None:
-            print 'No cover found for', title
-        else:
-            open(cpath, 'wb').write(br.open_novisit(curl).read())
-            print 'Cover for', title, 'saved to', cpath
-
-        #import time
-        #st = time.time()
-        mi = get_social_metadata(title, None, None, isbn)
-        if not mi.comments:
-            print 'Failed to download social metadata for', title
-            return 1
-        #print '\n\n', time.time() - st, '\n\n'
-        print mi
-        print '\n'
-
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
@ -1,516 +0,0 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
__license__ = 'GPL 3'
|
|
||||||
__copyright__ = '2010, sengian <sengian1@gmail.com>'
|
|
||||||
|
|
||||||
import sys, textwrap, re, traceback
|
|
||||||
from urllib import urlencode
|
|
||||||
from math import ceil
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
from lxml.html import soupparser
|
|
||||||
|
|
||||||
from calibre.utils.date import parse_date, utcnow, replace_months
|
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
|
||||||
from calibre import browser, preferred_encoding
|
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
|
||||||
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
|
||||||
authors_to_sort_string
|
|
||||||
from calibre.ebooks.metadata.fetch import MetadataSource
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.library.comments import sanitize_comments_html
|
|
||||||
|
|
||||||
|
|
||||||
class AmazonFr(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon French'
|
|
||||||
description = _('Downloads metadata from amazon.fr')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Sengian'
|
|
||||||
version = (1, 0, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='fr')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
class AmazonEs(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon Spanish'
|
|
||||||
description = _('Downloads metadata from amazon.com in spanish')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Sengian'
|
|
||||||
version = (1, 0, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='es')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
class AmazonEn(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon English'
|
|
||||||
description = _('Downloads metadata from amazon.com in english')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Sengian'
|
|
||||||
version = (1, 0, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='en')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
class AmazonDe(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon German'
|
|
||||||
description = _('Downloads metadata from amazon.de')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Sengian'
|
|
||||||
version = (1, 0, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='de')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
class Amazon(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon'
|
|
||||||
description = _('Downloads metadata from amazon.com')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Kovid Goyal & Sengian'
|
|
||||||
version = (1, 1, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
# if not self.site_customization:
|
|
||||||
# return
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='all')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# @property
|
|
||||||
# def string_customization_help(self):
|
|
||||||
# return _('You can select here the language for metadata search with amazon.com')
|
|
||||||
|
|
||||||
|
|
||||||
def report(verbose):
|
|
||||||
if verbose:
|
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
|
|
||||||
class Query(object):
|
|
||||||
|
|
||||||
BASE_URL_ALL = 'http://www.amazon.com'
|
|
||||||
BASE_URL_FR = 'http://www.amazon.fr'
|
|
||||||
BASE_URL_DE = 'http://www.amazon.de'
|
|
||||||
|
|
||||||
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
|
|
||||||
max_results=20, rlang='all'):
|
|
||||||
assert not(title is None and author is None and publisher is None \
|
|
||||||
and isbn is None and keywords is None)
|
|
||||||
assert (max_results < 21)
|
|
||||||
|
|
||||||
self.max_results = int(max_results)
|
|
||||||
self.renbres = re.compile(u'\s*(\d+)\s*')
|
|
||||||
|
|
||||||
q = { 'search-alias' : 'stripbooks' ,
|
|
||||||
'unfiltered' : '1',
|
|
||||||
'field-keywords' : '',
|
|
||||||
'field-author' : '',
|
|
||||||
'field-title' : '',
|
|
||||||
'field-isbn' : '',
|
|
||||||
'field-publisher' : ''
|
|
||||||
#get to amazon detailed search page to get all options
|
|
||||||
# 'node' : '',
|
|
||||||
# 'field-binding' : '',
|
|
||||||
#before, during, after
|
|
||||||
# 'field-dateop' : '',
|
|
||||||
#month as number
|
|
||||||
# 'field-datemod' : '',
|
|
||||||
# 'field-dateyear' : '',
|
|
||||||
#french only
|
|
||||||
# 'field-collection' : '',
|
|
||||||
#many options available
|
|
||||||
}
|
|
||||||
|
|
||||||
if rlang =='all':
|
|
||||||
q['sort'] = 'relevanceexprank'
|
|
||||||
self.urldata = self.BASE_URL_ALL
|
|
||||||
elif rlang =='es':
|
|
||||||
q['sort'] = 'relevanceexprank'
|
|
||||||
q['field-language'] = 'Spanish'
|
|
||||||
self.urldata = self.BASE_URL_ALL
|
|
||||||
elif rlang =='en':
|
|
||||||
q['sort'] = 'relevanceexprank'
|
|
||||||
q['field-language'] = 'English'
|
|
||||||
self.urldata = self.BASE_URL_ALL
|
|
||||||
elif rlang =='fr':
|
|
||||||
q['sort'] = 'relevancerank'
|
|
||||||
self.urldata = self.BASE_URL_FR
|
|
||||||
elif rlang =='de':
|
|
||||||
q['sort'] = 'relevancerank'
|
|
||||||
self.urldata = self.BASE_URL_DE
|
|
||||||
self.baseurl = self.urldata
|
|
||||||
|
|
||||||
if isbn is not None:
|
|
||||||
q['field-isbn'] = isbn.replace('-', '')
|
|
||||||
else:
|
|
||||||
if title is not None:
|
|
||||||
q['field-title'] = title
|
|
||||||
if author is not None:
|
|
||||||
q['field-author'] = author
|
|
||||||
if publisher is not None:
|
|
||||||
q['field-publisher'] = publisher
|
|
||||||
if keywords is not None:
|
|
||||||
q['field-keywords'] = keywords
|
|
||||||
|
|
||||||
if isinstance(q, unicode):
|
|
||||||
q = q.encode('utf-8')
|
|
||||||
self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
|
|
||||||
|
|
||||||
def __call__(self, browser, verbose, timeout = 5.):
|
|
||||||
if verbose:
|
|
||||||
print 'Query:', self.urldata
|
|
||||||
|
|
||||||
try:
|
|
||||||
raw = browser.open_novisit(self.urldata, timeout=timeout).read()
|
|
||||||
except Exception as e:
|
|
||||||
report(verbose)
|
|
||||||
if callable(getattr(e, 'getcode', None)) and \
|
|
||||||
e.getcode() == 404:
|
|
||||||
return
|
|
||||||
raise
|
|
||||||
if '<title>404 - ' in raw:
|
|
||||||
return
|
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
|
||||||
resolve_entities=True)[0]
|
|
||||||
|
|
||||||
try:
|
|
||||||
feed = soupparser.fromstring(raw)
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
#remove ASCII invalid chars
|
|
||||||
return soupparser.fromstring(clean_ascii_chars(raw))
|
|
||||||
except:
|
|
||||||
return None, self.urldata
|
|
||||||
|
|
||||||
#nb of page
|
|
||||||
try:
|
|
||||||
nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
|
|
||||||
except:
|
|
||||||
return None, self.urldata
|
|
||||||
|
|
||||||
pages =[feed]
|
|
||||||
if len(nbresults) > 1:
|
|
||||||
nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
|
|
||||||
for i in xrange(2, nbpagetoquery + 1):
|
|
||||||
try:
|
|
||||||
urldata = self.urldata + '&page=' + str(i)
|
|
||||||
raw = browser.open_novisit(urldata, timeout=timeout).read()
|
|
||||||
except Exception as e:
|
|
||||||
continue
|
|
||||||
if '<title>404 - ' in raw:
|
|
||||||
continue
|
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
|
||||||
resolve_entities=True)[0]
|
|
||||||
try:
|
|
||||||
feed = soupparser.fromstring(raw)
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
#remove ASCII invalid chars
|
|
||||||
return soupparser.fromstring(clean_ascii_chars(raw))
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
pages.append(feed)
|
|
||||||
|
|
||||||
results = []
|
|
||||||
for x in pages:
|
|
||||||
results.extend([i.getparent().get('href') \
|
|
||||||
for i in x.xpath("//a/span[@class='srTitle']")])
|
|
||||||
return results[:self.max_results], self.baseurl
|
|
||||||
|
|
||||||
class ResultList(list):

    def __init__(self, baseurl, lang = 'all'):
        self.baseurl = baseurl
        self.lang = lang
        self.repub = re.compile(u'\((.*)\)')
        self.rerat = re.compile(u'([0-9.]+)')
        self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
        self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
        self.recom = re.compile(r'(?s)<!--.*?-->')
        self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
        self.reisbn = re.compile(u'(ISBN-10|ISBN-13|ASIN)', re.I)
        self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
        self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
        self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)

    def strip_tags_etree(self, etreeobj, invalid_tags):
        for (itag, rmv) in invalid_tags.iteritems():
            if rmv:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tree()
            else:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tag()

    def clean_entry(self, entry, invalid_tags = {'script': True},
            invalid_id = (), invalid_class=()):
        #invalid_tags: remove tag and keep content if False else remove
        #remove tags
        if invalid_tags:
            self.strip_tags_etree(entry, invalid_tags)
        #remove id
        if invalid_id:
            for eltid in invalid_id:
                elt = entry.get_element_by_id(eltid)
                if elt is not None:
                    elt.drop_tree()
        #remove class
        if invalid_class:
            for eltclass in invalid_class:
                elts = entry.find_class(eltclass)
                if elts is not None:
                    for elt in elts:
                        elt.drop_tree()

    def get_title(self, entry):
        title = entry.get_element_by_id('btAsinTitle')
        if title is not None:
            title = title.text
        return unicode(title.replace('\n', '').strip())

    def get_authors(self, entry):
        author = entry.get_element_by_id('btAsinTitle')
        while author.getparent().tag != 'div':
            author = author.getparent()
        author = author.getparent()
        authortext = []
        for x in author.getiterator('a'):
            authortext.append(unicode(x.text_content().strip()))
        return authortext

    def get_description(self, entry, verbose):
        try:
            description = entry.get_element_by_id("productDescription").find("div[@class='content']")
            inv_class = ('seeAll', 'emptyClear')
            inv_tags = {'img': True, 'a': False}
            self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
            description = html.tostring(description, method='html', encoding=unicode).strip()
            # remove all attributes from tags
            description = self.reattr.sub(r'<\1>', description)
            # Remove the notice about text referring to out of print editions
            description = self.reoutp.sub('', description)
            # Remove comments
            description = self.recom.sub('', description)
            return unicode(sanitize_comments_html(description))
        except:
            report(verbose)
            return None

    def get_tags(self, entry, browser, verbose):
        try:
            tags = entry.get_element_by_id('tagContentHolder')
            testptag = tags.find_class('see-all')
            if testptag:
                for x in testptag:
                    alink = x.xpath('descendant-or-self::a')
                    if alink:
                        if alink[0].get('class') == 'tgJsActive':
                            continue
                        link = self.baseurl + alink[0].get('href')
                        entry = self.get_individual_metadata(browser, link, verbose)
                        tags = entry.get_element_by_id('tagContentHolder')
                        break
            tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
        except:
            report(verbose)
            tags = []
        return tags

    def get_book_info(self, entry, mi, verbose):
        try:
            entry = entry.get_element_by_id('SalesRank').getparent()
        except:
            try:
                for z in entry.getiterator('h2'):
                    if self.reprod.search(z.text_content()):
                        entry = z.getparent().find("div[@class='content']/ul")
                        break
            except:
                report(verbose)
                return mi
        elts = entry.findall('li')
        #pub & date
        elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
        if elt:
            pub = elt[0].find('b').tail
            mi.publisher = unicode(self.repub.sub('', pub).strip())
            d = self.repub.search(pub)
            if d is not None:
                d = d.group(1)
                try:
                    default = utcnow().replace(day=15)
                    if self.lang != 'all':
                        d = replace_months(d, self.lang)
                    d = parse_date(d, assume_utc=True, default=default)
                    mi.pubdate = d
                except:
                    report(verbose)
        #ISBN
        elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
        if elt:
            isbn = elt[0].find('b').tail.replace('-', '').strip()
            if check_isbn(isbn):
                mi.isbn = unicode(isbn)
            elif len(elt) > 1:
                isbn = elt[1].find('b').tail.replace('-', '').strip()
                if check_isbn(isbn):
                    mi.isbn = unicode(isbn)
        #Language
        elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
        if elt:
            langue = elt[0].find('b').tail.strip()
            if langue:
                mi.language = unicode(langue)
        #ratings
        elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
        if elt:
            ratings = elt[0].find_class('swSprite')
            if ratings:
                ratings = self.rerat.findall(ratings[0].get('title'))
                if len(ratings) == 2:
                    mi.rating = float(ratings[0])/float(ratings[1]) * 5
        return mi

    def fill_MI(self, entry, title, authors, browser, verbose):
        mi = MetaInformation(title, authors)
        mi.author_sort = authors_to_sort_string(authors)
        mi.comments = self.get_description(entry, verbose)
        mi = self.get_book_info(entry, mi, verbose)
        mi.tags = self.get_tags(entry, browser, verbose)
        return mi

    def get_individual_metadata(self, browser, linkdata, verbose):
        try:
            raw = browser.open_novisit(linkdata).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            raise
        if '<title>404 - ' in raw:
            report(verbose)
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                report(verbose)
                return

    def populate(self, entries, browser, verbose=False):
        for x in entries:
            try:
                entry = self.get_individual_metadata(browser, x, verbose)
                # clean results
                # inv_ids = ('divsinglecolumnminwidth', 'sims.purchase', 'AutoBuyXGetY', 'A9AdsMiddleBoxTop')
                # inv_class = ('buyingDetailsGrid', 'productImageGrid')
                # inv_tags = {'script': True, 'style': True, 'form': False}
                # self.clean_entry(entry, invalid_id=inv_ids)
                title = self.get_title(entry)
                authors = self.get_authors(entry)
            except Exception as e:
                if verbose:
                    print 'Failed to get all details for an entry'
                    print e
                    print 'URL that failed:', x
                    report(verbose)
                continue
            self.append(self.fill_MI(entry, title, authors, browser, verbose))


def search(title=None, author=None, publisher=None, isbn=None,
           max_results=5, verbose=False, keywords=None, lang='all'):
    br = browser()
    entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
        keywords=keywords, max_results=max_results, rlang=lang)(br, verbose)

    if entries is None or len(entries) == 0:
        return

    #List of entries
    ans = ResultList(baseurl, lang)
    ans.populate(entries, br, verbose)
    return ans

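# A minimal usage sketch of the search() helper above (the query values are
# illustrative, not from the original source):
#
#   results = search(title='Ulysses', author='James Joyce', max_results=5,
#                    verbose=True, lang='en')
#   if results is not None:
#       for mi in results:
#           print unicode(mi).encode(preferred_encoding, 'replace')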
def option_parser():
    parser = OptionParser(textwrap.dedent(\
    _('''\
        %prog [options]

        Fetch book metadata from Amazon. You must specify one of title, author,
        ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
        so you should make your query as specific as possible.
        You can choose the language for metadata retrieval:
        all, english, french, german or spanish
    '''
    )))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-k', '--keywords', help='Keywords')
    parser.add_option('-m', '--max-results', default=10,
        help='Maximum number of results to fetch')
    parser.add_option('-l', '--lang', default='all',
        help='Chosen language for metadata search (all, en, fr, es, de)')
    parser.add_option('-v', '--verbose', default=0, action='count',
        help='Be more verbose about errors')
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
            lang=opts.lang)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None or len(results) == 0:
        print 'No result found for this search!'
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -68,7 +68,19 @@ composite_formatter = SafeFormat()
 class Metadata(object):
 
     '''
-    A class representing all the metadata for a book.
+    A class representing all the metadata for a book. The various standard metadata
+    fields are available as attributes of this object. You can also stick
+    arbitrary attributes onto this object.
+
+    Metadata from custom columns should be accessed via the get() method,
+    passing in the lookup name for the column, for example: "#mytags".
+
+    Use the :meth:`is_null` method to test if a field is null.
+
+    This object also has functions to format fields into strings.
+
+    The list of standard metadata fields grows with time and is in
+    :data:`STANDARD_METADATA_FIELDS`.
 
     Please keep the method based API of this class to a minimum. Every method
     becomes a reserved field name.
@@ -88,11 +100,19 @@ class Metadata(object):
         if title:
             self.title = title
         if authors:
-            #: List of strings or []
+            # List of strings or []
             self.author = list(authors) if authors else []# Needed for backward compatibility
             self.authors = list(authors) if authors else []
 
     def is_null(self, field):
+        '''
+        Return True if the value of field is null in this object.
+        'null' means it is unknown or evaluates to False. So a title of
+        _('Unknown') is null or a language of 'und' is null.
+
+        Be careful with numeric fields since this will return True for zero as
+        well as None.
+        '''
         null_val = NULL_VALUES.get(field, None)
         val = getattr(self, field, None)
         return not val or val == null_val
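# A quick illustration of the is_null() semantics documented above (the values
# are illustrative):
#
#   mi = Metadata(_('Unknown'))
#   mi.is_null('title')     # True: a title of _('Unknown') is treated as null
#   mi.rating = 0
#   mi.is_null('rating')    # True: zero is falsy, hence the numeric-field caveat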
@@ -120,7 +140,11 @@ class Metadata(object):
                                             _('TEMPLATE ERROR'),
                                             self).strip()
             return val
+        if field.startswith('#') and field.endswith('_index'):
+            try:
+                return self.get_extra(field[:-6])
+            except:
+                pass
         raise AttributeError(
                 'Metadata object has no attribute named: '+ repr(field))
 
@@ -170,11 +194,6 @@ class Metadata(object):
         try:
             return self.__getattribute__(field)
         except AttributeError:
-            if field.startswith('#') and field.endswith('_index'):
-                try:
-                    return self.get_extra(field[:-6])
-                except:
-                    pass
             return default
 
     def get_extra(self, field, default=None):
@@ -544,17 +563,24 @@ class Metadata(object):
     def format_tags(self):
         return u', '.join([unicode(t) for t in sorted(self.tags, key=sort_key)])
 
-    def format_rating(self):
-        return unicode(self.rating)
+    def format_rating(self, v=None, divide_by=1.0):
+        if v is None:
+            if self.rating is not None:
+                return unicode(self.rating/divide_by)
+            return u'None'
+        return unicode(v/divide_by)
 
     def format_field(self, key, series_with_index=True):
+        '''
+        Returns the tuple (display_name, formatted_value)
+        '''
         name, val, ign, ign = self.format_field_extended(key, series_with_index)
         return (name, val)
 
     def format_field_extended(self, key, series_with_index=True):
         from calibre.ebooks.metadata import authors_to_string
         '''
-        returns the tuple (field_name, formatted_value, original_value,
+        returns the tuple (display_name, formatted_value, original_value,
         field_metadata)
         '''
 
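# How the new format_rating() signature behaves (illustrative values; calibre
# stores ratings on a 0-10 scale, so callers that want the GUI's 0-5 stars
# pass divide_by=2.0):
#
#   mi.rating = 8
#   mi.format_rating()                    # u'8.0'
#   mi.format_rating(divide_by=2.0)       # u'4.0'
#   mi.format_rating(v=6, divide_by=2.0)  # u'3.0'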
@@ -631,13 +657,17 @@ class Metadata(object):
                 res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
             elif datatype == 'rating':
                 res = res/2.0
-            elif key in ('book_size', 'size'):
+            elif key == 'size':
                 res = human_readable(res)
             return (name, unicode(res), orig_res, fmeta)
 
         return (None, None, None, None)
 
     def __unicode__(self):
+        '''
+        A string representation of this object, suitable for printing to
+        console
+        '''
         from calibre.ebooks.metadata import authors_to_string
         ans = []
         def fmt(x, y):
@@ -681,6 +711,9 @@ class Metadata(object):
         return u'\n'.join(ans)
 
     def to_html(self):
+        '''
+        An HTML representation of this object.
+        '''
         from calibre.ebooks.metadata import authors_to_string
         ans = [(_('Title'), unicode(self.title))]
         ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
@@ -1,317 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import traceback, socket, sys
from functools import partial
from threading import Thread, Event
from Queue import Queue, Empty
from lxml import etree

import mechanize

from calibre.customize import Plugin
from calibre import browser, prints
from calibre.constants import preferred_encoding, DEBUG

class CoverDownload(Plugin):
    '''
    These plugins are used to download covers for books.
    '''

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('Cover download')

    def has_cover(self, mi, ans, timeout=5.):
        '''
        Check if the book described by mi has a cover. Call ans.set() if it
        does. Do nothing if it doesn't.

        :param mi: MetaInformation object
        :param timeout: timeout in seconds
        :param ans: A threading.Event object
        '''
        raise NotImplementedError()

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        '''
        Download covers for books described by the mi object. Downloaded covers
        must be put into the result_queue. If more than one cover is available,
        the plugin should continue downloading them and putting them into
        result_queue until abort.is_set() returns True.

        :param mi: MetaInformation object
        :param result_queue: A multithreaded Queue
        :param abort: A threading.Event object
        :param timeout: timeout in seconds
        '''
        raise NotImplementedError()

    def exception_to_string(self, ex):
        try:
            return unicode(ex)
        except:
            try:
                return str(ex).decode(preferred_encoding, 'replace')
            except:
                return repr(ex)

    def debug(self, *args, **kwargs):
        if DEBUG:
            prints('\t'+self.name+':', *args, **kwargs)


class HeadRequest(mechanize.Request):

    def get_method(self):
        return 'HEAD'

class OpenLibraryCovers(CoverDownload): # {{{
    'Download covers from openlibrary.org'

    # See http://openlibrary.org/dev/docs/api/covers

    OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
    name = 'openlibrary.org covers'
    description = _('Download covers from openlibrary.org')
    author = 'Kovid Goyal'

    def has_cover(self, mi, ans, timeout=5.):
        if not mi.isbn:
            return False
        from calibre.ebooks.metadata.library_thing import get_browser
        br = get_browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout)
            self.debug('cover for', mi.isbn, 'found')
            ans.set()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 302:
                self.debug('cover for', mi.isbn, 'found')
                ans.set()
            else:
                self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        if not mi.isbn:
            return
        from calibre.ebooks.metadata.library_thing import get_browser
        br = get_browser()
        try:
            ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read()
            result_queue.put((True, ans, 'jpg', self.name))
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                result_queue.put((False, _('ISBN: %s not found')%mi.isbn, '', self.name))
            else:
                result_queue.put((False, self.exception_to_string(e),
                    traceback.format_exc(), self.name))

# }}}
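# A minimal sketch of driving one of these plugins by hand; the
# MetaInformation object mi is assumed to have its isbn set (plugins are
# instantiated with None as their path, as in the __main__ block at the
# bottom of this file):
#
#   found = Event()
#   OpenLibraryCovers(None).has_cover(mi, found, timeout=5.)
#   if found.is_set():
#       prints('openlibrary.org has a cover for', mi.isbn)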

class AmazonCovers(CoverDownload): # {{{

    name = 'amazon.com covers'
    description = _('Download covers from amazon.com')
    author = 'Kovid Goyal'

    def has_cover(self, mi, ans, timeout=5.):
        if not mi.isbn:
            return False
        from calibre.ebooks.metadata.amazon import get_cover_url
        br = browser()
        try:
            get_cover_url(mi.isbn, br)
            self.debug('cover for', mi.isbn, 'found')
            ans.set()
        except Exception as e:
            self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        if not mi.isbn:
            return
        from calibre.ebooks.metadata.amazon import get_cover_url
        br = browser()
        try:
            url = get_cover_url(mi.isbn, br)
            if url is None:
                raise ValueError('No cover found for ISBN: %s'%mi.isbn)
            cover_data = br.open_novisit(url).read()
            result_queue.put((True, cover_data, 'jpg', self.name))
        except Exception as e:
            result_queue.put((False, self.exception_to_string(e),
                traceback.format_exc(), self.name))

# }}}

def check_for_cover(mi, timeout=5.): # {{{
    from calibre.customize.ui import cover_sources
    ans = Event()
    checkers = [partial(p.has_cover, mi, ans, timeout=timeout) for p in
            cover_sources()]
    workers = [Thread(target=c) for c in checkers]
    for w in workers:
        w.daemon = True
        w.start()
    while not ans.is_set():
        ans.wait(0.1)
        if sum([int(w.is_alive()) for w in workers]) == 0:
            break
    return ans.is_set()

# }}}

def download_covers(mi, result_queue, max_covers=50, timeout=5.): # {{{
    from calibre.customize.ui import cover_sources
    abort = Event()
    temp = Queue()
    getters = [partial(p.get_covers, mi, temp, abort, timeout=timeout) for p in
            cover_sources()]
    workers = [Thread(target=c) for c in getters]
    for w in workers:
        w.daemon = True
        w.start()
    count = 0
    while count < max_covers:
        try:
            result = temp.get_nowait()
            if result[0]:
                count += 1
            result_queue.put(result)
        except Empty:
            pass
        if sum([int(w.is_alive()) for w in workers]) == 0:
            break

    abort.set()

    while True:
        try:
            result = temp.get_nowait()
            count += 1
            result_queue.put(result)
        except Empty:
            break

# }}}
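# A sketch of consuming download_covers() directly; download_cover() further
# down wraps the same queue dance with max_covers=1. Each queue item is the
# (ok, data, fmt, plugin_name) tuple the plugins put() above:
#
#   rq = Queue()
#   download_covers(mi, rq, max_covers=3)
#   while True:
#       try:
#           ok, data, fmt, plugin_name = rq.get_nowait()
#       except Empty:
#           break
#       if ok:
#           open('cover.' + fmt, 'wb').write(data)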

class DoubanCovers(CoverDownload): # {{{
    'Download covers from Douban.com'

    DOUBAN_ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
    CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
    name = 'Douban.com covers'
    description = _('Download covers from Douban.com')
    author = 'Li Fanxi'

    def get_cover_url(self, isbn, br, timeout=5.):
        try:
            url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY
            src = br.open(url, timeout=timeout).read()
        except Exception as err:
            if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
                err = Exception(_('Douban.com API timed out. Try again later.'))
            raise err
        else:
            feed = etree.fromstring(src)
            NAMESPACES = {
                'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
                'atom' : 'http://www.w3.org/2005/Atom',
                'db': 'http://www.douban.com/xmlns/'
            }
            XPath = partial(etree.XPath, namespaces=NAMESPACES)
            entries = XPath('//atom:entry')(feed)
            if len(entries) < 1:
                return None
            try:
                cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
                u = cover_url(entries[0])[0].replace('/spic/', '/lpic/')
                # If URL contains "book-default", the book doesn't have a cover
                if u.find('book-default') != -1:
                    return None
            except:
                return None
            return u

    def has_cover(self, mi, ans, timeout=5.):
        if not mi.isbn:
            return False
        br = browser()
        try:
            if self.get_cover_url(mi.isbn, br, timeout=timeout) is not None:
                self.debug('cover for', mi.isbn, 'found')
                ans.set()
        except Exception as e:
            self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        if not mi.isbn:
            return
        br = browser()
        try:
            url = self.get_cover_url(mi.isbn, br, timeout=timeout)
            cover_data = br.open_novisit(url).read()
            result_queue.put((True, cover_data, 'jpg', self.name))
        except Exception as e:
            result_queue.put((False, self.exception_to_string(e),
                traceback.format_exc(), self.name))
# }}}

def download_cover(mi, timeout=5.): # {{{
    results = Queue()
    download_covers(mi, results, max_covers=1, timeout=timeout)
    errors, ans = [], None
    while True:
        try:
            x = results.get_nowait()
            if x[0]:
                ans = x[1]
            else:
                errors.append(x)
        except Empty:
            break
    return ans, errors

# }}}

def test(isbns): # {{{
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation('test', ['test'])
    for isbn in isbns:
        prints('Testing ISBN:', isbn)
        mi.isbn = isbn
        found = check_for_cover(mi)
        prints('Has cover:', found)
        ans, errors = download_cover(mi)
        if ans is not None:
            prints('Cover downloaded')
        else:
            prints('Download failed:')
            for err in errors:
                prints('\t', err[-1]+':', err[1])
        print '\n'

# }}}

if __name__ == '__main__':
    isbns = sys.argv[1:] + ['9781591025412', '9780307272119']
    #test(isbns)

    from calibre.ebooks.metadata import MetaInformation
    oc = OpenLibraryCovers(None)
    for isbn in isbns:
        mi = MetaInformation('xx', ['yy'])
        mi.isbn = isbn
        rq = Queue()
        oc.get_covers(mi, rq, Event())
        result = rq.get_nowait()
        if not result[0]:
            print 'Failed for ISBN:', isbn
            print result
@@ -1,263 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>; 2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'

import sys, textwrap
import traceback
from urllib import urlencode
from functools import partial
from lxml import etree

from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.date import parse_date, utcnow

NAMESPACES = {
    'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom' : 'http://www.w3.org/2005/Atom',
    'db': 'http://www.douban.com/xmlns/'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
tag = XPath("descendant::db:tag")

CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'

class DoubanBooks(MetadataSource):

    name = 'Douban Books'
    description = _('Downloads metadata from Douban.com')
    supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
    author = 'Li Fanxi <lifanxi@freemindworld.com>' # The author of this plugin
    version = (1, 0, 1) # The version number of this plugin

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                                  self.isbn, max_results=10,
                                  verbose=self.verbose)
        except Exception as e:
            self.exception = e
            self.tb = traceback.format_exc()

def report(verbose):
    if verbose:
        import traceback
        traceback.print_exc()

class Query(object):

    SEARCH_URL = 'http://api.douban.com/book/subjects?'
    ISBN_URL = 'http://api.douban.com/book/subject/isbn/'

    type = "search"

    def __init__(self, title=None, author=None, publisher=None, isbn=None,
                 max_results=20, start_index=1, api_key=''):
        assert not(title is None and author is None and publisher is None and \
                   isbn is None)
        assert (int(max_results) < 21)
        q = ''
        if isbn is not None:
            q = isbn
            self.type = 'isbn'
        else:
            def build_term(parts):
                return ' '.join(x for x in parts)
            if title is not None:
                q += build_term(title.split())
            if author is not None:
                q += (' ' if q else '') + build_term(author.split())
            if publisher is not None:
                q += (' ' if q else '') + build_term(publisher.split())
            self.type = 'search'

        if isinstance(q, unicode):
            q = q.encode('utf-8')

        if self.type == "isbn":
            self.url = self.ISBN_URL + q
            if api_key != '':
                self.url = self.url + "?apikey=" + api_key
        else:
            self.url = self.SEARCH_URL+urlencode({
                'q':q,
                'max-results':max_results,
                'start-index':start_index,
            })
            if api_key != '':
                self.url = self.url + "&apikey=" + api_key

    def __call__(self, browser, verbose):
        if verbose:
            print 'Query:', self.url
        if self.type == "search":
            feed = etree.fromstring(browser.open(self.url).read())
            total = int(total_results(feed)[0].text)
            start = int(start_index(feed)[0].text)
            entries = entry(feed)
            new_start = start + len(entries)
            if new_start > total:
                new_start = 0
            return entries, new_start
        elif self.type == "isbn":
            feed = etree.fromstring(browser.open(self.url).read())
            entries = entry(feed)
            return entries, 0

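# The Query object only builds a URL; roughly what it produces for an ISBN
# lookup (the ISBN and the elided API key here are illustrative):
#
#   q = Query(isbn='9787020002207', api_key='...')
#   # q.url == 'http://api.douban.com/book/subject/isbn/9787020002207?apikey=...'
#   entries, next_start = q(browser(), verbose=False)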
class ResultList(list):

    def get_description(self, entry, verbose):
        try:
            desc = description(entry)
            if desc:
                return 'SUMMARY:\n'+desc[0].text
        except:
            report(verbose)

    def get_title(self, entry):
        candidates = [x.text for x in title(entry)]
        return ': '.join(candidates)

    def get_authors(self, entry):
        m = creator(entry)
        if not m:
            m = []
        m = [x.text for x in m]
        return m

    def get_tags(self, entry, verbose):
        try:
            btags = [x.attrib["name"] for x in tag(entry)]
            tags = []
            for t in btags:
                tags.extend([y.strip() for y in t.split('/')])
            tags = list(sorted(list(set(tags))))
        except:
            report(verbose)
            tags = []
        return [x.replace(',', ';') for x in tags]

    def get_publisher(self, entry, verbose):
        try:
            pub = publisher(entry)[0].text
        except:
            pub = None
        return pub

    def get_isbn(self, entry, verbose):
        try:
            isbn13 = isbn(entry)[0].text
        except Exception:
            isbn13 = None
        return isbn13

    def get_date(self, entry, verbose):
        try:
            d = date(entry)
            if d:
                default = utcnow().replace(day=15)
                d = parse_date(d[0].text, assume_utc=True, default=default)
            else:
                d = None
        except:
            report(verbose)
            d = None
        return d

    def populate(self, entries, browser, verbose=False, api_key=''):
        for x in entries:
            try:
                id_url = entry_id(x)[0].text
                title = self.get_title(x)
            except:
                report(verbose)
            mi = MetaInformation(title, self.get_authors(x))
            try:
                if api_key != '':
                    id_url = id_url + "?apikey=" + api_key
                raw = browser.open(id_url).read()
                feed = etree.fromstring(raw)
                x = entry(feed)[0]
            except Exception as e:
                if verbose:
                    print 'Failed to get all details for an entry'
                    print e
            mi.comments = self.get_description(x, verbose)
            mi.tags = self.get_tags(x, verbose)
            mi.isbn = self.get_isbn(x, verbose)
            mi.publisher = self.get_publisher(x, verbose)
            mi.pubdate = self.get_date(x, verbose)
            self.append(mi)

def search(title=None, author=None, publisher=None, isbn=None,
           verbose=False, max_results=40, api_key=None):
    br = browser()
    start, entries = 1, []

    if api_key is None:
        api_key = CALIBRE_DOUBAN_API_KEY

    while start > 0 and len(entries) <= max_results:
        new, start = Query(title=title, author=author, publisher=publisher,
                isbn=isbn, max_results=max_results, start_index=start, api_key=api_key)(br, verbose)
        if not new:
            break
        entries.extend(new)

    entries = entries[:max_results]

    ans = ResultList()
    ans.populate(entries, br, verbose, api_key)
    return ans

def option_parser():
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]

        Fetch book metadata from Douban. You must specify one of title, author,
        publisher or ISBN. If you specify ISBN the others are ignored. Will
        fetch a maximum of 100 matches, so you should make your query as
        specific as possible.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-m', '--max-results', default=10,
                      help='Maximum number of results to fetch')
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, opts.publisher, opts.isbn,
                         verbose=opts.verbose, max_results=int(opts.max_results))
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    for result in results:
        print unicode(result).encode(preferred_encoding)
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -13,7 +13,7 @@ import posixpath
 from cStringIO import StringIO
 
 from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf2 import OPF
+from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.zipfile import ZipFile, safe_replace
 
@@ -31,9 +31,9 @@ def get_metadata(stream, extract_cover=True):
             opf = OPF(opf_stream)
             mi = opf.to_book_metadata()
             if extract_cover:
-                cover_name = opf.raster_cover
-                if cover_name:
-                    mi.cover_data = ('jpg', zf.read(cover_name))
+                cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
+                if cover_href:
+                    mi.cover_data = ('jpg', zf.read(cover_href))
     except:
         return mi
     return mi
@@ -59,17 +59,20 @@ def set_metadata(stream, mi):
        except:
            pass
    if new_cdata:
-        raster_cover = opf.raster_cover
-        if not raster_cover:
-            raster_cover = 'cover.jpg'
-        cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover)
+        cover = opf.cover
+        if not cover:
+            cover = 'cover.jpg'
+        cpath = posixpath.join(posixpath.dirname(opf_path), cover)
         new_cover = _write_new_cover(new_cdata, cpath)
         replacements[cpath] = open(new_cover.name, 'rb')
+        mi.cover = cover
 
     # Update the metadata.
-    opf.smart_update(mi, replace_metadata=True)
+    old_mi = opf.to_book_metadata()
+    old_mi.smart_update(mi)
+    opf.smart_update(metadata_to_opf(old_mi), replace_metadata=True)
     newopf = StringIO(opf.render())
-    safe_replace(stream, opf_path, newopf, extra_replacements=replacements)
+    safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
 
     # Cleanup temporary files.
     try:
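# The set_metadata() change above merges the incoming record into the existing
# OPF instead of overwriting it; the flow, condensed (names as in the hunk):
#
#   old_mi = opf.to_book_metadata()          # what the archive currently says
#   old_mi.smart_update(mi)                  # layer the incoming values on top
#   opf.smart_update(metadata_to_opf(old_mi), replace_metadata=True)
#   safe_replace(stream, opf_path, StringIO(opf.render()),
#                extra_replacements=replacements, add_missing=True)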
@@ -1,523 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import traceback, sys, textwrap, re
from threading import Thread

from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log
from calibre.utils.titlecase import titlecase
from calibre.customize import Plugin
from calibre.ebooks.metadata.covers import check_for_cover
from calibre.utils.html2text import html2text

metadata_config = None

class MetadataSource(Plugin): # {{{
    '''
    Represents a source to query for metadata. Subclasses must implement
    at least the fetch method.

    When :meth:`fetch` is called, the `self` object will have the following
    useful attributes (each of which may be None)::

        title, book_author, publisher, isbn, log, verbose and extra

    Use these attributes to construct the search query. extra is reserved for
    future use.

    The fetch method must store the results in `self.results` as a list of
    :class:`Metadata` objects. If there is an error, it should be stored
    in `self.exception` and `self.tb` (for the traceback).
    '''

    author = 'Kovid Goyal'

    supported_platforms = ['windows', 'osx', 'linux']

    #: The type of metadata fetched. 'basic' means basic metadata like
    #: title/author/isbn/etc. 'social' means social metadata like
    #: tags/rating/reviews/etc.
    metadata_type = 'basic'

    #: If not None, the customization dialog will allow for string
    #: based customization as well as the default customization. The
    #: string customization will be saved in the site_customization
    #: member.
    string_customization_help = None

    #: Set this to true if your plugin returns HTML markup in comments.
    #: Then if the user disables HTML, calibre will automagically convert
    #: the HTML to Markdown.
    has_html_comments = False

    type = _('Metadata download')

    def __call__(self, title, author, publisher, isbn, verbose, log=None,
            extra=None):
        self.worker = Thread(target=self._fetch)
        self.worker.daemon = True
        self.title = title
        self.verbose = verbose
        self.book_author = author
        self.publisher = publisher
        self.isbn = isbn
        self.log = log if log is not None else default_log
        self.extra = extra
        self.exception, self.tb, self.results = None, None, []
        self.worker.start()

    def _fetch(self):
        try:
            self.fetch()
            if self.results:
                c = self.config_store().get(self.name, {})
                res = self.results
                if hasattr(res, 'authors'):
                    res = [res]
                for mi in res:
                    if not c.get('rating', True):
                        mi.rating = None
                    if not c.get('comments', True):
                        mi.comments = None
                    if not c.get('tags', True):
                        mi.tags = []
                    if self.has_html_comments and mi.comments and \
                            c.get('textcomments', False):
                        try:
                            mi.comments = html2text(mi.comments)
                        except:
                            traceback.print_exc()
                            mi.comments = None

        except Exception as e:
            self.exception = e
            self.tb = traceback.format_exc()

    def fetch(self):
        '''
        All the actual work is done here.
        '''
        raise NotImplementedError

    def join(self):
        return self.worker.join()

    def is_alive(self):
        return self.worker.is_alive()

    def is_customizable(self):
        return True

    def config_store(self):
        global metadata_config
        if metadata_config is None:
            from calibre.utils.config import XMLConfig
            metadata_config = XMLConfig('plugins/metadata_download')
        return metadata_config

    def config_widget(self):
        from PyQt4.Qt import QWidget, QVBoxLayout, QLabel, Qt, QLineEdit, \
                QCheckBox
        from calibre.customize.ui import config
        w = QWidget()
        w._layout = QVBoxLayout(w)
        w.setLayout(w._layout)
        if self.string_customization_help is not None:
            w._sc_label = QLabel(self.string_customization_help, w)
            w._layout.addWidget(w._sc_label)
            customization = config['plugin_customization']
            def_sc = customization.get(self.name, '')
            if not def_sc:
                def_sc = ''
            w._sc = QLineEdit(def_sc, w)
            w._layout.addWidget(w._sc)
            w._sc_label.setWordWrap(True)
            w._sc_label.setTextInteractionFlags(Qt.LinksAccessibleByMouse
                    | Qt.LinksAccessibleByKeyboard)
            w._sc_label.setOpenExternalLinks(True)
        c = self.config_store()
        c = c.get(self.name, {})
        for x, l in {'rating':_('ratings'), 'tags':_('tags'),
                'comments':_('description/reviews')}.items():
            cb = QCheckBox(_('Download %s from %s')%(l,
                self.name))
            setattr(w, '_'+x, cb)
            cb.setChecked(c.get(x, True))
            w._layout.addWidget(cb)

        if self.has_html_comments:
            cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
            setattr(w, '_textcomments', cb)
            cb.setChecked(c.get('textcomments', False))
            w._layout.addWidget(cb)

        return w

    def save_settings(self, w):
        dl_settings = {}
        for x in ('rating', 'tags', 'comments'):
            dl_settings[x] = getattr(w, '_'+x).isChecked()
        if self.has_html_comments:
            dl_settings['textcomments'] = getattr(w, '_textcomments').isChecked()
        c = self.config_store()
        c.set(self.name, dl_settings)
        if hasattr(w, '_sc'):
            sc = unicode(w._sc.text()).strip()
            from calibre.customize.ui import customize_plugin
            customize_plugin(self, sc)

    def customization_help(self):
        return 'This plugin can only be customized using the GUI'

# }}}

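# GoogleBooks below is the smallest concrete subclass; the general shape of a
# MetadataSource plugin is just this (MySource and my_search are illustrative
# names, not part of calibre):
#
#   class MySource(MetadataSource):
#       name = 'My Source'
#       description = _('Downloads metadata from example.org')
#
#       def fetch(self):
#           try:
#               self.results = my_search(self.title, self.book_author)
#           except Exception as e:
#               self.exception = e
#               self.tb = traceback.format_exc()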
class GoogleBooks(MetadataSource): # {{{
|
|
||||||
|
|
||||||
name = 'Google Books'
|
|
||||||
description = _('Downloads metadata from Google Books')
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
from calibre.ebooks.metadata.google_books import search
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10,
|
|
||||||
verbose=self.verbose)
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class ISBNDB(MetadataSource): # {{{
|
|
||||||
|
|
||||||
name = 'IsbnDB'
|
|
||||||
description = _('Downloads metadata from isbndb.com')
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
if not self.site_customization:
|
|
||||||
return
|
|
||||||
from calibre.ebooks.metadata.isbndb import option_parser, create_books
|
|
||||||
args = ['isbndb']
|
|
||||||
if self.isbn:
|
|
||||||
args.extend(['--isbn', self.isbn])
|
|
||||||
else:
|
|
||||||
if self.title:
|
|
||||||
args.extend(['--title', self.title])
|
|
||||||
if self.book_author:
|
|
||||||
args.extend(['--author', self.book_author])
|
|
||||||
if self.publisher:
|
|
||||||
args.extend(['--publisher', self.publisher])
|
|
||||||
if self.verbose:
|
|
||||||
args.extend(['--verbose'])
|
|
||||||
args.append(self.site_customization) # IsbnDb key
|
|
||||||
try:
|
|
||||||
opts, args = option_parser().parse_args(args)
|
|
||||||
self.results = create_books(opts, args)
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def string_customization_help(self):
|
|
||||||
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
|
|
||||||
'and enter your access key below.')
|
|
||||||
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class Amazon(MetadataSource): # {{{
|
|
||||||
|
|
||||||
name = 'Amazon'
|
|
||||||
metadata_type = 'social'
|
|
||||||
description = _('Downloads social metadata from amazon.com')
|
|
||||||
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
if not self.isbn:
|
|
||||||
return
|
|
||||||
from calibre.ebooks.metadata.amazon import get_social_metadata
|
|
||||||
try:
|
|
||||||
self.results = get_social_metadata(self.title, self.book_author,
|
|
||||||
self.publisher, self.isbn)
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class KentDistrictLibrary(MetadataSource): # {{{
|
|
||||||
|
|
||||||
name = 'Kent District Library'
|
|
||||||
metadata_type = 'social'
|
|
||||||
description = _('Downloads series information from ww2.kdl.org. '
|
|
||||||
'This website cannot handle large numbers of queries, '
|
|
||||||
'so the plugin is disabled by default.')
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
if not self.title or not self.book_author:
|
|
||||||
return
|
|
||||||
from calibre.ebooks.metadata.kdl import get_series
|
|
||||||
try:
|
|
||||||
self.results = get_series(self.title, self.book_author)
|
|
||||||
except Exception as e:
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
|
|
||||||
def result_index(source, result):
|
|
||||||
if not result.isbn:
|
|
||||||
return -1
|
|
||||||
for i, x in enumerate(source):
|
|
||||||
if x.isbn == result.isbn:
|
|
||||||
return i
|
|
||||||
return -1
|
|
||||||
|
|
||||||
def merge_results(one, two):
|
|
||||||
if two is not None and one is not None:
|
|
||||||
for x in two:
|
|
||||||
idx = result_index(one, x)
|
|
||||||
if idx < 0:
|
|
||||||
one.append(x)
|
|
||||||
else:
|
|
||||||
one[idx].smart_update(x)
|
|
||||||
|
|
||||||
class MetadataSources(object):
|
|
||||||
|
|
||||||
def __init__(self, sources):
|
|
||||||
self.sources = sources
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
for s in self.sources:
|
|
||||||
s.__enter__()
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, *args):
|
|
||||||
for s in self.sources:
|
|
||||||
s.__exit__()
|
|
||||||
|
|
||||||
def __call__(self, *args, **kwargs):
|
|
||||||
for s in self.sources:
|
|
||||||
s(*args, **kwargs)
|
|
||||||
|
|
||||||
def join(self):
|
|
||||||
for s in self.sources:
|
|
||||||
s.join()
|
|
||||||
|
|
||||||
def filter_metadata_results(item):
|
|
||||||
keywords = ["audio", "tape", "cassette", "abridged", "playaway"]
|
|
||||||
for keyword in keywords:
|
|
||||||
if item.publisher and keyword in item.publisher.lower():
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def do_cover_check(item):
|
|
||||||
item.has_cover = False
|
|
||||||
try:
|
|
||||||
item.has_cover = check_for_cover(item)
|
|
||||||
except:
|
|
||||||
pass # Cover not found
|
|
||||||
|
|
||||||
def check_for_covers(items):
|
|
||||||
threads = [Thread(target=do_cover_check, args=(item,)) for item in items]
|
|
||||||
for t in threads: t.start()
|
|
||||||
for t in threads: t.join()
|
|
||||||
|
|
||||||
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
|
||||||
verbose=0):
|
|
||||||
assert not(title is None and author is None and publisher is None and \
|
|
||||||
isbn is None)
|
|
||||||
from calibre.customize.ui import metadata_sources, migrate_isbndb_key
|
|
||||||
migrate_isbndb_key()
|
|
||||||
if isbn is not None:
|
|
||||||
isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
|
|
||||||
fetchers = list(metadata_sources(isbndb_key=isbndb_key))
|
|
||||||
with MetadataSources(fetchers) as manager:
|
|
||||||
manager(title, author, publisher, isbn, verbose)
|
|
||||||
manager.join()
|
|
||||||
|
|
||||||
results = list(fetchers[0].results) if fetchers else []
|
|
||||||
for fetcher in fetchers[1:]:
|
|
||||||
merge_results(results, fetcher.results)
|
|
||||||
|
|
||||||
results = list(filter(filter_metadata_results, results))
|
|
||||||
|
|
||||||
check_for_covers(results)
|
|
||||||
|
|
||||||
words = ("the", "a", "an", "of", "and")
|
|
||||||
prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
|
|
||||||
trailing_paren_pat = re.compile(r'\(.*\)$')
|
|
||||||
whitespace_pat = re.compile(r'\s+')
|
|
||||||
|
|
||||||
def sort_func(x, y):
|
|
||||||
|
|
||||||
def cleanup_title(s):
|
|
||||||
if s is None:
|
|
||||||
s = _('Unknown')
|
|
||||||
s = s.strip().lower()
|
|
||||||
s = prefix_pat.sub(' ', s)
|
|
||||||
s = trailing_paren_pat.sub('', s)
|
|
||||||
s = whitespace_pat.sub(' ', s)
|
|
||||||
return s.strip()
|
|
||||||
|
|
||||||
t = cleanup_title(title)
|
|
||||||
x_title = cleanup_title(x.title)
|
|
||||||
y_title = cleanup_title(y.title)
|
|
||||||
|
|
||||||
# prefer titles that start with the search title
|
|
||||||
tx = cmp(t, x_title)
|
|
||||||
ty = cmp(t, y_title)
|
|
||||||
result = 0 if abs(tx) == abs(ty) else abs(tx) - abs(ty)
|
|
||||||
|
|
||||||
# then prefer titles that have a cover image
|
|
||||||
if result == 0:
|
|
||||||
result = -cmp(x.has_cover, y.has_cover)
|
|
||||||
|
|
||||||
# then prefer titles with the longest comment, with in 10%
|
|
||||||
if result == 0:
|
|
||||||
cx = len(x.comments.strip() if x.comments else '')
|
|
||||||
cy = len(y.comments.strip() if y.comments else '')
|
|
||||||
t = (cx + cy) / 20
|
|
||||||
result = cy - cx
|
|
||||||
if abs(result) < t:
|
|
||||||
result = 0
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
results = sorted(results, cmp=sort_func)
|
|
||||||
|
|
||||||
# if for some reason there is no comment in the top selection, go looking for one
|
|
||||||
if len(results) > 1:
|
|
||||||
if not results[0].comments or len(results[0].comments) == 0:
|
|
||||||
for r in results[1:]:
|
|
||||||
try:
|
|
||||||
if title and title.lower() == r.title[:len(title)].lower() \
|
|
||||||
and r.comments and len(r.comments):
|
|
||||||
results[0].comments = r.comments
|
|
||||||
break
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
# Find a pubdate
|
|
||||||
pubdate = None
|
|
||||||
for r in results:
|
|
||||||
if r.pubdate is not None:
|
|
||||||
pubdate = r.pubdate
|
|
||||||
break
|
|
||||||
if pubdate is not None:
|
|
||||||
for r in results:
|
|
||||||
if r.pubdate is None:
|
|
||||||
r.pubdate = pubdate
|
|
||||||
|
|
||||||
def fix_case(x):
|
|
||||||
if x:
|
|
||||||
x = titlecase(x)
|
|
||||||
return x
|
|
||||||
|
|
||||||
for r in results:
|
|
||||||
r.title = fix_case(r.title)
|
|
||||||
if r.authors:
|
|
||||||
r.authors = list(map(fix_case, r.authors))
|
|
||||||
|
|
||||||
return results, [(x.name, x.exception, x.tb) for x in fetchers]

def get_social_metadata(mi, verbose=0):
    from calibre.customize.ui import metadata_sources
    fetchers = list(metadata_sources(metadata_type='social'))
    with MetadataSources(fetchers) as manager:
        manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
        manager.join()
    ratings, tags, comments, series, series_index = [], set([]), set([]), None, None
    for fetcher in fetchers:
        if fetcher.results:
            dmi = fetcher.results
            if dmi.rating is not None:
                ratings.append(dmi.rating)
            if dmi.tags:
                for t in dmi.tags:
                    tags.add(t)
            if mi.pubdate is None and dmi.pubdate is not None:
                mi.pubdate = dmi.pubdate
            if dmi.comments:
                comments.add(dmi.comments)
            if dmi.series is not None:
                series = dmi.series
                if dmi.series_index is not None:
                    series_index = dmi.series_index
    if ratings:
        rating = sum(ratings)/float(len(ratings))
        if mi.rating is None or mi.rating < 0.1:
            mi.rating = rating
        else:
            mi.rating = (mi.rating + rating)/2.0
    if tags:
        if not mi.tags:
            mi.tags = []
        mi.tags += list(tags)
        mi.tags = list(sorted(list(set(mi.tags))))
    if comments:
        if not mi.comments or len(mi.comments)+20 < len(' '.join(comments)):
            mi.comments = ''
            for x in comments:
                mi.comments += x+'\n\n'
    if series and series_index is not None:
        mi.series = series
        mi.series_index = series_index

    return [(x.name, x.exception, x.tb) for x in fetchers if x.exception is not
            None]
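# Worked example (illustrative numbers) for the rating merge above: social
# ratings [3.0, 5.0] average to 4.0; with an existing mi.rating of 3.0 the
# stored value becomes (3.0 + 4.0)/2.0 = 3.5, while a missing or near-zero
# rating is simply replaced by the 4.0 average.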


def option_parser():
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]

        Fetch book metadata from online sources. You must specify at least one
        of title, author, publisher or ISBN. If you specify ISBN, the others
        are ignored.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-m', '--max-results', default=10,
                      help='Maximum number of results to fetch')
    parser.add_option('-k', '--isbndb-key',
                      help=('The access key for your ISBNDB.com account. '
                            'Only needed if you want to search isbndb.com '
                            'and you haven\'t customized the IsbnDB plugin.'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    results, exceptions = search(opts.title, opts.author, opts.publisher,
                                 opts.isbn, opts.isbndb_key, opts.verbose)
    social_exceptions = []
    for result in results:
        social_exceptions.extend(get_social_metadata(result, opts.verbose))
        prints(unicode(result))
        print

    for name, exception, tb in exceptions+social_exceptions:
        if exception is not None:
            print 'WARNING: Fetching from', name, 'failed with error:'
            print exception
            print tb

    return 0

if __name__ == '__main__':
    sys.exit(main())
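# Illustrative command line (argument values are made up):
#   python fetch.py -t "Dune" -a "Frank Herbert" -v
# prints each merged metadata record, then a warning block per failed fetcher.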
@@ -1,390 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'

import sys, textwrap, re, traceback, socket
from urllib import urlencode

from lxml.html import soupparser, tostring

from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
    authors_to_sort_string
from calibre.library.comments import sanitize_comments_html
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars

class Fictionwise(MetadataSource): # {{{

    author = 'Sengian'
    name = 'Fictionwise'
    description = _('Downloads metadata from Fictionwise')

    has_html_comments = True

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                                  self.isbn, max_results=10, verbose=self.verbose)
        except Exception as e:
            self.exception = e
            self.tb = traceback.format_exc()

    # }}}

class FictionwiseError(Exception):
    pass

def report(verbose):
    if verbose:
        traceback.print_exc()

class Query(object):

    BASE_URL = 'http://www.fictionwise.com/servlet/mw'

    def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
        assert not(title is None and author is None and publisher is None and keywords is None)
        assert (max_results < 21)

        self.max_results = int(max_results)
        q = { 'template' : 'searchresults_adv.htm' ,
              'searchtitle' : '',
              'searchauthor' : '',
              'searchpublisher' : '',
              'searchkeyword' : '',
              #possibilities: startoflast, fullname, lastfirst
              'searchauthortype' : 'startoflast',
              'searchcategory' : '',
              'searchcategory2' : '',
              'searchprice_s' : '0',
              'searchprice_e' : 'ANY',
              'searchformat' : '',
              'searchgeo' : 'US',
              'searchfwdatetype' : '',
              #maybe use dates fields if needed?
              #'sortorder' : 'DESC',
              #many options available: b.SortTitle, a.SortName,
              #b.DateFirstPublished, b.FWPublishDate
              'sortby' : 'b.SortTitle'
            }
        if title is not None:
            q['searchtitle'] = title
        if author is not None:
            q['searchauthor'] = author
        if publisher is not None:
            q['searchpublisher'] = publisher
        if keywords is not None:
            q['searchkeyword'] = keywords

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.urldata = urlencode(q)

    def __call__(self, browser, verbose, timeout = 5.):
        if verbose:
            print _('Query: %s') % (self.BASE_URL+self.urldata)

        try:
            raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
            raise FictionwiseError(_('Fictionwise encountered an error.'))
        if '<title>404 - ' in raw:
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            feed = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                feed = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

        # get list of results as links
        results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
        results = results[:self.max_results]
        results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
        #return feed if no links, i.e. normally a single book or nothing
        if not results:
            results = [feed]
        return results

class ResultList(list):

    BASE_URL = 'http://www.fictionwise.com'
    COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}

    def __init__(self):
        self.retitle = re.compile(r'\[[^\[\]]+\]')
        self.rechkauth = re.compile(r'.*book\s*by', re.I)
        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
        self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
        self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
        self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
        self.resplitbr = re.compile(r'<br[^>]*>', re.I)
        self.recomment = re.compile(r'(?s)<!--.*?-->')
        self.reimg = re.compile(r'<img[^>]*>', re.I)
        self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
        self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
        self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
        self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
        self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)

    def strip_tags_etree(self, etreeobj, invalid_tags):
        for (itag, rmv) in invalid_tags.iteritems():
            if rmv:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tree()
            else:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tag()

    def clean_entry(self, entry, invalid_tags = {'script': True},
                invalid_id = (), invalid_class=(), invalid_xpath = ()):
        #invalid_tags: remove tag and keep content if False, else remove both
        #remove tags
        if invalid_tags:
            self.strip_tags_etree(entry, invalid_tags)
        #remove xpath
        if invalid_xpath:
            for eltid in invalid_xpath:
                elt = entry.xpath(eltid)
                for el in elt:
                    el.drop_tree()
        #remove id
        if invalid_id:
            for eltid in invalid_id:
                elt = entry.get_element_by_id(eltid)
                if elt is not None:
                    elt.drop_tree()
        #remove class
        if invalid_class:
            for eltclass in invalid_class:
                elts = entry.find_class(eltclass)
                if elts is not None:
                    for elt in elts:
                        elt.drop_tree()

    def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
        out = tostring(entry, pretty_print=prettyout)
        #work around tostring to remove numeric entities, for example
        reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
        return reclean.sub('', out)

    def get_title(self, entry):
        title = entry.findtext('./')
        return self.retitle.sub('', title).strip()

    def get_authors(self, entry):
        authortext = entry.find('./br').tail
        if not self.rechkauth.search(authortext):
            return []
        authortext = self.rechkauth.sub('', authortext)
        return [a.strip() for a in authortext.split('&')]

    def get_rating(self, entrytable, verbose):
        nbcomment = tostring(entrytable.getprevious())
        try:
            nbcomment = self.renbcom.search(nbcomment).group("nbcom")
        except:
            report(verbose)
            return None
        hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
                     float(image.get('height', default=0))) \
                    for image in entrytable.getiterator('img'))
        #ratings as x/5
        return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))
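        # Worked example (illustrative bar heights): a BLUE bar of height 40
        # and a GREEN bar of height 10 give (4*40 + 3*10)/(40 + 10) = 3.8,
        # scaled by 1.25 to 4.75/5; an all-BLUE histogram maps to exactly 5.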

    def get_description(self, entry):
        description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
        description = self.redesc.search(description)
        if not description or not description.group("desc"):
            return None
        #remove invalid tags
        description = self.reimg.sub('', description.group("desc"))
        description = self.recomment.sub('', description)
        description = self.resanitize.sub('', sanitize_comments_html(description))
        return _('SUMMARY:\n %s') % re.sub(r'\n\s+</p>','\n</p>', description)

    def get_publisher(self, entry):
        publisher = self.output_entry(entry.xpath('./p')[1])
        publisher = filter(lambda x: self.repub.search(x) is not None,
                           self.resplitbr.split(publisher))
        if not len(publisher):
            return None
        publisher = self.repub.sub('', publisher[0])
        return publisher.split(',')[0].strip()

    def get_tags(self, entry):
        tag = self.output_entry(entry.xpath('./p')[1])
        tag = filter(lambda x: self.retag.search(x) is not None,
                     self.resplitbr.split(tag))
        if not len(tag):
            return []
        return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))

    def get_date(self, entry, verbose):
        date = self.output_entry(entry.xpath('./p')[1])
        date = filter(lambda x: self.redate.search(x) is not None,
                      self.resplitbr.split(date))
        if not len(date):
            return None
        try:
            d = self.redate.sub('', date[0])
            if d:
                default = utcnow().replace(day=15)
                d = parse_date(d, assume_utc=True, default=default)
            else:
                d = None
        except:
            report(verbose)
            d = None
        return d

    def get_ISBN(self, entry):
        isbns = self.output_entry(entry.xpath('./p')[2])
        isbns = filter(lambda x: self.reisbn.search(x) is not None,
                       self.resplitbrdiv.split(isbns))
        if not len(isbns):
            return None
        isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
        return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]

    def fill_MI(self, entry, title, authors, ratings, verbose):
        mi = MetaInformation(title, authors)
        mi.rating = ratings
        mi.comments = self.get_description(entry)
        mi.publisher = self.get_publisher(entry)
        mi.tags = self.get_tags(entry)
        mi.pubdate = self.get_date(entry, verbose)
        mi.isbn = self.get_ISBN(entry)
        mi.author_sort = authors_to_sort_string(authors)
        return mi

    def get_individual_metadata(self, browser, linkdata, verbose):
        try:
            raw = browser.open_novisit(self.BASE_URL + linkdata).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
            raise FictionwiseError(_('Fictionwise encountered an error.'))
        if '<title>404 - ' in raw:
            report(verbose)
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

    def populate(self, entries, browser, verbose=False):
        inv_tags = {'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
                    'ul': False, 'span': False}
        inv_xpath = ('./table',)
        #single entry
        if len(entries) == 1 and not isinstance(entries[0], str):
            try:
                entry = entries.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
                self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
                title = self.get_title(entry)
                #maybe strengthen the search
                ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
                authors = self.get_authors(entry)
            except Exception as e:
                if verbose:
                    print _('Failed to get all details for an entry')
                    print e
                return
            self.append(self.fill_MI(entry, title, authors, ratings, verbose))
        else:
            #multiple entries
            for x in entries:
                try:
                    entry = self.get_individual_metadata(browser, x, verbose)
                    entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
                    self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
                    title = self.get_title(entry)
                    #maybe strengthen the search
                    ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
                    authors = self.get_authors(entry)
                except Exception as e:
                    if verbose:
                        print _('Failed to get all details for an entry')
                        print e
                    continue
                self.append(self.fill_MI(entry, title, authors, ratings, verbose))


def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=5,
           keywords=None):
    br = browser()
    entries = Query(title=title, author=author, publisher=publisher,
                    keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)

    #List of entry
    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans


def option_parser():
    parser = OptionParser(textwrap.dedent(\
        _('''\
        %prog [options]

        Fetch book metadata from Fictionwise. You must specify one of title, author,
        or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
        ''')
    ))
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--author', help=_('Book author(s)'))
    parser.add_option('-p', '--publisher', help=_('Book publisher'))
    parser.add_option('-k', '--keywords', help=_('Keywords'))
    parser.add_option('-m', '--max-results', default=20,
                      help=_('Maximum number of results to fetch'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help=_('Be more verbose about errors'))
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, publisher=opts.publisher,
                         keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None or len(results) == 0:
        print _('No result found for this search!')
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -1,247 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys, textwrap
from urllib import urlencode
from functools import partial

from lxml import etree

from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow

NAMESPACES = {
    'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom' : 'http://www.w3.org/2005/Atom',
    'dc': 'http://purl.org/dc/terms'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)

total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')

def report(verbose):
    if verbose:
        import traceback
        traceback.print_exc()


class Query(object):

    BASE_URL = 'http://books.google.com/books/feeds/volumes?'

    def __init__(self, title=None, author=None, publisher=None, isbn=None,
                 max_results=20, min_viewability='none', start_index=1):
        assert not(title is None and author is None and publisher is None and \
                   isbn is None)
        assert (max_results < 21)
        assert (min_viewability in ('none', 'partial', 'full'))
        q = ''
        if isbn is not None:
            q += 'isbn:'+isbn
        else:
            def build_term(prefix, parts):
                return ' '.join('in'+prefix + ':' + x for x in parts)
            if title is not None:
                q += build_term('title', title.split())
            if author is not None:
                q += ('+' if q else '')+build_term('author', author.split())
            if publisher is not None:
                q += ('+' if q else '')+build_term('publisher', publisher.split())

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.url = self.BASE_URL+urlencode({
            'q':q,
            'max-results':max_results,
            'start-index':start_index,
            'min-viewability':min_viewability,
        })

    def __call__(self, browser, verbose):
        if verbose:
            print 'Query:', self.url
        feed = etree.fromstring(browser.open(self.url).read())
        #print etree.tostring(feed, pretty_print=True)
        total = int(total_results(feed)[0].text)
        start = int(start_index(feed)[0].text)
        entries = entry(feed)
        new_start = start + len(entries)
        if new_start > total:
            new_start = 0
        return entries, new_start
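    # Worked example (illustrative counts): with total = 45 and start = 21, a
    # feed of 20 entries gives new_start = 41; the following call returns the
    # last 5 entries and 46 > 45, so new_start = 0 and the caller stops paging.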


class ResultList(list):

    def get_description(self, entry, verbose):
        try:
            desc = description(entry)
            if desc:
                return 'SUMMARY:\n'+desc[0].text
        except:
            report(verbose)

    def get_language(self, entry, verbose):
        try:
            l = language(entry)
            if l:
                return l[0].text
        except:
            report(verbose)

    def get_title(self, entry):
        candidates = [x.text for x in title(entry)]
        return ': '.join(candidates)

    def get_authors(self, entry):
        m = creator(entry)
        if not m:
            m = []
        m = [x.text for x in m]
        return m

    def get_author_sort(self, entry, verbose):
        for x in creator(entry):
            for key, val in x.attrib.items():
                if key.endswith('file-as'):
                    return val

    def get_identifiers(self, entry, mi):
        isbns = []
        for x in identifier(entry):
            t = str(x.text).strip()
            if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
                if t[:5].upper() == 'ISBN:':
                    isbns.append(t[5:])
        if isbns:
            mi.isbn = sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]

    def get_tags(self, entry, verbose):
        try:
            btags = [x.text for x in subject(entry)]
            tags = []
            for t in btags:
                tags.extend([y.strip() for y in t.split('/')])
            tags = list(sorted(list(set(tags))))
        except:
            report(verbose)
            tags = []
        return [x.replace(',', ';') for x in tags]

    def get_publisher(self, entry, verbose):
        try:
            pub = publisher(entry)[0].text
        except:
            pub = None
        return pub

    def get_date(self, entry, verbose):
        try:
            d = date(entry)
            if d:
                default = utcnow().replace(day=15)
                d = parse_date(d[0].text, assume_utc=True, default=default)
            else:
                d = None
        except:
            report(verbose)
            d = None
        return d

    def populate(self, entries, browser, verbose=False):
        for x in entries:
            try:
                id_url = entry_id(x)[0].text
                title = self.get_title(x)
            except:
                report(verbose)
                continue  # cannot build a record without an id and a title
            mi = MetaInformation(title, self.get_authors(x))
            try:
                raw = browser.open(id_url).read()
                feed = etree.fromstring(raw)
                x = entry(feed)[0]
            except Exception as e:
                if verbose:
                    print 'Failed to get all details for an entry'
                    print e
            mi.author_sort = self.get_author_sort(x, verbose)
            mi.comments = self.get_description(x, verbose)
            self.get_identifiers(x, mi)
            mi.tags = self.get_tags(x, verbose)
            mi.publisher = self.get_publisher(x, verbose)
            mi.pubdate = self.get_date(x, verbose)
            mi.language = self.get_language(x, verbose)
            self.append(mi)


def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=40):
    br = browser()
    br.set_handle_gzip(True)
    start, entries = 1, []
    while start > 0 and len(entries) <= max_results:
        new, start = Query(title=title, author=author, publisher=publisher,
                           isbn=isbn, min_viewability=min_viewability,
                           # pass the updated index so each call fetches the next page
                           start_index=start)(br, verbose)
        if not new:
            break
        entries.extend(new)

    entries = entries[:max_results]

    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans

def option_parser():
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]

        Fetch book metadata from Google. You must specify one of title, author,
        publisher or ISBN. If you specify ISBN the others are ignored. Will
        fetch a maximum of 100 matches, so you should make your query as
        specific as possible.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-m', '--max-results', default=10,
                      help='Maximum number of results to fetch')
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, opts.publisher, opts.isbn,
                         verbose=opts.verbose, max_results=opts.max_results)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    for result in results:
        print unicode(result).encode(preferred_encoding)
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -1,159 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Interface to isbndb.com. My key HLLXQX2A.
'''

import sys, re
from urllib import quote

from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre import browser

BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'

class ISBNDBError(Exception):
    pass

def fetch_metadata(url, max=3, timeout=5.):
    books = []
    page_number = 1
    total_results = 31
    br = browser()
    while len(books) < total_results and max > 0:
        try:
            raw = br.open(url, timeout=timeout).read()
        except Exception as err:
            raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
        soup = BeautifulStoneSoup(raw,
                convertEntities=BeautifulStoneSoup.XML_ENTITIES)
        book_list = soup.find('booklist')
        if book_list is None:
            errmsg = soup.find('errormessage').string
            raise ISBNDBError('Error fetching metadata: '+errmsg)
        total_results = int(book_list['total_results'])
        page_number += 1
        np = '&page_number=%s&'%page_number
        url = re.sub(r'\&page_number=\d+\&', np, url)
        books.extend(book_list.findAll('bookdata'))
        max -= 1
    return books
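# Illustrative effect of the re.sub above on the query URL:
#   ...&page_number=1&results=... -> ...&page_number=2&results=...
# so each loop iteration requests the next page of results from isbndb.com.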


class ISBNDBMetadata(Metadata):

    def __init__(self, book):
        Metadata.__init__(self, None)

        def tostring(e):
            if not hasattr(e, 'string'):
                return None
            ans = e.string
            if ans is not None:
                ans = unicode(ans).strip()
            if not ans:
                ans = None
            return ans

        self.isbn = unicode(book.get('isbn13', book.get('isbn')))
        title = tostring(book.find('titlelong'))
        if not title:
            title = tostring(book.find('title'))
        self.title = title
        self.title = unicode(self.title).strip()
        authors = []
        au = tostring(book.find('authorstext'))
        if au:
            au = au.strip()
            temp = au.split(',')
            for au in temp:
                if not au: continue
                authors.extend([a.strip() for a in au.split('&')])
        if authors:
            self.authors = authors
        try:
            self.author_sort = tostring(book.find('authors').find('person'))
            if self.authors and self.author_sort == self.authors[0]:
                self.author_sort = None
        except:
            pass
        self.publisher = tostring(book.find('publishertext'))

        summ = tostring(book.find('summary'))
        if summ:
            self.comments = 'SUMMARY:\n'+summ


def build_isbn(base_url, opts):
    return base_url + 'index1=isbn&value1='+opts.isbn

def build_combined(base_url, opts):
    query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
        if e is not None ])
    query = query.strip()
    if len(query) == 0:
        raise ISBNDBError('You must specify at least one of --author, --title or --publisher')

    query = re.sub(r'\s+', '+', query)
    if isinstance(query, unicode):
        query = query.encode('utf-8')
    return base_url+'index1=combined&value1='+quote(query, '+')


def option_parser():
    parser = OptionParser(usage=\
        _('''
        %prog [options] key

        Fetch metadata for books from isbndb.com. You can specify either the
        book's ISBN or its title and author. If you specify the title and author,
        then more than one book may be returned.

        key is the account key you generate after signing up for a free account from isbndb.com.

        '''))
    parser.add_option('-i', '--isbn', default=None, dest='isbn',
                      help=_('The ISBN ID of the book you want metadata for.'))
    parser.add_option('-a', '--author', dest='author',
                      default=None, help=_('The author whose book to search for.'))
    parser.add_option('-t', '--title', dest='title',
                      default=None, help=_('The title of the book to search for.'))
    parser.add_option('-p', '--publisher', default=None, dest='publisher',
                      help=_('The publisher of the book to search for.'))
    parser.add_option('-v', '--verbose', default=False,
                      action='store_true', help=_('Verbose processing'))

    return parser


def create_books(opts, args, timeout=5.):
    base_url = BASE_URL%dict(key=args[1])
    if opts.isbn is not None:
        url = build_isbn(base_url, opts)
    else:
        url = build_combined(base_url, opts)

    if opts.verbose:
        print ('ISBNDB query: '+url)

    tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
    #remove duplicate ISBNs
    return list(dict((book.isbn, book) for book in tans).values())
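# Illustrative: results with ISBNs ('A', 'B', 'A') collapse to two entries,
# because the dict keyed on isbn keeps only one book per ISBN.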

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        print ('You must supply the isbndb.com key')
        return 1

    for book in create_books(opts, args):
        print unicode(book).encode('utf-8')

    return 0

if __name__ == '__main__':
    sys.exit(main())
@@ -400,7 +400,8 @@ class MetadataUpdater(object):
         if getattr(self, 'exth', None) is None:
             raise MobiError('No existing EXTH record. Cannot update metadata.')
 
-        self.record0[92:96] = iana2mobi(mi.language)
+        if not mi.is_null('language'):
+            self.record0[92:96] = iana2mobi(mi.language)
         self.create_exth(exth=exth, new_title=mi.title)
 
         # Fetch updated timestamp, cover_record, thumbnail_record

@@ -1,411 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'

import sys, textwrap, re, traceback, socket
from urllib import urlencode
from math import ceil
from copy import deepcopy

from lxml.html import soupparser

from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
    authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.metadata.covers import CoverDownload
from calibre.utils.config import OptionParser

class NiceBooks(MetadataSource):

    name = 'Nicebooks'
    description = _('Downloads metadata from french Nicebooks')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    version = (1, 0, 0)

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                                  self.isbn, max_results=10, verbose=self.verbose)
        except Exception as e:
            self.exception = e
            self.tb = traceback.format_exc()

class NiceBooksCovers(CoverDownload):

    name = 'Nicebooks covers'
    description = _('Downloads covers from french Nicebooks')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    type = _('Cover download')
    version = (1, 0, 0)

    def has_cover(self, mi, ans, timeout=5.):
        if not mi.isbn:
            return False
        br = browser()
        try:
            entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
            if Covers(mi.isbn)(entry).check_cover():
                self.debug('cover for', mi.isbn, 'found')
                ans.set()
        except Exception as e:
            self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        if not mi.isbn:
            return
        br = browser()
        try:
            entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
            cover_data, ext = Covers(mi.isbn)(entry).get_cover(br, timeout)
            if not ext:
                ext = 'jpg'
            result_queue.put((True, cover_data, ext, self.name))
        except Exception as e:
            result_queue.put((False, self.exception_to_string(e),
                traceback.format_exc(), self.name))


class NiceBooksError(Exception):
    pass

class ISBNNotFound(NiceBooksError):
    pass

def report(verbose):
    if verbose:
        traceback.print_exc()

class Query(object):

    BASE_URL = 'http://fr.nicebooks.com/'

    def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None, max_results=20):
        assert not(title is None and author is None and publisher is None \
                   and isbn is None and keywords is None)
        assert (max_results < 21)

        self.max_results = int(max_results)

        if isbn is not None:
            q = isbn
        else:
            q = ' '.join([i for i in (title, author, publisher, keywords) \
                if i is not None])

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.urldata = 'search?' + urlencode({'q':q,'s':'Rechercher'})

    def __call__(self, browser, verbose, timeout = 5.):
        if verbose:
            print _('Query: %s') % (self.BASE_URL+self.urldata)

        try:
            raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
            raise NiceBooksError(_('Nicebooks encountered an error.'))
        if '<title>404 - ' in raw:
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            feed = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                feed = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

        #number of pages to query
        try:
            nbresults = int(feed.xpath("//div[@id='topbar']/b")[0].text)
        except:
            #direct hit
            return [feed]

        nbpagetoquery = int(ceil(float(min(nbresults, self.max_results))/10))
        pages = [feed]
        if nbpagetoquery > 1:
            for i in xrange(2, nbpagetoquery + 1):
                try:
                    urldata = self.urldata + '&p=' + str(i)
                    raw = browser.open_novisit(self.BASE_URL+urldata, timeout=timeout).read()
                except Exception as e:
                    continue
                if '<title>404 - ' in raw:
                    continue
                raw = xml_to_unicode(raw, strip_encoding_pats=True,
                        resolve_entities=True)[0]
                try:
                    feed = soupparser.fromstring(raw)
                except:
                    try:
                        #remove ASCII invalid chars
                        feed = soupparser.fromstring(clean_ascii_chars(raw))
                    except:
                        continue
                pages.append(feed)

        results = []
        for x in pages:
            results.extend([i.find_class('title')[0].get('href') \
                for i in x.xpath("//ul[@id='results']/li")])
        return results[:self.max_results]
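        # Worked example (illustrative counts): 27 hits with max_results=20
        # give ceil(20/10) = 2 pages to fetch; 8 hits give ceil(8/10) = 1, so
        # only the page already in hand is parsed.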

class ResultList(list):

    BASE_URL = 'http://fr.nicebooks.com'

    def __init__(self):
        self.repub = re.compile(u'\s*.diteur\s*', re.I)
        self.reauteur = re.compile(u'\s*auteur.*', re.I)
        self.reautclean = re.compile(u'\s*\(.*\)\s*')

    def get_title(self, entry):
        title = deepcopy(entry)
        title.remove(title.find("dl[@title='Informations sur le livre']"))
        title = ' '.join([i.text_content() for i in title.iterchildren()])
        return unicode(title.replace('\n', ''))

    def get_authors(self, entry):
        author = entry.find("dl[@title='Informations sur le livre']")
        authortext = []
        for x in author.getiterator('dt'):
            if self.reauteur.match(x.text):
                elt = x.getnext()
                while elt.tag == 'dd':
                    authortext.append(unicode(elt.text_content()))
                    elt = elt.getnext()
                break
        if len(authortext) == 1:
            authortext = [self.reautclean.sub('', authortext[0])]
        return authortext

    def get_description(self, entry, verbose):
        try:
            return u'RESUME:\n' + unicode(entry.getparent().xpath("//p[@id='book-description']")[0].text)
        except:
            report(verbose)
            return None

    def get_book_info(self, entry, mi, verbose):
        entry = entry.find("dl[@title='Informations sur le livre']")
        for x in entry.getiterator('dt'):
            if x.text == 'ISBN':
                isbntext = x.getnext().text_content().replace('-', '')
                if check_isbn(isbntext):
                    mi.isbn = unicode(isbntext)
            elif self.repub.match(x.text):
                mi.publisher = unicode(x.getnext().text_content())
            elif x.text == 'Langue':
                mi.language = unicode(x.getnext().text_content())
            elif x.text == 'Date de parution':
                d = x.getnext().text_content()
                try:
                    default = utcnow().replace(day=15)
                    d = replace_months(d, 'fr')
                    d = parse_date(d, assume_utc=True, default=default)
                    mi.pubdate = d
                except:
                    report(verbose)
        return mi

    def fill_MI(self, entry, title, authors, verbose):
        mi = MetaInformation(title, authors)
        mi.author_sort = authors_to_sort_string(authors)
        mi.comments = self.get_description(entry, verbose)
        return self.get_book_info(entry, mi, verbose)

    def get_individual_metadata(self, browser, linkdata, verbose):
        try:
            raw = browser.open_novisit(self.BASE_URL + linkdata).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
            raise NiceBooksError(_('Nicebooks encountered an error.'))
        if '<title>404 - ' in raw:
            report(verbose)
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            feed = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                feed = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

        # get results
        return feed.xpath("//div[@id='container']")[0]

    def populate(self, entries, browser, verbose=False):
        #single entry
        if len(entries) == 1 and not isinstance(entries[0], str):
            try:
                entry = entries[0].xpath("//div[@id='container']")[0]
                entry = entry.find("div[@id='book-info']")
                title = self.get_title(entry)
                authors = self.get_authors(entry)
            except Exception as e:
                if verbose:
                    print 'Failed to get all details for an entry'
                    print e
                return
            self.append(self.fill_MI(entry, title, authors, verbose))
        else:
            #multiple entries
            for x in entries:
                try:
                    entry = self.get_individual_metadata(browser, x, verbose)
                    entry = entry.find("div[@id='book-info']")
                    title = self.get_title(entry)
                    authors = self.get_authors(entry)
                except Exception as e:
                    if verbose:
                        print 'Failed to get all details for an entry'
                        print e
                    continue
                self.append(self.fill_MI(entry, title, authors, verbose))

class Covers(object):

    def __init__(self, isbn = None):
        assert isbn is not None
        self.urlimg = ''
        self.isbn = isbn
        self.isbnf = False

    def __call__(self, entry = None):
        try:
            self.urlimg = entry.xpath("//div[@id='book-picture']/a")[0].get('href')
        except:
            return self
        isbno = entry.get_element_by_id('book-info').find("dl[@title='Informations sur le livre']")
        for x in isbno.getiterator('dt'):
            if x.text == 'ISBN' and check_isbn(x.getnext().text_content()):
                self.isbnf = True
                break
        return self

    def check_cover(self):
        return True if self.urlimg else False

    def get_cover(self, browser, timeout = 5.):
        try:
            cover, ext = browser.open_novisit(self.urlimg, timeout=timeout).read(), \
                self.urlimg.rpartition('.')[-1]
            return cover, ext if ext else 'jpg'
        except Exception as err:
            if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
            if not len(self.urlimg):
                if not self.isbnf:
                    raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
            raise NiceBooksError(_('An error occurred with the Nicebooks cover fetcher'))


def search(title=None, author=None, publisher=None, isbn=None,
           max_results=5, verbose=False, keywords=None):
    br = browser()
    entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
                    keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)

    if entries is None or len(entries) == 0:
        return None

    #List of entry
    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans

def check_for_cover(isbn):
    br = browser()
    entry = Query(isbn=isbn, max_results=1)(br, False)[0]
    return Covers(isbn)(entry).check_cover()

def cover_from_isbn(isbn, timeout = 5.):
    br = browser()
    entry = Query(isbn=isbn, max_results=1)(br, False, timeout)[0]
    return Covers(isbn)(entry).get_cover(br, timeout)


def option_parser():
    parser = OptionParser(textwrap.dedent(\
        _('''\
        %prog [options]

        Fetch book metadata from Nicebooks. You must specify one of title, author,
        ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
        It can also get covers if the option is activated.
        ''')
    ))
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--author', help=_('Book author(s)'))
    parser.add_option('-p', '--publisher', help=_('Book publisher'))
    parser.add_option('-i', '--isbn', help=_('Book ISBN'))
    parser.add_option('-k', '--keywords', help=_('Keywords'))
    parser.add_option('-c', '--covers', default=0,
                      help=_('Covers: 1-Check/ 2-Download'))
    # no short flag here: -p is already taken by --publisher
    parser.add_option('--coverspath', default='',
                      help=_('Covers files path'))
    parser.add_option('-m', '--max-results', default=20,
                      help=_('Maximum number of results to fetch'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help=_('Be more verbose about errors'))
    return parser

def main(args=sys.argv):
    import os
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
                         keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None or len(results) == 0:
        print _('No result found for this search!')
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        covact = int(opts.covers)
        if covact == 1:
            textcover = _('No cover found!')
            if check_for_cover(result.isbn):
                textcover = _('A cover was found for this book')
            print textcover
        elif covact == 2:
            cover_data, ext = cover_from_isbn(result.isbn)
            cpath = result.isbn
            if len(opts.coverspath):
                cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
            oname = os.path.abspath(cpath+'.'+ext)
            open(oname, 'wb').write(cover_data)
            print _('Cover saved to file '), oname
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -966,7 +966,9 @@ class OPF(object): # {{{
             cover_id = covers[0].get('content')
             for item in self.itermanifest():
                 if item.get('id', None) == cover_id:
-                    return item.get('href', None)
+                    mt = item.get('media-type', '')
+                    if 'xml' not in mt:
+                        return item.get('href', None)
 
     @dynamic_property
     def cover(self):
@@ -301,7 +301,7 @@ class Amazon(Source):
         if asin is None:
            asin = identifiers.get('asin', None)
         if asin:
-            return 'http://amzn.com/%s'%asin
+            return ('amazon', asin, 'http://amzn.com/%s'%asin)
     # }}}
 
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
@@ -56,7 +56,8 @@ class InternalMetadataCompareKeyGen(object):
 
     '''
     Generate a sort key for comparison of the relevance of Metadata objects,
-    given a search query.
+    given a search query. This is used only to compare results from the same
+    metadata source, not across different sources.
 
     The sort key ensures that an ascending order sort is a sort by order of
     decreasing relevance.
@@ -306,7 +307,7 @@ class Source(Plugin):
     title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
             [
                 # Remove things like: (2010) (Omnibus) etc.
-                (r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|mass\s*market|edition|ed\.)[\])}]', ''),
+                (r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|turtleback|mass\s*market|edition|ed\.)[\])}]', ''),
                 # Remove any strings that contain the substring edition inside
                 # parentheses
                 (r'(?i)[({\[].*?(edition|ed.).*?[\]})]', ''),
@@ -374,7 +375,11 @@ class Source(Plugin):
 
     def get_book_url(self, identifiers):
         '''
-        Return the URL for the book identified by identifiers at this source.
+        Return a 3-tuple or None. The 3-tuple is of the form:
+        (identifier_type, identifier_value, URL).
+        The URL is the URL for the book identified by identifiers at this
+        source. identifier_type, identifier_value specify the identifier
+        corresponding to the URL.
         This URL must be browseable by a human using a browser. It is meant
         to provide a clickable link for the user to easily visit the book's page
         at this source.
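A minimal sketch of the contract described above (hypothetical source named
'mysource'; only the shape of the return value comes from the docstring):

    def get_book_url(self, identifiers):
        # Return (identifier_type, identifier_value, URL) or None.
        val = identifiers.get('mysource', None)
        if val is not None:
            return ('mysource', val, 'http://example.com/book/%s' % val)

The Amazon and Google Books hunks in this commit follow the same pattern, and
urls_from_identifiers() below unpacks the triple into (plugin.name, id_type,
id_val, url).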
@@ -19,13 +19,8 @@ from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.metadata.sources.base import create_log
 from calibre.ebooks.metadata.sources.identify import identify
 from calibre.ebooks.metadata.sources.covers import download_cover
-from calibre.utils.config import test_eight_code
 
 def option_parser():
-    if not test_eight_code:
-        from calibre.ebooks.metadata.fetch import option_parser
-        return option_parser()
-
     parser = OptionParser(textwrap.dedent(
         '''\
         %prog [options]
@@ -48,9 +43,6 @@ def option_parser():
     return parser
 
 def main(args=sys.argv):
-    if not test_eight_code:
-        from calibre.ebooks.metadata.fetch import main
-        return main(args)
     parser = option_parser()
     opts, args = parser.parse_args(args)
 
@@ -173,7 +173,7 @@ class GoogleBooks(Source):
     def get_book_url(self, identifiers): # {{{
         goog = identifiers.get('google', None)
         if goog is not None:
-            return 'http://books.google.com/books?id=%s'%goog
+            return ('google', goog, 'http://books.google.com/books?id=%s'%goog)
     # }}}
 
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
@@ -13,6 +13,7 @@ from Queue import Queue, Empty
 from threading import Thread
 from io import BytesIO
 from operator import attrgetter
+from urlparse import urlparse

 from calibre.customize.ui import metadata_plugins, all_metadata_plugins
 from calibre.ebooks.metadata.sources.base import create_log, msprefs

@@ -400,6 +401,9 @@ def identify(log, abort, # {{{
                     and plugin.get_cached_cover_url(result.identifiers) is not
                     None)
             result.identify_plugin = plugin
+            if msprefs['txt_comments']:
+                if plugin.has_html_comments and result.comments:
+                    result.comments = html2text(result.comments)

 log('The identify phase took %.2f seconds'%(time.time() - start_time))
 log('The longest time (%f) was taken by:'%longest, lp)

@@ -410,10 +414,6 @@ def identify(log, abort, # {{{
     log('We have %d merged results, merging took: %.2f seconds' %
             (len(results), time.time() - start_time))

-    if msprefs['txt_comments']:
-        for r in results:
-            if r.identify_plugin.has_html_comments and r.comments:
-                r.comments = html2text(r.comments)
-
     max_tags = msprefs['max_tags']
     for r in results:

@@ -435,18 +435,38 @@ def identify(log, abort, # {{{
 # }}}

 def urls_from_identifiers(identifiers): # {{{
+    identifiers = dict([(k.lower(), v) for k, v in identifiers.iteritems()])
     ans = []
     for plugin in all_metadata_plugins():
         try:
-            url = plugin.get_book_url(identifiers)
-            if url is not None:
-                ans.append((plugin.name, url))
+            id_type, id_val, url = plugin.get_book_url(identifiers)
+            ans.append((plugin.name, id_type, id_val, url))
         except:
             pass
     isbn = identifiers.get('isbn', None)
     if isbn:
-        ans.append((isbn,
-            'http://www.worldcat.org/search?q=bn%%3A%s&qt=advanced'%isbn))
+        ans.append((isbn, 'isbn', isbn,
+            'http://www.worldcat.org/isbn/'+isbn))
+    doi = identifiers.get('doi', None)
+    if doi:
+        ans.append(('DOI', 'doi', doi,
+            'http://dx.doi.org/'+doi))
+    arxiv = identifiers.get('arxiv', None)
+    if arxiv:
+        ans.append(('arXiv', 'arxiv', arxiv,
+            'http://arxiv.org/abs/'+arxiv))
+    oclc = identifiers.get('oclc', None)
+    if oclc:
+        ans.append(('OCLC', 'oclc', oclc,
+            'http://www.worldcat.org/oclc/'+oclc))
+    url = identifiers.get('uri', None)
+    if url is None:
+        url = identifiers.get('url', None)
+    if url and url.startswith('http'):
+        url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
+        parts = urlparse(url)
+        name = parts.netloc
+        ans.append((name, 'url', url, url))
     return ans
 # }}}

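Callers of urls_from_identifiers now receive 4-tuples instead of (name, url) pairs, so they must unpack the two extra fields. A sketch of consuming code under that assumption, in the Python 2 style of the codebase (the identifier values are illustrative):

```python
from calibre.ebooks.metadata.sources.identify import urls_from_identifiers

for name, id_type, id_val, url in urls_from_identifiers(
        {'isbn': '0000000000', 'doi': '10.1000/182'}):
    # name is the display label (a plugin name, 'DOI', 'arXiv', 'OCLC', ...);
    # id_type and id_val record which identifier produced the link
    print '%s (%s:%s) -> %s' % (name, id_type, id_val, url)
```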
@@ -81,7 +81,7 @@ class ISBNDB(Source):
             author_tokens = self.get_author_tokens(authors,
                     only_first_author=True)
             tokens += author_tokens
-        tokens = [quote(t) for t in tokens]
+        tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t) for t in tokens]
         q = '+'.join(tokens)
         q = 'index1=combined&value1='+q

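The quoting fix sidesteps a classic Python 2 pitfall: urllib.quote raises KeyError when handed a unicode string containing non-ASCII characters, so tokens are encoded to UTF-8 first. A standalone illustration (Python 2):

```python
from urllib import quote  # Python 2

token = u'caf\xe9'
# quote(token) would raise KeyError: u'\xe9' on Python 2
safe = quote(token.encode('utf-8') if isinstance(token, unicode) else token)
print safe  # caf%C3%A9
```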
@@ -41,7 +41,7 @@ class OverDrive(Source):
     cached_cover_url_is_reliable = True

     options = (
-            Option('get_full_metadata', 'bool', False,
+            Option('get_full_metadata', 'bool', True,
                 _('Download all metadata (slow)'),
                 _('Enable this option to gather all metadata available from Overdrive.')),
             )

@@ -265,7 +265,7 @@ class OverDrive(Source):
             if creators:
                 creators = creators.split(', ')
             # if an exact match in a preferred format occurs
-            if ((author and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
+            if ((author and creators and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
                 return self.format_results(reserveid, od_title, subtitle, series, publisher,
                         creators, thumbimage, worldcatlink, formatid)
             else:

@@ -291,7 +291,7 @@ class OverDrive(Source):
                     close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
                 else:
                     close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))

             elif close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
                 close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))

@@ -222,7 +222,7 @@ class SaveWorker(Thread):
                 if isbytestring(fpath):
                     fpath = fpath.decode(filesystem_encoding)
                 formats[fmt.lower()] = fpath
-            data[i] = [opf, cpath, formats]
+            data[i] = [opf, cpath, formats, mi.last_modified.isoformat()]
         return data

     def run(self):
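Consumers of the per-book rows now receive a fourth element, the last-modified timestamp serialized as an ISO-8601 string. A sketch of unpacking it on the receiving side; the row values are illustrative and parse_date is assumed to be calibre's own date helper:

```python
from calibre.utils.date import parse_date  # assumption: calibre's date helper

row = ['metadata.opf', 'cover.jpg', {'epub': '/tmp/book.epub'},
        '2011-04-29T10:00:00+00:00']  # shaped like data[i] built above
opf, cover_path, formats, last_modified = row
when = parse_date(last_modified)  # ISO-8601 string back to a datetime
```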
@@ -253,6 +253,8 @@ class MobiReader(object):

         .italic { font-style: italic }

+        .underline { text-decoration: underline }
+
         .mbp_pagebreak {
             page-break-after: always; margin: 0; display: block
         }

@@ -601,6 +603,9 @@ class MobiReader(object):
             elif tag.tag == 'i':
                 tag.tag = 'span'
                 tag.attrib['class'] = 'italic'
+            elif tag.tag == 'u':
+                tag.tag = 'span'
+                tag.attrib['class'] = 'underline'
             elif tag.tag == 'b':
                 tag.tag = 'span'
                 tag.attrib['class'] = 'bold'
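The underline handling follows the exact pattern already used for italics and bold: rewrite the presentational tag to a span and let the stylesheet above supply the styling. A self-contained illustration of that rewrite with lxml:

```python
from lxml import html

root = html.fromstring('<p><u>under</u> and <b>bold</b> text</p>')
for tag in root.iter():
    if tag.tag in ('i', 'u', 'b'):
        # Map the presentational tag to a classed span, as MobiReader does
        tag.attrib['class'] = {'i': 'italic', 'u': 'underline', 'b': 'bold'}[tag.tag]
        tag.tag = 'span'
print html.tostring(root)
# <p><span class="underline">under</span> and <span class="bold">bold</span> text</p>
```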
@@ -7,6 +7,8 @@ __docformat__ = 'restructuredtext en'
 Convert an ODT file into a Open Ebook
 '''
 import os

+from lxml import etree
+
 from odf.odf2xhtml import ODF2XHTML

 from calibre import CurrentDir, walk

@@ -23,7 +25,51 @@ class Extract(ODF2XHTML):
         with open(name, 'wb') as f:
             f.write(data)

-    def __call__(self, stream, odir):
+    def filter_css(self, html, log):
+        root = etree.fromstring(html)
+        style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
+        if style:
+            style = style[0]
+            css = style.text
+            if css:
+                style.text, sel_map = self.do_filter_css(css)
+                for x in root.xpath('//*[@class]'):
+                    extra = []
+                    orig = x.get('class')
+                    for cls in orig.split():
+                        extra.extend(sel_map.get(cls, []))
+                    if extra:
+                        x.set('class', orig + ' ' + ' '.join(extra))
+            html = etree.tostring(root, encoding='utf-8',
+                    xml_declaration=True)
+        return html
+
+    def do_filter_css(self, css):
+        from cssutils import parseString
+        from cssutils.css import CSSRule
+        sheet = parseString(css)
+        rules = list(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
+        sel_map = {}
+        count = 0
+        for r in rules:
+            # Check if we have only class selectors for this rule
+            nc = [x for x in r.selectorList if not
+                    x.selectorText.startswith('.')]
+            if len(r.selectorList) > 1 and not nc:
+                # Replace all the class selectors with a single class selector
+                # This will be added to the class attribute of all elements
+                # that have one of these selectors.
+                replace_name = 'c_odt%d'%count
+                count += 1
+                for sel in r.selectorList:
+                    s = sel.selectorText[1:]
+                    if s not in sel_map:
+                        sel_map[s] = []
+                    sel_map[s].append(replace_name)
+                r.selectorText = '.'+replace_name
+        return sheet.cssText, sel_map
+
+    def __call__(self, stream, odir, log):
         from calibre.utils.zipfile import ZipFile
         from calibre.ebooks.metadata.meta import get_metadata
         from calibre.ebooks.metadata.opf2 import OPFCreator

@@ -32,13 +78,17 @@ class Extract(ODF2XHTML):
         if not os.path.exists(odir):
             os.makedirs(odir)
         with CurrentDir(odir):
-            print 'Extracting ODT file...'
+            log('Extracting ODT file...')
             html = self.odf2xhtml(stream)
             # A blanket img specification like this causes problems
-            # with EPUB output as the contaiing element often has
+            # with EPUB output as the containing element often has
             # an absolute height and width set that is larger than
             # the available screen real estate
             html = html.replace('img { width: 100%; height: 100%; }', '')
+            try:
+                html = self.filter_css(html, log)
+            except:
+                log.exception('Failed to filter CSS, conversion may be slow')
             with open('index.xhtml', 'wb') as f:
                 f.write(html.encode('utf-8'))
         zf = ZipFile(stream, 'r')

@@ -67,7 +117,7 @@ class ODTInput(InputFormatPlugin):

     def convert(self, stream, options, file_ext, log,
             accelerators):
-        return Extract()(stream, '.')
+        return Extract()(stream, '.', log)

     def postprocess_book(self, oeb, opts, log):
         # Fix <p><div> constructs as the asinine epubchecker complains
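The intent of do_filter_css is easiest to see on a concrete rule. ODF output tends to emit one class per paragraph style, so stylesheets fill up with rules like '.P1, .P2 { font-weight: bold }'; collapsing each class-only rule to a single generated class keeps the sheet small, which is why the fallback message warns that conversion may be slow if filtering fails. A hedged sketch of the transformation (values illustrative, exact cssText formatting may differ):

```python
# Sketch of what Extract.do_filter_css produces for a class-only rule:
css = '.P1, .P2 { font-weight: bold }'
new_css, sel_map = Extract().do_filter_css(css)
# new_css -> '.c_odt0 { font-weight: bold }'
# sel_map -> {'P1': ['c_odt0'], 'P2': ['c_odt0']}
# filter_css() then appends c_odt0 to the class attribute of every
# element whose class list contains P1 or P2.
```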
@@ -16,7 +16,7 @@ from urllib import unquote as urlunquote
 from lxml import etree, html
 from calibre.constants import filesystem_encoding, __version__
 from calibre.translations.dynamic import translate
-from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
 from calibre.ebooks.conversion.preprocess import CSSPreProcessor
 from calibre import isbytestring, as_unicode, get_types_map
@@ -446,22 +446,23 @@ class NullContainer(object):
 class DirContainer(object):
     """Filesystem directory container."""

-    def __init__(self, path, log):
+    def __init__(self, path, log, ignore_opf=False):
         self.log = log
         if isbytestring(path):
             path = path.decode(filesystem_encoding)
+        self.opfname = None
         ext = os.path.splitext(path)[1].lower()
         if ext == '.opf':
             self.opfname = os.path.basename(path)
             self.rootdir = os.path.dirname(path)
             return
         self.rootdir = path
-        for path in self.namelist():
-            ext = os.path.splitext(path)[1].lower()
-            if ext == '.opf':
-                self.opfname = path
-                return
-        self.opfname = None
+        if not ignore_opf:
+            for path in self.namelist():
+                ext = os.path.splitext(path)[1].lower()
+                if ext == '.opf':
+                    self.opfname = path
+                    return

     def read(self, path):
         if path is None:
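With the new flag a caller that does not care about the OPF, or already knows its location, can skip the directory walk entirely. A usage sketch; the path is hypothetical and default_log is assumed as a stand-in logger:

```python
from calibre.utils.logging import default_log as log  # assumption: stand-in logger
from calibre.ebooks.oeb.base import DirContainer

c1 = DirContainer('/path/to/book', log)                   # scans dir for an OPF
c2 = DirContainer('/path/to/book', log, ignore_opf=True)  # skips the scan;
# c2.opfname stays None thanks to the new early initialization above
```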
@@ -852,6 +853,7 @@ class Manifest(object):
             self.oeb.log.debug('Parsing', self.href, '...')
             # Convert to Unicode and normalize line endings
             data = self.oeb.decode(data)
+            data = strip_encoding_declarations(data)
             data = self.oeb.html_preprocessor(data)
             # There could be null bytes in data if it had &#0; entities in it
             data = data.replace('\0', '')
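Stripping the declaration at this point matters because the data has just been decoded to unicode: a stale XML prolog or meta charset tag no longer describes the bytes, and lxml refuses to parse unicode strings that still carry an encoding declaration. Roughly:

```python
from calibre.ebooks.chardet import strip_encoding_declarations

data = u'<?xml version="1.0" encoding="iso-8859-1"?><html/>'
# lxml: "Unicode strings with encoding declaration are not supported"
data = strip_encoding_declarations(data)  # declaration removed, parse proceeds
```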
@@ -1047,8 +1049,8 @@ class Manifest(object):

         # Remove hyperlinks with no content as they cause rendering
         # artifacts in browser based renderers
-        # Also remove empty <b> and <i> tags
-        for a in xpath(data, '//h:a[@href]|//h:i|//h:b'):
+        # Also remove empty <b>, <u> and <i> tags
+        for a in xpath(data, '//h:a[@href]|//h:i|//h:b|//h:u'):
             if a.get('id', None) is None and a.get('name', None) is None \
                     and len(a) == 0 and not a.text:
                 remove_elem(a)
@@ -125,7 +125,19 @@ class Stylizer(object):
     def __init__(self, tree, path, oeb, opts, profile=None,
             extra_css='', user_css=''):
         self.oeb, self.opts = oeb, opts
-        self.profile = opts.input_profile
+        self.profile = profile
+        if self.profile is None:
+            # Use the default profile. This should really be using
+            # opts.output_profile, but I don't want to risk changing it, as
+            # doing so might well have hard to debug font size effects.
+            from calibre.customize.ui import output_profiles
+            for x in output_profiles():
+                if x.short_name == 'default':
+                    self.profile = x
+                    break
+        if self.profile is None:
+            # Just in case the default profile is removed in the future :)
+            self.profile = opts.output_profile
         self.logger = oeb.logger
         item = oeb.manifest.hrefs[path]
         basename = os.path.basename(path)
@@ -36,7 +36,7 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
         m.clear('description')
         m.add('description', mi.comments)
     elif override_input_metadata:
         m.clear('description')
     if not mi.is_null('publisher'):
         m.clear('publisher')
         m.add('publisher', mi.publisher)
@@ -16,6 +16,7 @@ from calibre import CurrentDir
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ptempfile import TemporaryFile
 from calibre.utils.magick import Image, create_canvas
+from calibre.ebooks.compression.palmdoc import decompress_doc

 DATATYPE_PHTML = 0
 DATATYPE_PHTML_COMPRESSED = 1

@@ -359,7 +360,7 @@ class Reader(FormatReader):
         # plugin assemble the order based on hyperlinks.
         with CurrentDir(output_dir):
             for uid, num in self.uid_text_secion_number.items():
-                self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid)))
+                self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
                 with open('%s.html' % uid, 'wb') as htmlf:
                     html = u'<html><body>'
                     section_header, section_data = self.sections[num]

@@ -465,7 +466,7 @@ class Reader(FormatReader):
             if not home_html:
                 home_html = self.uid_text_secion_number.items()[0][0]
         except:
-            raise Exception(_('Could not determine home.html'))
+            raise Exception('Could not determine home.html')
         # Generate oeb from html conversion.
         oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
         self.options.debug_pipeline = odi
@@ -32,10 +32,11 @@ class PDFInput(InputFormatPlugin):

     def convert_new(self, stream, accelerators):
         from calibre.ebooks.pdf.reflow import PDFDocument
+        from calibre.utils.cleantext import clean_ascii_chars
         if pdfreflow_err:
             raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
         pdfreflow.reflow(stream.read(), 1, -1)
-        xml = open('index.xml', 'rb').read()
+        xml = clean_ascii_chars(open('index.xml', 'rb').read())
         PDFDocument(xml, self.opts, self.log)
         return os.path.join(os.getcwd(), 'metadata.opf')

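clean_ascii_chars guards the subsequent XML parse: pdfreflow's index.xml can contain low ASCII control characters, which are not legal in XML 1.0 and would abort parsing. A small illustration of the failure it prevents (the input string is illustrative):

```python
from calibre.utils.cleantext import clean_ascii_chars

raw = '<page>text with a stray \x00 control char</page>'
xml = clean_ascii_chars(raw)  # control characters stripped before parsing
```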
@@ -15,7 +15,6 @@ import cStringIO
 from lxml import etree

 from calibre.ebooks.metadata import authors_to_string
-from calibre.utils.filenames import ascii_text
 from calibre.utils.magick.draw import save_cover_data_to, identify_data

 TAGS = {

@@ -79,8 +78,7 @@ def txt2rtf(text):
         elif val <= 127:
             buf.write(x)
         else:
-            repl = ascii_text(x)
-            c = r'\uc{2}\u{0:d}{1}'.format(val, repl, len(repl))
+            c = r'\u{0:d}?'.format(val)
             buf.write(c)
     return buf.getvalue()

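The replacement escape relies on RTF's built-in Unicode mechanism: \uN emits code point N as a decimal number, and the character that follows it is the fallback shown by readers without Unicode support, here a plain '?'. That removes the need for ascii_text() transliteration entirely. For example:

```python
val = ord(u'\u00e9')         # e-acute
c = r'\u{0:d}?'.format(val)
# c == '\\u233?'  -- RTF renders the accented character,
# or '?' on readers without Unicode support
```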
@@ -34,7 +34,7 @@ if isosx:
             )
     gprefs.defaults['action-layout-toolbar'] = (
         'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
-        'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
+        'Choose Library', 'Donate', None, 'Fetch News', 'Store', 'Save To Disk',
         'Connect Share', None, 'Remove Books',
         )
     gprefs.defaults['action-layout-toolbar-device'] = (

@@ -48,7 +48,7 @@ else:
     gprefs.defaults['action-layout-menubar-device'] = ()
     gprefs.defaults['action-layout-toolbar'] = (
         'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
-        'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
+        'Choose Library', 'Donate', None, 'Fetch News', 'Store', 'Save To Disk',
         'Connect Share', None, 'Remove Books', None, 'Help', 'Preferences',
         )
     gprefs.defaults['action-layout-toolbar-device'] = (

@@ -739,12 +739,6 @@ def build_forms(srcdir, info=None):
             dat = dat.replace('from QtWebKit.QWebView import QWebView',
                     'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')

-            if form.endswith('viewer%smain.ui'%os.sep):
-                info('\t\tPromoting WebView')
-                dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
-                dat = dat.replace('self.view = QWebView(', 'self.view = DocumentView(')
-                dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
-
             open(compiled_form, 'wb').write(dat)

 _df = os.environ.get('CALIBRE_DEVELOP_FROM', None)
@@ -20,9 +20,8 @@ from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.utils.filenames import ascii_filename
 from calibre.constants import preferred_encoding, filesystem_encoding
 from calibre.gui2.actions import InterfaceAction
-from calibre.gui2 import config, question_dialog
+from calibre.gui2 import question_dialog
 from calibre.ebooks.metadata import MetaInformation
-from calibre.utils.config import test_eight_code
 from calibre.ebooks.metadata.sources.base import msprefs

 def get_filters():

@@ -180,26 +179,17 @@ class AddAction(InterfaceAction):
             except IndexError:
                 self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
                 self.isbn_add_dialog.accept()
-                if test_eight_code:
-                    orig = msprefs['ignore_fields']
-                    new = list(orig)
-                    for x in ('title', 'authors'):
-                        if x in new:
-                            new.remove(x)
-                    msprefs['ignore_fields'] = new
-                    try:
-                        self.gui.iactions['Edit Metadata'].download_metadata(
-                                ids=self.add_by_isbn_ids)
-                    finally:
-                        msprefs['ignore_fields'] = orig
-                else:
-                    orig = config['overwrite_author_title_metadata']
-                    config['overwrite_author_title_metadata'] = True
-                    try:
-                        self.gui.iactions['Edit Metadata'].do_download_metadata(
-                                self.add_by_isbn_ids)
-                    finally:
-                        config['overwrite_author_title_metadata'] = orig
+                orig = msprefs['ignore_fields']
+                new = list(orig)
+                for x in ('title', 'authors'):
+                    if x in new:
+                        new.remove(x)
+                msprefs['ignore_fields'] = new
+                try:
+                    self.gui.iactions['Edit Metadata'].download_metadata(
+                            ids=self.add_by_isbn_ids)
+                finally:
+                    msprefs['ignore_fields'] = orig
                 return


@@ -246,7 +246,8 @@ class ChooseLibraryAction(InterfaceAction):
     def delete_requested(self, name, location):
         loc = location.replace('/', os.sep)
         if not question_dialog(self.gui, _('Are you sure?'), '<p>'+
-                _('All files from %s will be '
+                _('<b style="color: red">All files</b> (not just ebooks) '
+                    'from <br><br><b>%s</b><br><br> will be '
                     '<b>permanently deleted</b>. Are you sure?') % loc,
                 show_copy_button=False):
             return
@@ -10,15 +10,13 @@ from functools import partial

 from PyQt4.Qt import Qt, QMenu, QModelIndex, QTimer

-from calibre.gui2 import error_dialog, config, Dispatcher, question_dialog
-from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
+from calibre.gui2 import error_dialog, Dispatcher, question_dialog
 from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.dialogs.tag_list_editor import TagListEditor
 from calibre.gui2.actions import InterfaceAction
 from calibre.ebooks.metadata import authors_to_string
 from calibre.utils.icu import sort_key
-from calibre.utils.config import test_eight_code

 class EditMetadataAction(InterfaceAction):

@@ -36,22 +34,8 @@ class EditMetadataAction(InterfaceAction):
         md.addAction(_('Edit metadata in bulk'),
                 partial(self.edit_metadata, False, bulk=True))
         md.addSeparator()
-        if test_eight_code:
-            dall = self.download_metadata
-        else:
-            dall = partial(self.download_metadata_old, False, covers=True)
-            dident = partial(self.download_metadata_old, False, covers=False)
-            dcovers = partial(self.download_metadata_old, False, covers=True,
-                    set_metadata=False, set_social_metadata=False)
-
-        md.addAction(_('Download metadata and covers'), dall,
+        md.addAction(_('Download metadata and covers'), self.download_metadata,
                 Qt.ControlModifier+Qt.Key_D)
-        if not test_eight_code:
-            md.addAction(_('Download only metadata'), dident)
-            md.addAction(_('Download only covers'), dcovers)
-            md.addAction(_('Download only social metadata'),
-                partial(self.download_metadata_old, False, covers=False,
-                    set_metadata=False, set_social_metadata=True))
         self.metadata_menu = md

         mb = QMenu()

@@ -88,7 +72,7 @@ class EditMetadataAction(InterfaceAction):
                     _('No books selected'), show=True)
         db = self.gui.library_view.model().db
         ids = [db.id(row.row()) for row in rows]
-        from calibre.gui2.metadata.bulk_download2 import start_download
+        from calibre.gui2.metadata.bulk_download import start_download
         start_download(self.gui, ids,
                 Dispatcher(self.metadata_downloaded))

@@ -96,7 +80,7 @@ class EditMetadataAction(InterfaceAction):
         if job.failed:
             self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
             return
-        from calibre.gui2.metadata.bulk_download2 import get_job_details
+        from calibre.gui2.metadata.bulk_download import get_job_details
         id_map, failed_ids, failed_covers, all_failed, det_msg = \
                 get_job_details(job)
         if all_failed:

@@ -112,8 +96,9 @@ class EditMetadataAction(InterfaceAction):
         show_copy_button = False
         if failed_ids or failed_covers:
             show_copy_button = True
+            num = len(failed_ids.union(failed_covers))
             msg += '<p>'+_('Could not download metadata and/or covers for %d of the books. Click'
-                    ' "Show details" to see which books.')%len(failed_ids)
+                    ' "Show details" to see which books.')%num

         payload = (id_map, failed_ids, failed_covers)
         from calibre.gui2.dialogs.message_box import ProceedNotification

@@ -158,49 +143,6 @@ class EditMetadataAction(InterfaceAction):

         self.apply_metadata_changes(id_map)

-    def download_metadata_old(self, checked, covers=True, set_metadata=True,
-            set_social_metadata=None):
-        rows = self.gui.library_view.selectionModel().selectedRows()
-        if not rows or len(rows) == 0:
-            d = error_dialog(self.gui, _('Cannot download metadata'),
-                    _('No books selected'))
-            d.exec_()
-            return
-        db = self.gui.library_view.model().db
-        ids = [db.id(row.row()) for row in rows]
-        self.do_download_metadata(ids, covers=covers,
-                set_metadata=set_metadata,
-                set_social_metadata=set_social_metadata)
-
-    def do_download_metadata(self, ids, covers=True, set_metadata=True,
-            set_social_metadata=None):
-        m = self.gui.library_view.model()
-        db = m.db
-        if set_social_metadata is None:
-            get_social_metadata = config['get_social_metadata']
-        else:
-            get_social_metadata = set_social_metadata
-        from calibre.gui2.metadata.bulk_download import DoDownload
-        if set_social_metadata is not None and set_social_metadata:
-            x = _('social metadata')
-        else:
-            x = _('covers') if covers and not set_metadata else _('metadata')
-        title = _('Downloading {0} for {1} book(s)').format(x, len(ids))
-        self._download_book_metadata = DoDownload(self.gui, title, db, ids,
-                get_covers=covers, set_metadata=set_metadata,
-                get_social_metadata=get_social_metadata)
-        m.stop_metadata_backup()
-        try:
-            self._download_book_metadata.exec_()
-        finally:
-            m.start_metadata_backup()
-        cr = self.gui.library_view.currentIndex().row()
-        x = self._download_book_metadata
-        if x.updated:
-            self.gui.library_view.model().refresh_ids(
-                x.updated, cr)
-            if self.gui.cover_flow:
-                self.gui.cover_flow.dataChanged()
     # }}}

     def edit_metadata(self, checked, bulk=None):

@@ -227,9 +169,7 @@ class EditMetadataAction(InterfaceAction):
                 list(range(self.gui.library_view.model().rowCount(QModelIndex())))
         current_row = row_list.index(cr)

-        func = (self.do_edit_metadata if test_eight_code else
-                self.do_edit_metadata_old)
-        changed, rows_to_refresh = func(row_list, current_row)
+        changed, rows_to_refresh = self.do_edit_metadata(row_list, current_row)

         m = self.gui.library_view.model()

@@ -244,36 +184,6 @@ class EditMetadataAction(InterfaceAction):
             m.current_changed(current, previous)
         self.gui.tags_view.recount()

-    def do_edit_metadata_old(self, row_list, current_row):
-        changed = set([])
-        db = self.gui.library_view.model().db
-
-        while True:
-            prev = next_ = None
-            if current_row > 0:
-                prev = db.title(row_list[current_row-1])
-            if current_row < len(row_list) - 1:
-                next_ = db.title(row_list[current_row+1])
-
-            d = MetadataSingleDialog(self.gui, row_list[current_row], db,
-                    prev=prev, next_=next_)
-            d.view_format.connect(lambda
-                    fmt:self.gui.iactions['View'].view_format(row_list[current_row],
-                        fmt))
-            ret = d.exec_()
-            d.break_cycles()
-            if ret != d.Accepted:
-                break
-
-            changed.add(d.id)
-            self.gui.library_view.model().refresh_ids(list(d.books_to_refresh))
-            if d.row_delta == 0:
-                break
-            current_row += d.row_delta
-            self.gui.library_view.set_current_row(current_row)
-            self.gui.library_view.scroll_to_row(current_row)
-        return changed, set()
-
     def do_edit_metadata(self, row_list, current_row):
         from calibre.gui2.metadata.single import edit_metadata
         db = self.gui.library_view.model().db

@@ -613,6 +523,7 @@ class EditMetadataAction(InterfaceAction):
                     self.applied_ids, cr)
             if self.gui.cover_flow:
                 self.gui.cover_flow.dataChanged()
+            self.gui.tags_view.recount()

         self.apply_id_map = []
         self.apply_pd = None
@@ -10,7 +10,7 @@ from PyQt4.Qt import QIcon, QMenu, Qt
 from calibre.gui2.actions import InterfaceAction
 from calibre.gui2.preferences.main import Preferences
 from calibre.gui2 import error_dialog
-from calibre.constants import DEBUG
+from calibre.constants import DEBUG, isosx

 class PreferencesAction(InterfaceAction):

@@ -19,7 +19,8 @@ class PreferencesAction(InterfaceAction):

     def genesis(self):
         pm = QMenu()
-        pm.addAction(QIcon(I('config.png')), _('Preferences'), self.do_config)
+        acname = _('Change calibre behavior') if isosx else _('Preferences')
+        pm.addAction(QIcon(I('config.png')), acname, self.do_config)
         pm.addAction(QIcon(I('wizard.png')), _('Run welcome wizard'),
                 self.gui.run_wizard)
         if not DEBUG:
@@ -60,7 +60,7 @@ class ViewAction(InterfaceAction):

     def build_menus(self, db):
         self.view_menu.clear()
-        self.view_menu.addAction(self.qaction)
+        self.view_menu.addAction(self.view_action)
         self.view_menu.addAction(self.view_specific_action)
         self.view_menu.addSeparator()
         self.view_menu.addAction(self.action_pick_random)
Some files were not shown because too many files have changed in this diff.