Add plugin to download series information from the Kent District Library

This commit is contained in:
Kovid Goyal 2011-02-11 10:31:30 -07:00
parent 65dd616aac
commit 10f4b9c0f9
3 changed files with 89 additions and 16 deletions

View File

@ -507,7 +507,7 @@ from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
LibraryThing
KentDistrictLibrary
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
@ -517,7 +517,7 @@ from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
LibraryThing, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
NiceBooksCovers]
plugins += [

View File

@ -247,30 +247,24 @@ class Amazon(MetadataSource): # {{{
# }}}
class LibraryThing(MetadataSource): # {{{
class KentDistrictLibrary(MetadataSource): # {{{
name = 'LibraryThing'
name = 'Kent District Library'
metadata_type = 'social'
description = _('Downloads series/covers/rating information from librarything.com')
description = _('Downloads series information from ww2.kdl.org')
def fetch(self):
if not self.isbn or not self.site_customization:
if not self.title or not self.book_author:
return
from calibre.ebooks.metadata.library_thing import get_social_metadata
un, _, pw = self.site_customization.partition(':')
from calibre.ebooks.metadata.kdl import get_series
try:
self.results = get_social_metadata(self.title, self.book_author,
self.publisher, self.isbn, username=un, password=pw)
self.results = get_series(self.title, self.book_author)
except Exception, e:
import traceback
traceback.print_exc()
self.exception = e
self.tb = traceback.format_exc()
@property
def string_customization_help(self):
ans = _('To use librarything.com you must sign up for a %sfree account%s '
'and enter your username and password separated by a : below.')
return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
# }}}

View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, urllib, urlparse
from calibre.ebooks.metadata.book.base import Metadata
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
URL = \
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
def get_series(title, authors):
mi = Metadata(title, authors)
if title and title[0] in _ignore_starts:
title = title[1:]
title = re.sub(r'^(A|The|An)\s+', '', title).strip()
if not title:
return mi
if isinstance(title, unicode):
title = title.encode('utf-8')
title = urllib.quote_plus(title)
author = authors[0].strip()
if not author:
return mi
if ',' in author:
author = author.split(',')[0]
else:
author = author.split()[-1]
url = URL.format(author, title)
br = browser()
raw = br.open(url).read()
if 'see the full results' not in raw:
return mi
raw = xml_to_unicode(raw)[0]
soup = BeautifulSoup(raw)
searcharea = soup.find('div', attrs={'class':'searcharea'})
if searcharea is None:
return mi
ss = searcharea.find('div', attrs={'class':'seriessearch'})
if ss is None:
return mi
a = ss.find('a', href=True)
if a is None:
return mi
href = a['href'].partition('?')[-1]
data = urlparse.parse_qs(href)
series = data.get('SeriesName', [])
if not series:
return mi
series = series[0]
series = re.sub(r' series$', '', series).strip()
if series:
mi.series = series
ns = ss.nextSibling
if ns.contents:
raw = unicode(ns.contents[0])
raw = raw.partition('.')[0].strip()
try:
mi.series_index = int(raw)
except:
pass
return mi
if __name__ == '__main__':
import sys
print get_series(sys.argv[-2], [sys.argv[-1]])