diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index ce964e0104..3ccc07040b 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -507,7 +507,7 @@ from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
 
 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
-    LibraryThing
+    KentDistrictLibrary
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
 from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
@@ -517,7 +517,7 @@ from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 
 plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
-        LibraryThing, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
+        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
         Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
         NiceBooksCovers]
 plugins += [
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index bd8d96a399..4f246b2b9a 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -247,30 +247,26 @@ class Amazon(MetadataSource): # {{{
 
     # }}}
 
-class LibraryThing(MetadataSource): # {{{
+class KentDistrictLibrary(MetadataSource): # {{{
 
-    name = 'LibraryThing'
+    name = 'Kent District Library'
     metadata_type = 'social'
-    description = _('Downloads series/covers/rating information from librarything.com')
+    description = _('Downloads series information from ww2.kdl.org')
 
     def fetch(self):
-        if not self.isbn or not self.site_customization:
+        # The KDL lookup is keyed on title and author, so both are required
+        if not self.title or not self.book_author:
             return
-        from calibre.ebooks.metadata.library_thing import get_social_metadata
-        un, _, pw = self.site_customization.partition(':')
+        from calibre.ebooks.metadata.kdl import get_series
         try:
-            self.results = get_social_metadata(self.title, self.book_author,
-                    self.publisher, self.isbn, username=un, password=pw)
+            self.results = get_series(self.title, self.book_author)
         except Exception, e:
+            # Also echo the traceback to stderr; it is stored in self.tb below
+            import traceback
+            traceback.print_exc()
             self.exception = e
             self.tb = traceback.format_exc()
 
-    @property
-    def string_customization_help(self):
-        ans = _('To use librarything.com you must sign up for a %sfree account%s '
-                'and enter your username and password separated by a : below.')
-        return ''+ans%('', '')
-
     # }}}
 
 
diff --git a/src/calibre/ebooks/metadata/kdl.py b/src/calibre/ebooks/metadata/kdl.py
new file mode 100644
index 0000000000..4eca49ad45
--- /dev/null
+++ b/src/calibre/ebooks/metadata/kdl.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import re, urllib, urlparse
+
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre import browser
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.chardet import xml_to_unicode
+
+# Query URL for the KDL series search. {0} is the author's last name and {1}
+# the book title; both must already be URL-quoted.
+URL = \
+"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
+
+# Quote characters (ASCII and typographic) dropped from the start of a title
+_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
+
+
+def _quote(text):
+    '''URL-quote text for a query string, encoding unicode as UTF-8 first.'''
+    if isinstance(text, unicode):
+        text = text.encode('utf-8')
+    return urllib.quote_plus(text)
+
+
+def get_series(title, authors):
+    '''
+    Look up the series a book belongs to on ww2.kdl.org.
+
+    :param title: Title of the book. A leading quote character and a leading
+                  article (A, An, The) are dropped before searching.
+    :param authors: List of author names; only the first is used. A bare
+                    string is treated as a single author name.
+    :return: A Metadata object built from title and authors. Its series and,
+             when it can be parsed, series_index are set only if the site
+             returns a match.
+
+    Errors raised while fetching the page propagate to the caller.
+    '''
+    if isinstance(authors, basestring):
+        # Indexing a string would yield its first character, not a name
+        authors = [authors]
+    mi = Metadata(title, authors)
+    if title and title[0] in _ignore_starts:
+        title = title[1:]
+    title = re.sub(r'^(A|The|An)\s+', '', title or '').strip()
+    if not title:
+        return mi
+    title = _quote(title)
+
+    author = authors[0].strip() if authors else ''
+    # The search form takes only a last name: "Last, First" or "First Last"
+    if ',' in author:
+        author = author.split(',')[0].strip()
+    elif author:
+        author = author.split()[-1]
+    if not author:
+        return mi
+    author = _quote(author)
+
+    url = URL.format(author, title)
+    br = browser()
+    raw = br.open(url).read()
+    if 'see the full results' not in raw:
+        return mi
+    raw = xml_to_unicode(raw)[0]
+    soup = BeautifulSoup(raw)
+    searcharea = soup.find('div', attrs={'class':'searcharea'})
+    if searcharea is None:
+        return mi
+    ss = searcharea.find('div', attrs={'class':'seriessearch'})
+    if ss is None:
+        return mi
+    a = ss.find('a', href=True)
+    if a is None:
+        return mi
+    # The series name is carried in the query string of the result link
+    href = a['href'].partition('?')[-1]
+    data = urlparse.parse_qs(href)
+    series = data.get('SeriesName', [])
+    if not series:
+        return mi
+    series = series[0]
+    series = re.sub(r' series$', '', series).strip()
+    if series:
+        mi.series = series
+    # The node after the series block may be missing or be bare text with no
+    # children; when present, the text before its first '.' is the index.
+    contents = getattr(ss.nextSibling, 'contents', None)
+    if contents:
+        raw = unicode(contents[0])
+        raw = raw.partition('.')[0].strip()
+        try:
+            mi.series_index = int(raw)
+        except ValueError:
+            pass
+    return mi
+
+
+if __name__ == '__main__':
+    import sys
+    print get_series(sys.argv[-2], [sys.argv[-1]])
+