Add plugin to download series information from the Kent District Library

2025-08-11 09:13:57 -04:00 · 2011-02-11 10:31:30 -07:00 · 2011-02-11 10:31:30 -07:00 · 10f4b9c0f9
commit 10f4b9c0f9
parent 65dd616aac
3 changed files with 89 additions and 16 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -507,7 +507,7 @@ from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
-    LibraryThing
+    KentDistrictLibrary
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
 from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
@@ -517,7 +517,7 @@ from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
-        LibraryThing, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
+        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
        Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
        NiceBooksCovers]
 plugins += [
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -247,30 +247,24 @@ class Amazon(MetadataSource): # {{{
    # }}}
-class LibraryThing(MetadataSource): # {{{
+class KentDistrictLibrary(MetadataSource): # {{{
+    # Metadata source that asks ww2.kdl.org for the series a book belongs to.
+    # The lookup only needs the title and author; no credentials are passed.
-    name = 'LibraryThing'
+    name = 'Kent District Library'
    metadata_type = 'social'
-    description = _('Downloads series/covers/rating information from librarything.com')
+    description = _('Downloads series information from ww2.kdl.org')
    def fetch(self):
+        # Stores the get_series() result in self.results. Any failure is
+        # printed and recorded on self.exception / self.tb instead of raised.
-        if not self.isbn or not self.site_customization:
+        if not self.title or not self.book_author:
            return
-        from calibre.ebooks.metadata.library_thing import get_social_metadata
+        from calibre.ebooks.metadata.kdl import get_series
-        un, _, pw = self.site_customization.partition(':')
        try:
-            self.results = get_social_metadata(self.title, self.book_author,
-                    self.publisher, self.isbn, username=un, password=pw)
+            self.results = get_series(self.title, self.book_author)
        except Exception, e:
            import traceback
            traceback.print_exc()
            self.exception = e
            self.tb = traceback.format_exc()
+    # NOTE(review): the help text below still asks for librarything.com
+    # credentials, which fetch() no longer reads -- confirm whether this
+    # property should be dropped for the Kent District Library plugin.
    @property
    def string_customization_help(self):
        ans = _('To use librarything.com you must sign up for a %sfree account%s '
                'and enter your username and password separated by a : below.')
        return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
    # }}}
--- a/src/calibre/ebooks/metadata/kdl.py
+++ b/src/calibre/ebooks/metadata/kdl.py
@@ -0,0 +1,79 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import re, urllib, urlparse
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre import browser
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.chardet import xml_to_unicode
 # Search URL template for the WhatsNext.asp page on ww2.kdl.org: {0} fills
 # the AuthorLastName parameter and {1} fills BookTitle. Values are
 # substituted as-is by str.format, so they must already be URL-safe.
 URL = \
 "http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
 # Characters dropped when they are the first character of a title: the ASCII
 # single and double quote, U+2018 through U+201D, and U+2032 / U+2033.
 _ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
 def get_series(title, authors):
    '''
    Look up the series a book belongs to on the Kent District Library site.

    :param title: Book title. A single leading quote character and a leading
                  article (A, An, The) are dropped before searching.
    :param authors: Sequence of author names. Only the first entry is used,
                    reduced to the part before a comma, or else its last word.
    :return: A Metadata object built from the original title and authors.
             ``series`` is set when the result page links to a series, and
             ``series_index`` when the text after that link starts with an
             integer. If anything is missing, the object is returned as built.
    '''
    mi = Metadata(title, authors)
    # Nothing to search for; also avoids re.sub(None) / authors[0] errors.
    if not title or not authors:
        return mi
    if title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode):
        title = title.encode('utf-8')
    title = urllib.quote_plus(title)
    author = authors[0].strip()
    if not author:
        return mi
    # The query only takes a last name: "Last, First" or "First ... Last".
    if ',' in author:
        author = author.split(',')[0].strip()
    else:
        author = author.split()[-1]
    # Quote the author exactly like the title, so non-ASCII names and
    # characters such as '&' or spaces cannot corrupt the query string.
    if isinstance(author, unicode):
        author = author.encode('utf-8')
    author = urllib.quote_plus(author)
    url = URL.format(author, title)
    br = browser()
    raw = br.open(url).read()
    # Pages without this phrase are treated as having no usable result.
    if 'see the full results' not in raw:
        return mi
    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class':'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class':'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi
    # The series name is carried in the link's SeriesName query parameter.
    href = a['href'].partition('?')[-1]
    data = urlparse.parse_qs(href)
    series = data.get('SeriesName', [])
    if not series:
        return mi
    series = series[0]
    series = re.sub(r' series$', '', series).strip()
    if series:
        mi.series = series
    # The node after the series block is expected to start with "<n>. ...".
    # It may be absent or a bare text node without .contents, in which case
    # the index is simply left unset.
    ns = ss.nextSibling
    contents = getattr(ns, 'contents', None)
    if contents:
        raw = unicode(contents[0])
        raw = raw.partition('.')[0].strip()
        try:
            mi.series_index = int(raw)
        except ValueError:
            # Not a number; keep the series name without an index.
            pass
    return mi
 # Command-line entry point: takes the last two arguments as the title and a
 # single author name, and prints whatever get_series() returns for them.
 if __name__ == '__main__':
    import sys
    print get_series(sys.argv[-2], [sys.argv[-1]])