Add plugin to download series information from the Kent District Library

2025-07-09 03:04:10 -04:00 · 2011-02-11 10:31:30 -07:00 · 2011-02-11 10:31:30 -07:00 · 10f4b9c0f9
commit 10f4b9c0f9
parent 65dd616aac
3 changed files with 89 additions and 16 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -507,7 +507,7 @@ from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK

 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
-    LibraryThing
+    KentDistrictLibrary
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
 from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
@ -517,7 +517,7 @@ from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck

 plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
-        LibraryThing, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
+        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
        Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
        NiceBooksCovers]
 plugins += [
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@ -247,30 +247,24 @@ class Amazon(MetadataSource): # {{{

    # }}}

-class LibraryThing(MetadataSource): # {{{
+class KentDistrictLibrary(MetadataSource): # {{{

-    name = 'LibraryThing'
+    name = 'Kent District Library'
    metadata_type = 'social'
-    description = _('Downloads series/covers/rating information from librarything.com')
+    description = _('Downloads series information from ww2.kdl.org')

    def fetch(self):
-        if not self.isbn or not self.site_customization:
+        if not self.title or not self.book_author:  # both are passed to get_series below
            return
-        from calibre.ebooks.metadata.library_thing import get_social_metadata
-        un, _, pw = self.site_customization.partition(':')
+        from calibre.ebooks.metadata.kdl import get_series  # function-scope import
        try:
-            self.results = get_social_metadata(self.title, self.book_author,
-                    self.publisher, self.isbn, username=un, password=pw)
+            self.results = get_series(self.title, self.book_author)
        except Exception, e:
+            import traceback
+            traceback.print_exc()  # printed here in addition to being stored in self.tb
            self.exception = e
            self.tb = traceback.format_exc()

-    @property
-    def string_customization_help(self):
-        ans = _('To use librarything.com you must sign up for a %sfree account%s '
-                'and enter your username and password separated by a : below.')
-        return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
-
    # }}}


--- a/src/calibre/ebooks/metadata/kdl.py
+++ b/src/calibre/ebooks/metadata/kdl.py
@ -0,0 +1,79 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re, urllib, urlparse
+
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre import browser
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.chardet import xml_to_unicode
+
+URL = \
+"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
+# Characters dropped when first in a title: ' and ", U+2018..U+201D, U+2032, U+2033
+_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
+
+def get_series(title, authors):
+    '''
+    Query the ww2.kdl.org catalog (see URL) for the series of a book.
+
+    :param title: Book title; one leading quote character and a leading
+        A/The/An are dropped before searching.
+    :param authors: Sequence of author names. Only the first is used,
+        reduced to the text before a comma, or else to its last word.
+    :return: Metadata(title, authors), with series and series_index set
+        only when they could be read from the result page. Errors raised
+        while fetching the page are not caught here.
+    '''
+    mi = Metadata(title, authors)
+    if title and title[0] in _ignore_starts:
+        title = title[1:]
+    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
+    if not title or not authors or not authors[0].strip():
+        return mi
+    author = authors[0].strip()
+    author = author.split(',')[0] if ',' in author else author.split()[-1]
+    # Both values are embedded in the query string: UTF-8 encode and quote
+    title, author = [urllib.quote_plus(x.encode('utf-8') if
+        isinstance(x, unicode) else x) for x in (title, author)]
+    raw = browser().open(URL.format(author, title)).read()
+    if 'see the full results' not in raw:
+        return mi
+    soup = BeautifulSoup(xml_to_unicode(raw)[0])
+    searcharea = soup.find('div', attrs={'class':'searcharea'})
+    if searcharea is None:
+        return mi
+    ss = searcharea.find('div', attrs={'class':'seriessearch'})
+    if ss is None:
+        return mi
+    a = ss.find('a', href=True)
+    if a is None:
+        return mi
+    # The series name is read from the query string of the first link
+    data = urlparse.parse_qs(a['href'].partition('?')[-1])
+    series = data.get('SeriesName', [])
+    if not series:
+        return mi
+    series = re.sub(r' series$', '', series[0]).strip()
+    if series:
+        mi.series = series
+    # The text before the first '.' in the next sibling is taken as the index;
+    # that sibling may be absent or a text node lacking .contents, so guard
+    ns = ss.nextSibling
+    if getattr(ns, 'contents', None):
+        raw = unicode(ns.contents[0]).partition('.')[0].strip()
+        try:
+            mi.series_index = int(raw)
+        except ValueError:
+            pass
+    return mi
+
+
+if __name__ == '__main__':
+    from sys import argv  # last two arguments: TITLE AUTHOR
+    print get_series(argv[-2], [argv[-1]])
+