From efcf6efa9c083a619fce03b039c721ad8bb8f38b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 15 May 2011 18:49:55 -0400
Subject: [PATCH] Store: Add archive.org plugin.

---
 src/calibre/customize/builtins.py            | 10 ++-
 src/calibre/gui2/store/archive_org_plugin.py | 89 ++++++++++++++++++++
 2 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 src/calibre/gui2/store/archive_org_plugin.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index ce8c7f71cb..2677e008ae 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -1107,6 +1107,12 @@ class StoreAmazonUKKindleStore(StoreBase):
     description = _('Kindle books from Amazon.uk')
     actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
 
+class StoreArchiveOrgStore(StoreBase):
+    name = 'Archive.org'
+    description = _('Rree Books : Download & Streaming : Ebook  and Texts Archive : Internet Archive')
+    actual_plugin = 'calibre.gui2.store.archive_org_plugin:ArchiveOrgStore'
+
+
 class StoreBaenWebScriptionStore(StoreBase):
     name = 'Baen WebScription'
     description = _('Ebooks for readers.')
@@ -1202,8 +1208,8 @@ class StoreWizardsTowerBooksStore(StoreBase):
     description = 'Wizard\'s Tower Press'
     actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'
 
-plugins += [StoreAmazonKindleStore, StoreAmazonDEKindleStore, StoreAmazonUKKindleStore,
-    StoreBaenWebScriptionStore, StoreBNStore,
+plugins += [StoreArchiveOrgStore, StoreAmazonKindleStore, StoreAmazonDEKindleStore,
+    StoreAmazonUKKindleStore, StoreBaenWebScriptionStore, StoreBNStore,
     StoreBeamEBooksDEStore, StoreBeWriteStore,
     StoreDieselEbooksStore, StoreEbookscomStore, StoreEPubBuyDEStore,
     StoreEHarlequinStore, StoreFeedbooksStore,
diff --git a/src/calibre/gui2/store/archive_org_plugin.py b/src/calibre/gui2/store/archive_org_plugin.py
new file mode 100644
index 0000000000..e8e96b3839
--- /dev/null
+++ b/src/calibre/gui2/store/archive_org_plugin.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import urllib
+from contextlib import closing
+
+from lxml import html
+
+from PyQt4.Qt import QUrl
+
+from calibre import browser, url_slash_cleaner
+from calibre.gui2 import open_url
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.basic_config import BasicStoreConfig
+from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.web_store_dialog import WebStoreDialog
+
+class ArchiveOrgStore(BasicStoreConfig, StorePlugin):
+
+    def open(self, parent=None, detail_item=None, external=False):
+        url = 'http://www.archive.org/details/texts'
+        
+        if detail_item:
+            detail_item = url_slash_cleaner('http://www.archive.org' + detail_item)
+
+        if external or self.config.get('open_external', False):
+            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
+        else:
+            d = WebStoreDialog(self.gui, url, parent, detail_item)
+            d.setWindowTitle(self.name)
+            d.set_tags(self.config.get('tags', ''))
+            d.exec_()
+
+    def search(self, query, max_results=10, timeout=60):
+        query = query + ' AND mediatype:texts'
+        url = 'http://www.archive.org/search.php?query=' + urllib.quote(query)
+        
+        br = browser()
+        
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read())
+            for data in doc.xpath('//td[@class="hitCell"]'):
+                if counter <= 0:
+                    break
+
+                id = ''.join(data.xpath('.//a[@class="titleLink"]/@href'))
+                if not id:
+                    continue
+
+                title = ''.join(data.xpath('.//a[@class="titleLink"]//text()'))
+                authors = data.xpath('.//text()')
+                if not authors:
+                    continue
+                author = None
+                for a in authors:
+                    if '-' in a:
+                        author = a.replace('-', ' ').strip()
+                        if author:
+                            break
+                if not author:
+                    continue
+
+                counter -= 1
+                
+                s = SearchResult()
+                s.title = title.strip()
+                s.author = author.strip()
+                s.price = '$0.00'
+                s.detail_item = id.strip()
+                s.drm = SearchResult.DRM_UNLOCKED
+                
+                yield s
+
+    def get_details(self, search_result, timeout):
+        url = url_slash_cleaner('http://www.archive.org' + search_result.detail_item)
+
+        br = browser()
+        with closing(br.open(url, timeout=timeout)) as nf:
+            idata = html.fromstring(nf.read())
+            formats = ', '.join(idata.xpath('//p[@id="dl" and @class="content"]//a/text()'))
+            search_result.formats = formats.upper()
+            
+        return True