Mirror of https://github.com/kovidgoyal/calibre.git
A spot of refactoring
parent 2b562831d7
commit f1484411af
@@ -20,91 +20,100 @@ from calibre.utils.opensearch.description import Description
from calibre.utils.opensearch.query import Query

# The file's remaining imports sit above this hunk and are elided by the diff
# context. From the names used below they would need to include (assumed):
#   import mimetypes
#   from contextlib import closing
#   from lxml import etree
#   from calibre import browser
#   from calibre.gui2.store.basic_config import BasicStoreConfig
#   from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
#   from calibre.gui2.store.search_result import SearchResult

def search_manybooks(query, max_results=10, timeout=60, open_search_url='http://www.manybooks.net/opds/'):
    '''
    Manybooks uses a very strange OPDS feed. The main OPDS
    feed is structured like a Stanza feed. The search result
    entries give very little information and require you to
    go to a detail link. The detail link has the wrong type
    specified (text/html instead of application/atom+xml).
    '''
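
    # Concretely (an assumed shape, inferred from the handling below): an
    # entry points at its detail document with something like
    #   <link type="text/html" href="...?tid=12345"/>
    # even though the target is really an Atom document, which is why the
    # code selects detail links by their (incorrect) text/html type.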

    description = Description(open_search_url)
    url_template = description.get_best_template()
    if not url_template:
        return
    oquery = Query(url_template)

    # set up initial values
    oquery.searchTerms = query
    oquery.count = max_results
    url = oquery.url()
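
    # Query substitutes the values above into the OpenSearch URL template.
    # With a hypothetical template such as
    #   http://www.manybooks.net/opds/search.php?q={searchTerms}
    # a query of 'dickens' would yield
    #   http://www.manybooks.net/opds/search.php?q=dickens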

    counter = max_results
    br = browser()
    with closing(br.open(url, timeout=timeout)) as f:
        raw_data = f.read()
        raw_data = raw_data.decode('utf-8', 'replace')
        doc = etree.fromstring(raw_data)
        # local-name() keeps the match namespace-agnostic, so it works
        # whether or not the feed binds the Atom namespace to a prefix.
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break

            counter -= 1

            s = SearchResult()

            detail_links = data.xpath('./*[local-name() = "link" and @type = "text/html"]')
            if not detail_links:
                continue
            detail_link = detail_links[0]
            detail_href = detail_link.get('href')
            if not detail_href:
                continue

            s.detail_item = 'http://manybooks.net/titles/' + detail_href.split('tid=')[-1] + '.html'
            # These can have HTML inside of them. We are going to get them again later
            # just in case.
            s.title = ''.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
            s.author = ', '.join(data.xpath('./*[local-name() = "author"]//text()')).strip()

            # Follow the detail link to get the rest of the info.
            with closing(br.open(detail_href, timeout=timeout/4)) as df:
                ddoc = etree.fromstring(df.read())
                ddata = ddoc.xpath('//*[local-name() = "entry"][1]')
                if ddata:
                    ddata = ddata[0]

                    # This is the real title and author info we want. We got
                    # it previously just in case it's not specified here for some reason.
                    s.title = ''.join(ddata.xpath('./*[local-name() = "title"]//text()')).strip()
                    s.author = ', '.join(ddata.xpath('./*[local-name() = "author"]//text()')).strip()
                    if s.author.startswith(','):
                        s.author = s.author[1:]
                    if s.author.endswith(','):
                        s.author = s.author[:-1]

                    s.cover_url = ''.join(ddata.xpath('./*[local-name() = "link" and @rel = "http://opds-spec.org/thumbnail"][1]/@href')).strip()
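
                    # mimetypes maps each acquisition link's MIME type to a
                    # file extension, e.g. mimetypes.guess_extension('application/pdf')
                    # returns '.pdf', which is stored below under the key 'PDF'.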
                    for link in ddata.xpath('./*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'):
                        mimetype = link.get('type')
                        href = link.get('href')
                        if mimetype:
                            ext = mimetypes.guess_extension(mimetype)
                            if ext:
                                ext = ext[1:].upper().strip()
                                s.downloads[ext] = href

            s.price = '$0.00'
            s.drm = SearchResult.DRM_UNLOCKED
            s.formats = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR'

            yield s
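
# A quick smoke test of the function above might look like this (assumed
# usage; the canonical entry points are the store class below and the
# __main__ block at the end of the file):
#
#   for r in search_manybooks('dickens', max_results=3):
#       print(r.title, r.author, sorted(r.downloads))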

class ManyBooksStore(BasicStoreConfig, OpenSearchOPDSStore):

    open_search_url = 'http://www.manybooks.net/opds/'
    web_url = 'http://manybooks.net'

    def search(self, query, max_results=10, timeout=60):
        '''
        Manybooks uses a very strange OPDS feed. The main OPDS
        feed is structured like a Stanza feed. The search result
        entries give very little information and require you to
        go to a detail link. The detail link has the wrong type
        specified (text/html instead of application/atom+xml).
        '''
        if not hasattr(self, 'open_search_url'):
            return
        # Delegate to the module-level function, which holds all of the
        # actual feed handling.
        for r in search_manybooks(query, max_results=max_results, timeout=timeout, open_search_url=self.open_search_url):
            yield r

if __name__ == '__main__':
    import sys
    for result in search_manybooks(' '.join(sys.argv[1:])):
        print(result)
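
# Hypothetical invocation from a calibre development environment (the file
# name is assumed):
#   calibre-debug -e manybooks.py "sherlock holmes"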