Get Books: Fix Baen Webscription and O'Reilly stores. Fix price detection for Google Books

2025-11-27 08:45:00 -05:00 · 2012-02-19 00:12:05 +05:30 · 2012-02-19 00:12:05 +05:30 · 228a619c18
commit 228a619c18
parent 2653da4ecf bb93eecbe7
4 changed files with 22 additions and 30 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1217,7 +1217,7 @@ class StoreArchiveOrgStore(StoreBase):
    formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
 class StoreBaenWebScriptionStore(StoreBase):
-    name = 'Baen WebScription'
+    name = 'Baen Ebooks'
    description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
    actual_plugin = 'calibre.gui2.store.stores.baen_webscription_plugin:BaenWebScriptionStore'
--- a/src/calibre/gui2/store/stores/baen_webscription_plugin.py
+++ b/src/calibre/gui2/store/stores/baen_webscription_plugin.py
@ -24,7 +24,7 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
    def open(self, parent=None, detail_item=None, external=False):
-        url = 'http://www.webscription.net/'
+        url = 'http://www.baenebooks.com/'
        if external or self.config.get('open_external', False):
            if detail_item:
@ -40,19 +40,19 @@ class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
            d.exec_()
    def search(self, query, max_results=10, timeout=60):
-        url = 'http://www.webscription.net/searchadv.aspx?IsSubmit=true&SearchTerm=' + urllib2.quote(query)
+        url = 'http://www.baenebooks.com/searchadv.aspx?IsSubmit=true&SearchTerm=' + urllib2.quote(query)
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//table/tr/td/img[@src="skins/Skin_1/images/matchingproducts.gif"]/..//tr'):
+            for data in doc.xpath('//table//table//table//table//tr'):
                if counter <= 0:
                    break
                id = ''.join(data.xpath('./td[1]/a/@href'))
-                if not id:
+                if not id or not id.startswith('p-'):
                    continue
                title = ''.join(data.xpath('./td[1]/a/text()'))
@ -61,7 +61,7 @@ class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
                cover_url = ''
                price = ''
-                with closing(br.open('http://www.webscription.net/' + id.strip(), timeout=timeout/4)) as nf:
+                with closing(br.open('http://www.baenebooks.com/' + id.strip(), timeout=timeout/4)) as nf:
                    idata = html.fromstring(nf.read())
                    author = ''.join(idata.xpath('//span[@class="ProductNameText"]/../b/text()'))
                    author = author.split('by ')[-1]
@ -74,7 +74,7 @@ class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
                    if mo:
                        pnum = mo.group('num')
                    if pnum:
-                        cover_url = 'http://www.webscription.net/' + ''.join(idata.xpath('//img[@id="ProductPic%s"]/@src' % pnum))
+                        cover_url = 'http://www.baenebooks.com/' + ''.join(idata.xpath('//img[@id="ProductPic%s"]/@src' % pnum))
                counter -= 1
--- a/src/calibre/gui2/store/stores/google_books_plugin.py
+++ b/src/calibre/gui2/store/stores/google_books_plugin.py
@ -93,16 +93,13 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
            search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src'))
            # Try to get the set price.
-            price = ''.join(doc.xpath('//div[@class="buy-price-container"]/span[contains(@class, "buy-price")]/text()'))
+            price = ''.join(doc.xpath('//div[@id="gb-get-book-container"]//a/text()'))
-            # Try to get the price inside of a buy button.
+            if 'read' in price.lower():
            if not price.strip():
                price = ''.join(doc.xpath('//div[@class="buy-container"]/a/text()'))
                price = price.split('-')[-1]
            if 'view' in price.lower():
                price = 'Unknown'
-            # No price set for this book.
+            elif 'free' in price.lower() or not price.strip():
            if not price.strip():
                price = '$0.00'
            elif '-' in price:
                a, b, price = price.partition(' - ')
            search_result.price = price.strip()
            search_result.formats = ', '.join(doc.xpath('//div[contains(@class, "download-panel-div")]//a/text()')).upper()
--- a/src/calibre/gui2/store/stores/oreilly_plugin.py
+++ b/src/calibre/gui2/store/stores/oreilly_plugin.py
@ -26,9 +26,6 @@ class OReillyStore(BasicStoreConfig, StorePlugin):
    def open(self, parent=None, detail_item=None, external=False):
        url = 'http://oreilly.com/ebooks/'
        if detail_item:
            detail_item = 'https://epoch.oreilly.com/shop/cart.orm?prod=%s.EBOOK&p=CALIBRE' % detail_item
        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
        else:
@ -49,11 +46,11 @@ class OReillyStore(BasicStoreConfig, StorePlugin):
                if counter <= 0:
                    break
-                full_id = ''.join(data.xpath('./div[@class="book_text"]//p[@class="title"]/a/@href'))
+                ebook = ' '.join(data.xpath('.//p[@class="note"]/text()'))
-                mo = re.search('\d+', full_id)
+                if 'ebook' not in ebook.lower():
                if not mo:
                    continue
-                id = mo.group()
+                
                id = ''.join(data.xpath('./div[@class="book_text"]//p[@class="title"]/a/@href'))
                cover_url = ''.join(data.xpath('./a/img[1]/@src'))
@ -62,16 +59,14 @@ class OReillyStore(BasicStoreConfig, StorePlugin):
                author = author.split('By ')[-1].strip()
                # Get the detail here because we need to get the ebook id for the detail_item.
-                with closing(br.open(full_id, timeout=timeout)) as nf:
+                with closing(br.open(id, timeout=timeout)) as nf:
                    idoc = html.fromstring(nf.read())
-                    price = ''.join(idoc.xpath('(//span[@class="price"])[1]/span//text()'))
+                    for td in idoc.xpath('//td[@class="optionsTd"]'):
-                    formats = ', '.join(idoc.xpath('//div[@class="ebook_formats"]//a/text()'))
+                        if 'ebook' in ''.join(td.xpath('.//text()')).lower():
-                    
+                            price = ''.join(td.xpath('.//span[@class="price"]/text()')).strip()
-                    eid = ''.join(idoc.xpath('(//a[@class="product_buy_link" and contains(@href, ".EBOOK")])[1]/@href')).strip()
+                            formats = ''.join(td.xpath('.//a[@id="availableFormats"]/text()')).strip()
-                    mo = re.search('\d+', eid)
+                            break
                    if mo:
                        id = mo.group()
                counter -= 1