Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-08 10:44:09 -04:00)

Commit b3dbda5492: Merge branch 'kovidgoyal/master'
BIN  recipes/icons/le_monde_diplomatique_fr.png (new file; binary not shown; 446 B)
recipes/le_monde_diplomatique_fr.recipe (new file, 111 lines)
@@ -0,0 +1,111 @@
# vim:fileencoding=utf-8
from __future__ import unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2013'
'''
monde-diplomatique.fr
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import feeds_from_index

class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
    title = u'Le Monde diplomatique.fr'
    __author__ = 'Gaëtan Lehmann'
    description = "Le Monde diplomatique est un mensuel français d’information et d’opinion à la ligne éditoriale nettement engagée en faveur d'une gauche de rupture avec le capitalisme. Il aborde de nombreux sujets — géopolitique, relations internationales, économie, questions sociales, écologie, culture, médias, …"  # noqa
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    publisher = 'monde-diplomatique.fr'
    category = 'news, France, world'
    language = 'fr'
    masthead_url = 'http://www.monde-diplomatique.fr/squelettes/images/logotyfa.png'
    timefmt = ' [%d %b %Y]'
    no_stylesheets = True

    feeds = [(u'Blogs', u'http://blog.mondediplo.net/spip.php?page=backend'), (u'Archives', u'http://www.monde-diplomatique.fr/rss/')]

    preprocess_regexps = [
        (re.compile(r'<title>(.*) - Les blogs du Diplo</title>'), lambda m: '<title>' + m.group(1) + '</title>'),
        (re.compile(r'<h2>(.*) - Les blogs du Diplo</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>'),
        (re.compile(r'<title>(.*) \(Le Monde diplomatique\)</title>'), lambda m: '<title>' + m.group(1) + '</title>'),
        (re.compile(r'<h2>(.*) \(Le Monde diplomatique\)</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>'),
        (re.compile(r'<h3>Grand format</h3>'), lambda m: '')]

    remove_tags = [dict(name='div', attrs={'class':'voiraussi liste'}),
                   dict(name='ul', attrs={'class':'hermetique carto hombre_demi_inverse'}),
                   dict(name='a', attrs={'class':'tousles'}),
                   dict(name='h3', attrs={'class':'cat'}),
                   dict(name='div', attrs={'class':'logodiplo'}),
                   dict(name='img', attrs={'class':'spip_logos'}),
                   dict(name='p', attrs={'id':'hierarchie'}),
                   dict(name='div', attrs={'class':'espace'})]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    remove_empty_feeds = True

    filterDuplicates = True

    # don't use parse_index - we need it to raise an exception so we can mix
    # feed and parse_index results in parse_feeds
    def parse_index_valise(self):
        articles = []
        soup = self.index_to_soup('http://www.monde-diplomatique.fr/carnet/')
        cnt = soup.find('ul', attrs={'class':'hermetique liste'})
        for item in cnt.findAll('li'):
            description = ''
            feed_link = item.find('a')
            desc = item.find('div', attrs={'class':'intro'})
            date = item.find('div', attrs={'class':'dates_auteurs'})
            if desc:
                description = desc.string
            if feed_link and feed_link.has_key('href'):
                url = 'http://www.monde-diplomatique.fr' + feed_link['href']
                title = self.tag_to_string(feed_link)
                articles.append({
                    'title': title,
                    'date': date.string.strip(),
                    'url': url,
                    'description': description,
                })
        return [("La valise diplomatique", articles)]

    def parse_index_cartes(self):
        articles = []
        soup = self.index_to_soup('http://www.monde-diplomatique.fr/cartes/')
        cnt = soup.find('div', attrs={'class':'decale hermetique'})
        for item in cnt.findAll('div', attrs={'class':re.compile('grid_3 filet hombre_demi')}):
            feed_link = item.find('a', attrs={'class':'couve'})
            h3 = item.find('h3')
            authorAndDate = item.find('div', attrs={'class':'dates_auteurs'})
            author, date = authorAndDate.string.strip().split(', ')
            if feed_link and feed_link.has_key('href'):
                url = 'http://www.monde-diplomatique.fr' + feed_link['href']
                title = self.tag_to_string(h3)
                articles.append({
                    'title': title,
                    'date': date,
                    'url': url,
                    'description': author,
                })
        return [("Cartes", articles)]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        valise = feeds_from_index(self.parse_index_valise(), oldest_article=self.oldest_article,
                                  max_articles_per_feed=self.max_articles_per_feed,
                                  log=self.log)
        cartes = feeds_from_index(self.parse_index_cartes(), oldest_article=self.oldest_article,
                                  max_articles_per_feed=self.max_articles_per_feed,
                                  log=self.log)
        feeds = valise + feeds + cartes
        return feeds
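The notable move in this recipe is at the end: rather than implementing parse_index(), it keeps its RSS feeds and splices in two scraped sections by converting parse_index()-style results with feeds_from_index(). A reduced sketch of that mixing pattern, with placeholder feed and article values (the actual index scraping is elided):

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import feeds_from_index

class MixedSourcesRecipe(BasicNewsRecipe):
    title = 'Mixing RSS and scraped indices'  # illustrative recipe only
    feeds = [('Some RSS feed', 'http://example.com/rss')]

    def parse_index_extra(self):
        # parse_index()-style (section, articles) tuples, normally
        # built by scraping an index page as the recipe above does
        return [('Extra section', [{'title': 'An article', 'url': 'http://example.com/a',
                                    'date': '', 'description': ''}])]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)  # the regular RSS-driven feeds
        extra = feeds_from_index(self.parse_index_extra(),
                                 oldest_article=self.oldest_article,
                                 max_articles_per_feed=self.max_articles_per_feed,
                                 log=self.log)
        return extra + feeds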
@@ -8,7 +8,7 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

# Imports {{{
-import os, shutil, uuid, json, glob, time
+import os, shutil, uuid, json, glob, time, cPickle
from functools import partial

import apsw
@@ -1216,5 +1216,27 @@ class DB(object):
    def get_ids_for_custom_book_data(self, name):
        return frozenset(r[0] for r in self.conn.execute('SELECT book FROM books_plugin_data WHERE name=?', (name,)))

    def conversion_options(self, book_id, fmt):
        for (data,) in self.conn.get('SELECT data FROM conversion_options WHERE book=? AND format=?', (book_id, fmt.upper())):
            if data:
                return cPickle.loads(bytes(data))

    def has_conversion_options(self, ids, fmt='PIPE'):
        ids = frozenset(ids)
        self.conn.execute('DROP TABLE IF EXISTS conversion_options_temp; CREATE TEMP TABLE conversion_options_temp (id INTEGER PRIMARY KEY);')
        self.conn.executemany('INSERT INTO conversion_options_temp VALUES (?)', [(x,) for x in ids])
        for (book_id,) in self.conn.get(
            'SELECT book FROM conversion_options WHERE format=? AND book IN (SELECT id FROM conversion_options_temp)', (fmt.upper(),)):
            return True
        return False

    def delete_conversion_options(self, book_ids, fmt):
        self.conn.executemany('DELETE FROM conversion_options WHERE book=? AND format=?',
            [(book_id, fmt.upper()) for book_id in book_ids])

    def set_conversion_options(self, options, fmt):
        options = [(book_id, fmt.upper(), buffer(cPickle.dumps(data, -1))) for book_id, data in options.iteritems()]
        self.conn.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)

    # }}}
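Conversion options are stored pickled in a BLOB column. A minimal standalone sketch of the same serialization round-trip, using the stdlib sqlite3 module in place of calibre's apsw-backed connection (the schema and sample options here are illustrative):

import cPickle, sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE conversion_options (book INTEGER, format TEXT, data BLOB)')

options = {'margin_top': 10.0, 'output_profile': 'tablet'}
# Highest pickle protocol, wrapped in buffer() so sqlite stores a BLOB
conn.execute('INSERT INTO conversion_options VALUES (?,?,?)',
             (1, 'EPUB', buffer(cPickle.dumps(options, -1))))

for (data,) in conn.execute(
        'SELECT data FROM conversion_options WHERE book=? AND format=?', (1, 'EPUB')):
    print cPickle.loads(bytes(data)) == options  # True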
@@ -265,8 +265,10 @@ class Cache(object):
        for name, field in self.fields.iteritems():
            if name[0] == '#' and name.endswith('_index'):
                field.series_field = self.fields[name[:-len('_index')]]
+                self.fields[name[:-len('_index')]].index_field = field
            elif name == 'series_index':
                field.series_field = self.fields['series']
+                self.fields['series'].index_field = field
            elif name == 'authors':
                field.author_sort_field = self.fields['author_sort']
            elif name == 'title':

@@ -1179,6 +1181,18 @@ class Cache(object):
            else:
                table.remove_books(book_ids, self.backend)

    @write_api
    def remove_items(self, field, item_ids):
        ''' Delete all items in the specified field with the specified ids. Returns the set of affected book ids. '''
        field = self.fields[field]
        affected_books = field.table.remove_items(item_ids, self.backend)
        if affected_books:
            if hasattr(field, 'index_field'):
                self._set_field(field.index_field.name, {bid:1.0 for bid in affected_books})
            else:
                self._mark_as_dirty(affected_books)
        return affected_books

    @write_api
    def add_custom_book_data(self, name, val_map, delete_first=False):
        ''' Add data for name where val_map is a map of book_ids to values. If
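A hedged usage note, assuming an initialized calibre.db.cache.Cache: remove_items() takes a field name and an iterable of item ids, and returns the ids of the books that referenced them; for series-like fields the matching *_index values are reset to 1.0 by the _set_field() call above. For example (the tag name is invented):

tag_map = cache.get_id_map('tags')  # {item_id: tag name}
doomed = [i for i, name in tag_map.iteritems() if name == 'Unread']
affected = cache.remove_items('tags', doomed)
print affected  # the set of book ids that carried the removed tag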
@@ -1208,6 +1222,22 @@ class Cache(object):
        ''' Return the set of book ids for which name has data. '''
        return self.backend.get_ids_for_custom_book_data(name)

    @read_api
    def conversion_options(self, book_id, fmt='PIPE'):
        return self.backend.conversion_options(book_id, fmt)

    @read_api
    def has_conversion_options(self, ids, fmt='PIPE'):
        return self.backend.has_conversion_options(ids, fmt)

    @write_api
    def delete_conversion_options(self, book_ids, fmt='PIPE'):
        return self.backend.delete_conversion_options(book_ids, fmt)

    @write_api
    def set_conversion_options(self, options, fmt='PIPE'):
        ''' options must be a map of the form {book_id:conversion_options} '''
        return self.backend.set_conversion_options(options, fmt)

    # }}}
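Taken together, these four thin wrappers are the public face of the backend methods above. A usage sketch, again assuming an initialized Cache with books 1 and 2 (fmt defaults to 'PIPE' throughout, matching the tests further down):

cache.set_conversion_options({1: {'margin_top': 10.0}})
print cache.conversion_options(1)           # -> {'margin_top': 10.0}
print cache.has_conversion_options({1, 2})  # -> True
cache.delete_conversion_options((1,))
print cache.conversion_options(1)           # -> None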
@@ -98,6 +98,13 @@ class LibraryDatabase(object):
                return self.new_api.get_item_name(field, item_id)
            return func
        setattr(self, '%s_name' % field, MT(getter(field)))
+        for field in ('publisher', 'series', 'tag'):
+            def getter(field):
+                fname = 'tags' if field == 'tag' else field
+                def func(self, item_id):
+                    self.new_api.remove_items(fname, (item_id,))
+                return func
+            setattr(self, 'delete_%s_using_id' % field, MT(getter(field)))

        # Legacy field API
        for func in (
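The delete_*_using_id generation relies on a classic closure trick: calling getter(field) immediately captures the current loop value, sidestepping Python's late-binding closures, and the resulting function is bound to the instance (MT in the diff; types.MethodType plays that role in this standalone sketch, with invented class names):

import types

class API(object):
    def remove_items(self, field, ids):
        print 'removing %r from %r' % (ids, field)

class Legacy(object):
    def __init__(self):
        self.new_api = API()
        for field in ('publisher', 'series', 'tag'):
            def getter(field):  # freezes the current value of field
                fname = 'tags' if field == 'tag' else field
                def func(self, item_id):
                    self.new_api.remove_items(fname, (item_id,))
                return func
            setattr(self, 'delete_%s_using_id' % field,
                    types.MethodType(getter(field), self))

Legacy().delete_tag_using_id(3)  # prints: removing (3,) from 'tags'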
@@ -383,6 +390,18 @@ class LibraryDatabase(object):
                break
        return ans

    def set_conversion_options(self, book_id, fmt, options):
        self.new_api.set_conversion_options({book_id:options}, fmt=fmt)

    def conversion_options(self, book_id, fmt):
        return self.new_api.conversion_options(book_id, fmt=fmt)

    def has_conversion_options(self, ids, format='PIPE'):
        return self.new_api.has_conversion_options(ids, fmt=format)

    def delete_conversion_options(self, book_id, fmt, commit=True):
        self.new_api.delete_conversion_options((book_id,), fmt=fmt)

    # Private interface {{{
    def __iter__(self):
        for row in self.data.iterall():
@@ -204,6 +204,21 @@ class ManyToOneTable(Table):
                [(x,) for x in clean])
        return clean

    def remove_items(self, item_ids, db):
        affected_books = set()
        for item_id in item_ids:
            val = self.id_map.pop(item_id, null)
            if val is null:
                continue
            book_ids = self.col_book_map.pop(item_id, set())
            for book_id in book_ids:
                self.book_col_map.pop(book_id, None)
            affected_books.update(book_ids)
        item_ids = tuple((x,) for x in item_ids)
        db.conn.executemany('DELETE FROM {0} WHERE {1}=?'.format(self.link_table, self.metadata['link_column']), item_ids)
        db.conn.executemany('DELETE FROM {0} WHERE id=?'.format(self.metadata['table']), item_ids)
        return affected_books

class ManyToManyTable(ManyToOneTable):

    '''
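Note the val = self.id_map.pop(item_id, null) idiom: null is presumably a module-level object() sentinel, used because it can never collide with a stored value the way None could. The pattern in isolation:

null = object()  # unique sentinel, distinct from any stored value

id_map = {1: 'Tag One', 2: None}
for item_id in (1, 2, 3):
    val = id_map.pop(item_id, null)
    if val is null:
        print '%d: not present, skipped' % item_id
    else:
        print '%d: removed %r' % (item_id, val)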
@@ -250,6 +265,21 @@ class ManyToManyTable(ManyToOneTable):
                [(x,) for x in clean])
        return clean

    def remove_items(self, item_ids, db):
        affected_books = set()
        for item_id in item_ids:
            val = self.id_map.pop(item_id, null)
            if val is null:
                continue
            book_ids = self.col_book_map.pop(item_id, set())
            for book_id in book_ids:
                self.book_col_map[book_id] = tuple(x for x in self.book_col_map.get(book_id, ()) if x != item_id)
            affected_books.update(book_ids)
        item_ids = tuple((x,) for x in item_ids)
        db.conn.executemany('DELETE FROM {0} WHERE {1}=?'.format(self.link_table, self.metadata['link_column']), item_ids)
        db.conn.executemany('DELETE FROM {0} WHERE id=?'.format(self.metadata['table']), item_ids)
        return affected_books

class AuthorsTable(ManyToManyTable):

    def read_id_maps(self, db):
@@ -274,6 +304,9 @@ class AuthorsTable(ManyToManyTable):
            self.asort_map.pop(item_id, None)
        return clean

    def remove_items(self, item_ids, db):
        raise ValueError('Direct removal of authors is not allowed')

class FormatsTable(ManyToManyTable):

    do_clean_on_remove = False

@@ -331,6 +364,9 @@ class FormatsTable(ManyToManyTable):

        return {book_id:zero_max(book_id) for book_id in formats_map}

    def remove_items(self, item_ids, db):
        raise NotImplementedError('Cannot delete a format directly')

    def update_fmt(self, book_id, fmt, fname, size, db):
        fmts = list(self.book_col_map.get(book_id, []))
        try:

@@ -381,4 +417,6 @@ class IdentifiersTable(ManyToManyTable):
            clean.add(item_id)
        return clean

    def remove_items(self, item_ids, db):
        raise NotImplementedError('Direct deletion of identifiers is not implemented')
@@ -191,6 +191,52 @@ class LegacyTest(BaseTest):
        db.close()
    # }}}

    def test_legacy_conversion_options(self): # {{{
        'Test conversion options API'
        ndb = self.init_legacy()
        db = self.init_old()
        all_ids = ndb.new_api.all_book_ids()
        op1, op2 = {'xx':'yy'}, {'yy':'zz'}
        for x in (
            ('has_conversion_options', all_ids),
            ('conversion_options', 1, 'PIPE'),
            ('set_conversion_options', 1, 'PIPE', op1),
            ('has_conversion_options', all_ids),
            ('conversion_options', 1, 'PIPE'),
            ('delete_conversion_options', 1, 'PIPE'),
            ('has_conversion_options', all_ids),
        ):
            meth, args = x[0], x[1:]
            self.assertEqual((getattr(db, meth)(*args)), (getattr(ndb, meth)(*args)),
                'The method: %s() returned different results for argument %s' % (meth, args))
        db.close()
    # }}}

    def test_legacy_delete_using(self): # {{{
        'Test delete_using() API'
        ndb = self.init_legacy()
        db = self.init_old()
        cache = ndb.new_api
        tmap = cache.get_id_map('tags')
        t = next(tmap.iterkeys())
        pmap = cache.get_id_map('publisher')
        p = next(pmap.iterkeys())
        for x in (
            ('delete_tag_using_id', t),
            ('delete_publisher_using_id', p),
            (db.refresh,),
            ('all_tag_names',), ('tags', 0), ('tags', 1), ('tags', 2),
            ('all_publisher_names',), ('publisher', 0), ('publisher', 1), ('publisher', 2),
        ):
            meth, args = x[0], x[1:]
            if callable(meth):
                meth(*args)
            else:
                self.assertEqual((getattr(db, meth)(*args)), (getattr(ndb, meth)(*args)),
                    'The method: %s() returned different results for argument %s' % (meth, args))
        db.close()
    # }}}

    def test_legacy_adding_books(self): # {{{
        'Test various adding books methods'
        from calibre.ebooks.metadata.book.base import Metadata
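Both tests use the same driving pattern: feed identical (method_name, args) tuples to the old and new implementations and assert that the results agree. Distilled into a standalone helper (all names illustrative):

def assert_parity(old, new, calls):
    # Run each call against both implementations and compare results
    for call in calls:
        meth, args = call[0], call[1:]
        old_res = getattr(old, meth)(*args)
        new_res = getattr(new, meth)(*args)
        assert old_res == new_res, '%s%r: %r != %r' % (meth, args, old_res, new_res)

class Old(object):
    def double(self, x):
        return 2 * x

class New(object):
    def double(self, x):
        return x + x

assert_parity(Old(), New(), [('double', 2), ('double', 5)])  # passes silently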
@@ -269,7 +315,10 @@ class LegacyTest(BaseTest):
            'books_in_old_database',  # unused

            # Internal API
-            'clean_user_categories', 'cleanup_tags', 'books_list_filter',
+            'clean_user_categories', 'cleanup_tags', 'books_list_filter', 'conn', 'connect', 'construct_file_name',
+            'construct_path_name', 'clear_dirtied', 'commit_dirty_cache', 'initialize_database', 'initialize_dynamic',
+            'run_import_plugins', 'vacuum', 'set_path', 'row', 'row_factory', 'rows', 'rmtree', 'series_index_pat',
+            'import_old_database', 'dirtied_lock', 'dirtied_cache', 'dirty_queue_length', 'dirty_books_referencing',
        }
        SKIP_ARGSPEC = {
            '__init__', 'get_next_series_num_for', 'has_book', 'author_sort_from_authors',

@@ -280,7 +329,7 @@ class LegacyTest(BaseTest):
        try:
            total = 0
            for attr in dir(db):
-                if attr in SKIP_ATTRS:
+                if attr in SKIP_ATTRS or attr.startswith('upgrade_version'):
                    continue
                total += 1
                if not hasattr(ndb, attr):

@@ -302,7 +351,7 @@ class LegacyTest(BaseTest):

        if missing:
            pc = len(missing)/total
-            raise AssertionError('{0:.1%} of API ({2} attrs) are missing. For example: {1}'.format(pc, ', '.join(missing[:5]), len(missing)))
+            raise AssertionError('{0:.1%} of API ({2} attrs) are missing: {1}'.format(pc, ', '.join(missing), len(missing)))

    # }}}
@@ -419,3 +419,58 @@ class WritingTest(BaseTest):

    # }}}

    def test_conversion_options(self): # {{{
        ' Test saving of conversion options '
        cache = self.init_cache()
        all_ids = cache.all_book_ids()
        self.assertFalse(cache.has_conversion_options(all_ids))
        self.assertIsNone(cache.conversion_options(1))
        op1, op2 = {'xx':'yy'}, {'yy':'zz'}
        cache.set_conversion_options({1:op1, 2:op2})
        self.assertTrue(cache.has_conversion_options(all_ids))
        self.assertEqual(cache.conversion_options(1), op1)
        self.assertEqual(cache.conversion_options(2), op2)
        cache.set_conversion_options({1:op2})
        self.assertEqual(cache.conversion_options(1), op2)
        cache.delete_conversion_options(all_ids)
        self.assertFalse(cache.has_conversion_options(all_ids))
    # }}}

    def test_remove_items(self): # {{{
        ' Test removal of many-(many,one) items '
        cache = self.init_cache()
        tmap = cache.get_id_map('tags')
        self.assertEqual(cache.remove_items('tags', tmap), {1, 2})
        tmap = cache.get_id_map('#tags')
        t = {v:k for k, v in tmap.iteritems()}['My Tag Two']
        self.assertEqual(cache.remove_items('#tags', (t,)), {1, 2})

        smap = cache.get_id_map('series')
        self.assertEqual(cache.remove_items('series', smap), {1, 2})
        smap = cache.get_id_map('#series')
        s = {v:k for k, v in smap.iteritems()}['My Series Two']
        self.assertEqual(cache.remove_items('#series', (s,)), {1})

        for c in (cache, self.init_cache()):
            self.assertFalse(c.get_id_map('tags'))
            self.assertFalse(c.all_field_names('tags'))
            for bid in c.all_book_ids():
                self.assertFalse(c.field_for('tags', bid))

            self.assertEqual(len(c.get_id_map('#tags')), 1)
            self.assertEqual(c.all_field_names('#tags'), {'My Tag One'})
            for bid in c.all_book_ids():
                self.assertIn(c.field_for('#tags', bid), ((), ('My Tag One',)))

            for bid in (1, 2):
                self.assertEqual(c.field_for('series_index', bid), 1.0)
            self.assertFalse(c.get_id_map('series'))
            self.assertFalse(c.all_field_names('series'))
            for bid in c.all_book_ids():
                self.assertFalse(c.field_for('series', bid))

            self.assertEqual(c.field_for('series_index', 1), 1.0)
            self.assertEqual(c.all_field_names('#series'), {'My Series One'})
            for bid in c.all_book_ids():
                self.assertIn(c.field_for('#series', bid), (None, 'My Series One'))
    # }}}
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 3  # Needed for dynamic plugin loading
+store_version = 4  # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -126,15 +126,47 @@ class AmazonKindleStore(StorePlugin):

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            doc = html.fromstring(f.read())

-            data_xpath = '//div[contains(@class, "prod")]'
-            format_xpath = './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
-            asin_xpath = '@name'
-            cover_xpath = './/img[@class="productImage"]/@src'
-            title_xpath = './/h3[@class="newaps"]/a//text()'
-            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
-            price_xpath = './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
+            if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
+                data_xpath = '//div[contains(@class, "prod")]'
+                format_xpath = (
+                    './/ul[contains(@class, "rsltGridList")]'
+                    '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
+                asin_xpath = '@name'
+                cover_xpath = './/img[@class="productImage"]/@src'
+                title_xpath = './/h3[@class="newaps"]/a//text()'
+                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
+                price_xpath = (
+                    './/ul[contains(@class, "rsltGridList")]'
+                    '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
+            elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
+                data_xpath = '//li[(@class="ilo")]'
+                format_xpath = (
+                    './/ul[contains(@class, "rsltGridList")]'
+                    '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
+                asin_xpath = '@name'
+                cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
+                title_xpath = './/h3[@class="newaps"]/a//text()'
+                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
+                # Results can be in a grid (table) or a column
+                price_xpath = (
+                    './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
+                    '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
+            elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
+                data_xpath = '//div[contains(@class, "prod")]'
+                format_xpath = (
+                    './/ul[contains(@class, "rsltL")]'
+                    '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
+                asin_xpath = '@name'
+                cover_xpath = './/img[@class="productImage"]/@src'
+                title_xpath = './/h3[@class="newaps"]/a//text()'
+                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
+                price_xpath = (
+                    './/ul[contains(@class, "rsltL")]'
+                    '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
+            else:
+                return

            for data in doc.xpath(data_xpath):
                if counter <= 0:
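The store change replaces one fixed set of XPaths with a probe-then-select structure: detect which layout Amazon served (grid, image list, or plain list) before binding any XPaths, and bail out on unknown layouts instead of scraping garbage. The same technique in miniature, against invented markup:

from lxml import html

page = html.fromstring(
    '<div id="atfResults" class="grid">'
    '<div class="prod"><h3 class="newaps"><a>Title A</a></h3></div>'
    '</div>')

if page.xpath('//div[@id="atfResults" and contains(@class, "grid")]'):
    data_xpath = '//div[contains(@class, "prod")]'  # grid layout
elif page.xpath('//div[@id="atfResults" and contains(@class, "list")]'):
    data_xpath = '//div[contains(@class, "prod")]'  # list layout
else:
    data_xpath = None  # unknown layout: refuse to guess

if data_xpath is not None:
    for node in page.xpath(data_xpath):
        print ''.join(node.xpath('.//h3[@class="newaps"]/a//text()'))  # Title A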