From 457725ee5a9807d1b31a8deb704ced15d2f5ef29 Mon Sep 17 00:00:00 2001 From: Don-Swanson <32144818+Don-Swanson@users.noreply.github.com> Date: Tue, 30 Sep 2025 20:32:51 -0500 Subject: [PATCH] Enhance link extraction logic in search function to handle cases with no result containers, improving robustness and accuracy of results. --- .github/workflows/pypi.yml | 20 +++++++++++--- app/routes.py | 55 ++++++++++++++++++++++++-------------- 2 files changed, 52 insertions(+), 23 deletions(-) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index ac4ecba..7a8a9f6 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -38,23 +38,37 @@ jobs: password: ${{ secrets.TEST_PYPI_API_TOKEN }} repository_url: https://test.pypi.org/legacy/ publish: - # Gate real PyPI publishing to stable SemVer tags only (e.g., v1.2.3 or 1.2.3) - if: startsWith(github.ref, 'refs/tags/') && (github.ref_name matches '^v?\\d+\\.\\d+\\.\\d+$') + # Gate real PyPI publishing to stable SemVer tags only + if: startsWith(github.ref, 'refs/tags/') name: Build and publish to PyPI runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Check if stable release + id: check_tag + run: | + TAG="${{ github.ref_name }}" + if echo "$TAG" | grep -qE '^v?[0-9]+\.[0-9]+\.[0-9]+$'; then + echo "is_stable=true" >> $GITHUB_OUTPUT + echo "Tag '$TAG' is a stable release. Will publish to PyPI." + else + echo "is_stable=false" >> $GITHUB_OUTPUT + echo "Tag '$TAG' is not a stable release (contains pre-release suffix). Skipping PyPI publish." + fi - name: Set up Python 3.9 + if: steps.check_tag.outputs.is_stable == 'true' uses: actions/setup-python@v5 with: python-version: 3.9 - name: Install pypa/build + if: steps.check_tag.outputs.is_stable == 'true' run: >- python -m pip install build --user - name: Build binary wheel and source tarball + if: steps.check_tag.outputs.is_stable == 'true' run: >- python -m build @@ -63,7 +77,7 @@ jobs: --outdir dist/ . - name: Publish distribution to PyPI - if: startsWith(github.ref, 'refs/tags') + if: steps.check_tag.outputs.is_stable == 'true' uses: pypa/gh-action-pypi-publish@master with: password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/app/routes.py b/app/routes.py index 0dfd31a..b63806e 100644 --- a/app/routes.py +++ b/app/routes.py @@ -411,28 +411,43 @@ def search(): # Find all result containers (using known result classes) result_divs = json_soup.find_all('div', class_=['ZINbbc', 'ezO2md']) - for div in result_divs: - # Find the first valid link in this result container - link = None - for a in div.find_all('a', href=True): - if a['href'].startswith('http'): - link = a - break - - if not link: - continue + if result_divs: + # Process structured Google results with container divs + for div in result_divs: + # Find the first valid link in this result container + link = None + for a in div.find_all('a', href=True): + if a['href'].startswith('http'): + link = a + break - href = link['href'] - if href in seen: - continue - - # Get all text from the result container, not just the link - text = div.get_text(separator=' ', strip=True) - if not text: - continue + if not link: + continue + + href = link['href'] + if href in seen: + continue - seen.add(href) - results.append({'href': href, 'text': text}) + # Get all text from the result container, not just the link + text = div.get_text(separator=' ', strip=True) + if not text: + continue + + seen.add(href) + results.append({'href': href, 'text': text}) + else: + # Fallback: extract links directly if no result containers found + for a in json_soup.find_all('a', href=True): + href = a['href'] + if not href.startswith('http'): + continue + if href in seen: + continue + text = a.get_text(strip=True) + if not text: + continue + seen.add(href) + results.append({'href': href, 'text': text}) return jsonify({ 'query': urlparse.unquote(query),