Work on automatic model choice

This commit is contained in:
Kovid Goyal 2025-09-03 06:46:26 +05:30
parent 06b9c221e7
commit 60b2e7a6ca
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 122 additions and 6 deletions

View File

@ -33,6 +33,18 @@ class ChatMessage(NamedTuple):
return escape(self.query).replace('\n', '<br>')
class ChatResponse(NamedTuple):
    # A single response (or failure) from a chat model.
    # Text content of the response chunk.
    content: str = ''
    # Cost of generating this response, expressed in `currency`.
    cost: float = 0
    currency: str = 'USD'
    # Set when the request failed; `traceback` carries the formatted trace.
    exception: Exception | None = None
    traceback: str = ''
class NoFreeModels(Exception):
    """Raised when no free-to-use models are available for the requested task."""
    pass
class AICapabilities(Flag):
    # Bitmask describing what an AI provider/model can do.
    # NOTE(review): additional members may exist beyond those visible here.
    none = auto()
    text_to_text = auto()

View File

@ -5,13 +5,14 @@ import datetime
import json
import os
import tempfile
from collections.abc import Iterator, Sequence
from contextlib import closing, suppress
from functools import lru_cache
from threading import Thread
from typing import Any, NamedTuple
from calibre import browser
from calibre.ai import AICapabilities
from calibre.ai import AICapabilities, ChatMessage, ChatResponse, NoFreeModels
from calibre.ai.open_router import OpenRouterAI
from calibre.ai.prefs import pref_for_provider
from calibre.constants import __version__, cache_dir
@ -72,7 +73,7 @@ def schedule_update_of_cached_models_data(cache_loc):
@lru_cache(2)
def get_available_models():
def get_available_models() -> dict[str, 'Model']:
cache_loc = os.path.join(cache_dir(), 'openrouter', 'models-v1.json')
with suppress(OSError):
data = json.loads(atomic_read(cache_loc))
@ -121,6 +122,21 @@ class Model(NamedTuple):
capabilities: AICapabilities
tokenizer: str
@property
def creator(self) -> str:
    """Lower-cased creator name: everything before the first ':' in the model name."""
    head, _sep, _rest = self.name.partition(':')
    return head.lower()
@property
def family(self) -> str:
    """The model family: the first word after the creator prefix, lower-cased.

    Returns '' when the name has no ':' separated creator prefix.
    """
    pieces = self.name.split(':')
    if len(pieces) < 2:
        # No creator prefix present.
        return ''
    first_word = pieces[1].strip().partition(' ')[0]
    return first_word.lower()
@property
def name_without_creator(self) -> str:
    """Lower-cased model name with the creator prefix removed.

    Returns '' when the name contains no ':' (partition yields an empty tail).
    """
    _head, _sep, tail = self.name.partition(':')
    return tail.lower().strip()
@classmethod
def from_dict(cls, x: dict[str, object]) -> 'Model':
arch = x['architecture']
@ -165,6 +181,72 @@ def is_ready_for_use() -> bool:
return bool(api_key())
@lru_cache(2)
def free_model_choice_for_text(allow_paid: bool = False) -> tuple[Model, ...]:
gemini_free, gemini_paid = [], []
deep_seek_free, deep_seek_paid = [], []
gpt5_free, gpt5_paid = [], []
gpt_oss_free, gpt_oss_paid = [], []
opus_free, opus_paid = [], []
def only_newest(models: list[Model]) -> tuple[Model, ...]:
if models:
models.sort(key=lambda m: m.created, reverse=True)
return (models[0],)
return ()
def only_cheapest(models: list[Model]) -> tuple[Model, ...]:
if models:
models.sort(key=lambda m: m.pricing.output_token)
return (models[0],)
return ()
for model in get_available_models().values():
if AICapabilities.text_to_text not in model.capabilities:
continue
match model.creator:
case 'google':
if model.family == 'gemini':
gemini_free.append(model) if model.pricing.is_free else gemini_paid.append(model)
case 'deepseek':
deep_seek_free.append(model) if model.pricing.is_free else deep_seek_paid.append(model)
case 'openai':
n = model.name_without_creator
if n.startswith('gpt-5'):
gpt5_free.append(model) if model.pricing.is_free else gpt5_paid.append(model)
elif n.startswith('gpt-oss'):
gpt_oss_free.append(model) if model.pricing.is_free else gpt_oss_paid.append(model)
case 'anthropic':
if model.family == 'opus':
opus_free.append(model) if model.pricing.is_free else opus_paid.append(model)
free = only_newest(gemini_free) + only_newest(gpt5_free) + only_newest(gpt_oss_free) + only_newest(opus_free) + only_newest(deep_seek_free)
if free:
return free
if not allow_paid:
raise NoFreeModels(_('No free models were found for text to text generation'))
return only_cheapest(gemini_paid) + only_cheapest(gpt5_paid) + only_cheapest(opus_paid) + only_cheapest(deep_seek_paid)
def model_choice_for_text() -> Iterator[Model, ...]:
match pref('model_choice_strategy', 'free'):
case 'free-or-paid':
yield from free_model_choice_for_text(allow_paid=True)
case 'free-only':
yield from free_model_choice_for_text(allow_paid=False)
case _:
yield get_available_models()['openrouter/auto']
def text_chat(messages: Sequence[ChatMessage]) -> Iterator[ChatResponse]:
    # Run a text chat over the automatically chosen models, yielding responses.
    # NOTE(review): only the opening of this function is visible in this diff;
    # the part that actually sends `messages` is not shown here.
    try:
        models = tuple(model_choice_for_text())
    except Exception as e:
        import traceback
        # Surface model-selection failures (e.g. NoFreeModels) to the caller.
        yield ChatResponse(exception=e, traceback=traceback.format_exc())
        # NOTE(review): execution continues after this yield with `models`
        # unbound, so the `if not models` check below raises NameError — an
        # early return (or assigning models before the try) looks intended;
        # confirm.
    if not models:
        # Fall back to OpenRouter's automatic model selection.
        models = (get_available_models()['openrouter/auto'],)
if __name__ == '__main__':
from pprint import pprint
for m in get_available_models().values():

View File

@ -59,13 +59,13 @@ class Model(QWidget):
l.setContentsMargins(0, 0, 0, 0)
self.for_text = for_text
self.model_id, self.model_name = pref(
'text_model' if for_text else 'text_to_image_model', ('', _('Automatic (low cost)')))
'text_model' if for_text else 'text_to_image_model', ('', _('Automatic')))
self.la = la = QLabel(self.model_name)
self.setToolTip(_('The model to use for text related tasks') if for_text else _(
'The model to use for generating images from text'))
self.setToolTip(self.toolTip() + '\n\n' + _(
'If not specified an appropriate free to use model is chosen automatically.\n'
'If no free model is available then cheaper ones are preferred.'))
'If not specified an appropriate model is chosen automatically.\n'
'See the option for "Model choice strategy" to control how models are automatically chosen.'))
self.b = b = QPushButton(_('&Change'))
b.setToolTip(_('Choose a model'))
l.addWidget(la), l.addWidget(b)
@ -380,6 +380,24 @@ class ConfigWidget(QWidget):
l.addRow(_('API &key:'), a)
if key := pref('api_key'):
a.setText(from_hex_unicode(key))
self.model_strategy = ms = QComboBox(self)
l.addRow(_('Model choice strategy:'), ms)
ms.addItem(_('Free only'), 'free-only')
ms.addItem(_('Free or paid'), 'free-or-paid')
ms.addItem(_('High quality'), 'native')
if strat := pref('model_choice_strategy'):
ms.setCurrentIndex(max(0, ms.findData(strat)))
ms.setToolTip('<p>' + _(
'The model choice strategy controls how a model to query is chosen when no specific'
' model is specified. The choices are:<ul>\n'
'<li><b>Free only</b> - Only uses free models. Can lead to lower quality/slower'
' results, with some rate limiting as well. Prefers unmoderated models where possible. If no free models'
' are available, will fail with an error.\n'
'<li><b>Free or paid</b> - Like Free only, but fallback to non-free models if no free ones are available.\n'
'<li><b>High quality</b> - Automatically choose a model based on the query, for best possible'
" results, regardless of cost. Uses OpenRouter's own automatic model selection."
))
self.text_model = tm = Model(parent=self)
tm.select_model.connect(self.select_model)
l.addRow(_('Model for &text tasks:'), tm)
@ -395,9 +413,13 @@ class ConfigWidget(QWidget):
def api_key(self) -> str:
    """The API key currently entered in the key field, whitespace-stripped."""
    raw = self.api_key_edit.text()
    return raw.strip()
@property
def model_choice_strategy(self) -> str:
    # The strategy id stored as userData of the selected combo box entry
    # ('free-only', 'free-or-paid' or 'native').
    return self.model_strategy.currentData()
@property
def settings(self) -> dict[str, Any]:
return {'api_key': as_hex_unicode(self.api_key)}
return {'api_key': as_hex_unicode(self.api_key), 'model_choice_strategy': self.model_choice_strategy}
@property
def is_ready_for_use(self) -> bool: