# Библиотека (Library)
import openai
import time
from typing import List, Dict, Optional, Any
# Short alias -> full model identifier. The client below resolves aliases
# through this table before sending the ``model`` field of a request.
# NOTE(review): identifiers are not validated here — confirm them against the
# model list returned by the API.
MODELS: Dict[str, str] = {
    # Meta Llama family
    "llama8b": "meta/llama3-8b-instruct",
    "llama70b": "meta/llama3-70b-instruct",
    "llama405b": "meta/llama-3.1-405b-instruct",
    # Other vendors
    "mistral7b": "mistralai/mistral-7b-instruct-v0.2",
    "gemma7b": "google/gemma-7b",
    "nemotron340b": "nvidia/nemotron-4-340b-instruct",
    "arctic": "snowflake/arctic",
    "phi3mini": "microsoft/phi-3-mini-128k-instruct",
    "deepseekv3": "deepseek/deepseek-v3.2",
    "qwen3coder": "qwen/qwen-3-coder",
    "kimi2.5": "kimi/kimi-2.5",
}
class NvidiaNIMClient:
    """Thin wrapper around ``openai.OpenAI`` clients with per-model key rotation.

    One ``openai.OpenAI`` client is created per API key. Keys are grouped by
    model, and every request for a model takes the next client of that model's
    pool in round-robin order. Each request is preceded by a fixed pause
    (``rate_limit_wait``); rate-limit errors are retried a bounded number of
    times, every other error is printed and returned as ``{"error": "..."}``.
    """

    def __init__(
        self,
        api_keys: Dict[str, List[str]],
        base_url: str = "https://integrate.api.nvidia.com/v1",
        use_aliases: bool = True,
        requests_per_minute: float = 40.0,
        max_retries: int = 5,
        retry_wait: float = 60.0,
    ):
        """Create one API client per key.

        Args:
            api_keys: Mapping of model name (or alias from ``MODELS`` when
                ``use_aliases`` is true) to the list of API keys for it.
            base_url: Base URL handed to every ``openai.OpenAI`` client.
            use_aliases: Resolve names through ``MODELS``; names missing from
                the table are used unchanged.
            requests_per_minute: Request budget used to derive the pause
                slept before every request.
            max_retries: How many times a rate-limited request is retried
                before an error dict is returned.
            retry_wait: Seconds to sleep after a rate-limit error before
                retrying.

        Raises:
            ValueError: If ``requests_per_minute`` is not positive, or
                ``max_retries`` / ``retry_wait`` is negative.
        """
        if requests_per_minute <= 0:
            raise ValueError("requests_per_minute must be positive")
        if max_retries < 0:
            raise ValueError("max_retries must not be negative")
        if retry_wait < 0:
            raise ValueError("retry_wait must not be negative")

        self.use_aliases = use_aliases
        # model name -> pool of clients (one per API key)
        self.clients: Dict[str, List[Any]] = {}
        # model name -> index of the client the next request will use
        self.current_key_index: Dict[str, int] = {}
        self.max_retries = max_retries
        self.retry_wait = retry_wait

        for alias_or_model, keys in api_keys.items():
            model = MODELS.get(alias_or_model, alias_or_model) if use_aliases else alias_or_model
            # An alias and its full name may both appear in ``api_keys``;
            # merge their keys into one pool instead of dropping the earlier ones.
            pool = self.clients.setdefault(model, [])
            pool.extend(openai.OpenAI(base_url=base_url, api_key=key) for key in keys)
            self.current_key_index.setdefault(model, 0)

        # Even spacing for the request budget plus a 0.1 s safety margin.
        self.rate_limit_wait = 60 / requests_per_minute + 0.1

    def list_models(self):
        """Print every alias from ``MODELS`` with the full name it maps to."""
        print("Доступные модели (короткое имя: полное имя):")
        for alias, full in MODELS.items():
            print(f"{alias}: {full}")

    def get_available_models(self) -> List[str]:
        """Ask the API for its model list, print it and return the model ids.

        The request is sent through the first client of the first model that
        has at least one key.

        Returns:
            The ids reported by the API, or ``[]`` when no client exists or
            the request fails (the failure is printed, not raised).
        """
        # Skip models whose key list is empty instead of indexing into them.
        usable = next(
            ((name, pool[0]) for name, pool in self.clients.items() if pool),
            None,
        )
        if usable is None:
            print("Нет инициализированных моделей/ключей.")
            return []

        first_model, client = usable
        try:
            response = client.models.list()
            models = [m.id for m in response.data]
        except Exception as e:
            print(f"Ошибка при запросе списка моделей: {e}")
            return []

        print(f"Актуальные модели из API (на {first_model}):")
        for m in models:
            print(m)
        return models

    def _get_client(self, model: str):
        """Return the next client for ``model`` and advance its rotation index.

        Raises:
            ValueError: If ``model`` is unknown or has no clients.
        """
        clients = self.clients.get(model)
        if not clients:
            raise ValueError(f"Модель {model} не найдена.")
        # The modulo keeps a missing or stale index inside the pool.
        index = self.current_key_index.get(model, 0) % len(clients)
        self.current_key_index[model] = (index + 1) % len(clients)
        return clients[index]

    def _create_with_retry(self, model: str, pick_endpoint: Any, stream: bool, params: Dict[str, Any]) -> Any:
        """Send one ``create`` request, retrying rate-limit errors a bounded number of times.

        Args:
            model: Model name or alias.
            pick_endpoint: Callable mapping a client to the object whose
                ``create`` method is called.
            stream: Forwarded to ``create``; when true the raw response is
                returned instead of ``response.model_dump()``.
            params: Remaining keyword arguments for ``create``.

        Returns:
            The dumped response, the raw stream object, or ``{"error": "..."}``.

        Raises:
            ValueError: If no client is registered for the resolved model.
        """
        if self.use_aliases:
            model = MODELS.get(model, model)

        retries_left = self.max_retries
        while True:
            time.sleep(self.rate_limit_wait)
            # Every attempt takes the next key, so a retry after a rate-limit
            # error goes out with a different key when several are configured.
            client = self._get_client(model)
            try:
                response = pick_endpoint(client).create(model=model, stream=stream, **params)
                return response if stream else response.model_dump()
            except openai.RateLimitError as e:
                if retries_left <= 0:
                    print(f"Ошибка: {e}")
                    return {"error": str(e)}
                retries_left -= 1
                time.sleep(self.retry_wait)
            except Exception as e:
                print(f"Ошибка: {e}")
                return {"error": str(e)}

    def chat_completion(self, model: str, messages: List[Dict[str, str]], max_tokens: int = 100, temperature: float = 0.7, top_p: float = 1.0, presence_penalty: float = 0.0, frequency_penalty: float = 0.0, stream: bool = False, **kwargs: Any) -> Any:
        """Call ``client.chat.completions.create`` for ``model``.

        Args:
            model: Model name or alias.
            messages: Chat messages forwarded unchanged.
            max_tokens, temperature, top_p, presence_penalty,
            frequency_penalty, stream: Forwarded unchanged.
            **kwargs: Extra keyword arguments forwarded to ``create``.

        Returns:
            ``response.model_dump()``; the raw response object when ``stream``
            is true; ``{"error": "..."}`` on failure or when rate-limit
            retries are exhausted.

        Raises:
            ValueError: If no client is registered for the model.
        """
        params = {
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty,
            **kwargs,
        }
        return self._create_with_retry(model, lambda client: client.chat.completions, stream, params)

    def completion(self, model: str, prompt: str, max_tokens: int = 100, temperature: float = 0.7, top_p: float = 1.0, stream: bool = False, **kwargs: Any) -> Any:
        """Call ``client.completions.create`` for ``model``.

        Args:
            model: Model name or alias.
            prompt: Prompt text forwarded unchanged.
            max_tokens, temperature, top_p, stream: Forwarded unchanged.
            **kwargs: Extra keyword arguments forwarded to ``create``.

        Returns:
            ``response.model_dump()``; the raw response object when ``stream``
            is true; ``{"error": "..."}`` on failure or when rate-limit
            retries are exhausted.

        Raises:
            ValueError: If no client is registered for the model.
        """
        params = {
            "prompt": prompt,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            **kwargs,
        }
        return self._create_with_retry(model, lambda client: client.completions, stream, params)