Add vision models to readme

This commit is contained in:
Heiner Lohaus 2024-04-22 01:27:48 +02:00
parent 8dcef3b2a7
commit 4bc4d635bc
14 changed files with 141 additions and 69 deletions

View File

@ -91,7 +91,7 @@ As per the survey, here is a list of improvements to come
```sh
docker pull hlohaus789/g4f
docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" -v ${PWD}/hardir:/app/har_and_cookies hlohaus789/g4f:latest
docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" -v ${PWD}/har_and_cookies:/app/har_and_cookies hlohaus789/g4f:latest
```
3. **Access the Client:**
@ -400,17 +400,17 @@ While we wait for gpt-5, here is a list of new models that are at least better t
| openchat_3.5 | Huggingface | 2+ Providers | [huggingface.co](https://huggingface.co/) |
| pi | Inflection | g4f.Provider.Pi | [inflection.ai](https://inflection.ai/) |
### Image Models
### Image and Vision Models
| Label | Provider | Model | Website |
| ----- | -------- | ----- | ------- |
| Microsoft Designer | Bing | dall-e | [bing.com](https://www.bing.com/images/create) |
| OpenAI ChatGPT | Openai | dall-e | [chat.openai.com](https://chat.openai.com) |
| You.com | You | dall-e | [you.com](https://you.com) |
| DeepInfraImage | DeepInfra | stability-ai/sdxl | [deepinfra.com](https://deepinfra.com) |
| ReplicateImage | Replicate | stability-ai/sdxl | [replicate.com](https://replicate.com) |
| Gemini | Gemini | gemini | [gemini.google.com](https://gemini.google.com) |
| Meta AI | MetaAI | meta | [meta.ai](https://www.meta.ai) |
| Label | Provider | Image Model | Vision Model | Website |
| ----- | -------- | ----------- | ------------ | ------- |
| Microsoft Copilot in Bing | `g4f.Provider.Bing` | dall-e | gpt-4-vision | [bing.com](https://bing.com/chat) |
| DeepInfra | `g4f.Provider.DeepInfra` | stability-ai/sdxl | llava-1.5-7b-hf | [deepinfra.com](https://deepinfra.com) |
| Gemini | `g4f.Provider.Gemini` | gemini | gemini | [gemini.google.com](https://gemini.google.com) |
| Meta AI | `g4f.Provider.MetaAI` | meta | ❌ | [meta.ai](https://www.meta.ai) |
| OpenAI ChatGPT | `g4f.Provider.OpenaiChat` | dall-e | gpt-4-vision | [chat.openai.com](https://chat.openai.com) |
| Replicate | `g4f.Provider.Replicate` | stability-ai/sdxl | ❌ | [replicate.com](https://replicate.com) |
| You.com | `g4f.Provider.You` | dall-e | agent | [you.com](https://you.com) |
## 🔗 Powered by gpt4free

View File

@ -127,8 +127,8 @@ def print_models():
def print_image_models():
lines = [
"| Label | Provider | Model | Website |",
"| ----- | -------- | ----- | ------- |",
"| Label | Provider | Image Model | Vision Model | Website |",
"| ----- | -------- | ----------- | ------------ | ------- |",
]
from g4f.gui.server.api import Api
for image_model in Api.get_image_models():
@ -136,13 +136,15 @@ def print_image_models():
netloc = urlparse(provider_url).netloc.replace("www.", "")
website = f"[{netloc}]({provider_url})"
label = image_model["provider"] if image_model["label"] is None else image_model["label"]
lines.append(f'| {label} | {image_model["provider"]} | {image_model["image_model"]} | {website} |')
if image_model["vision_model"] is None:
image_model["vision_model"] = ""
lines.append(f'| {label} | `g4f.Provider.{image_model["provider"]}` | {image_model["image_model"]}| {image_model["vision_model"]} | {website} |')
print("\n".join(lines))
if __name__ == "__main__":
print_providers()
print("\n", "-" * 50, "\n")
print_models()
#print_providers()
#print("\n", "-" * 50, "\n")
#print_models()
print("\n", "-" * 50, "\n")
print_image_models()

View File

@ -38,8 +38,9 @@ class Bing(AsyncGeneratorProvider, ProviderModelMixin):
supports_message_history = True
supports_gpt_4 = True
default_model = "Balanced"
default_vision_model = "gpt-4-vision"
models = [getattr(Tones, key) for key in Tones.__dict__ if not key.startswith("__")]
@classmethod
def create_async_generator(
cls,

View File

@ -13,6 +13,7 @@ from .bing.create_images import create_images, create_session, get_cookies_from_
class BingCreateImages(AsyncGeneratorProvider, ProviderModelMixin):
label = "Microsoft Designer"
parent = "Bing"
url = "https://www.bing.com/images/create"
working = True
needs_auth = True

View File

@ -1,17 +1,22 @@
from __future__ import annotations
import requests
from ..typing import AsyncResult, Messages
from ..typing import AsyncResult, Messages, ImageType
from ..image import to_data_uri
from .needs_auth.Openai import Openai
class DeepInfra(Openai):
label = "DeepInfra"
url = "https://deepinfra.com"
working = True
needs_auth = False
has_auth = True
supports_stream = True
supports_message_history = True
default_model = 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1'
default_model = "meta-llama/Meta-Llama-3-70b-instruct"
default_vision_model = "llava-hf/llava-1.5-7b-hf"
model_aliases = {
'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1'
}
@classmethod
def get_models(cls):
@ -27,19 +32,12 @@ class DeepInfra(Openai):
model: str,
messages: Messages,
stream: bool,
image: ImageType = None,
api_base: str = "https://api.deepinfra.com/v1/openai",
temperature: float = 0.7,
max_tokens: int = 1028,
**kwargs
) -> AsyncResult:
if not '/' in model:
models = {
'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1',
'dbrx-instruct': 'databricks/dbrx-instruct',
}
model = models.get(model, model)
headers = {
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US',
@ -55,6 +53,19 @@ class DeepInfra(Openai):
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"',
}
if image is not None:
if not model:
model = cls.default_vision_model
messages[-1]["content"] = [
{
"type": "image_url",
"image_url": {"url": to_data_uri(image)}
},
{
"type": "text",
"text": messages[-1]["content"]
}
]
return super().create_async_generator(
model, messages,
stream=stream,

View File

@ -9,6 +9,7 @@ from ..image import ImageResponse
class DeepInfraImage(AsyncGeneratorProvider, ProviderModelMixin):
url = "https://deepinfra.com"
parent = "DeepInfra"
working = True
default_model = 'stability-ai/sdxl'
image_models = [default_model]

View File

@ -6,6 +6,7 @@ from .MetaAI import MetaAI
class MetaAIAccount(MetaAI):
needs_auth = True
parent = "MetaAI"
image_models = ["meta"]
@classmethod

View File

@ -11,6 +11,7 @@ from ..errors import ResponseError
class ReplicateImage(AsyncGeneratorProvider, ProviderModelMixin):
url = "https://replicate.com"
parent = "Replicate"
working = True
default_model = 'stability-ai/sdxl'
default_versions = [

View File

@ -14,13 +14,16 @@ from .you.har_file import get_telemetry_ids
from .. import debug
class You(AsyncGeneratorProvider, ProviderModelMixin):
label = "You.com"
url = "https://you.com"
working = True
supports_gpt_35_turbo = True
supports_gpt_4 = True
default_model = "gpt-3.5-turbo"
default_vision_model = "agent"
image_models = ["dall-e"]
models = [
"gpt-3.5-turbo",
default_model,
"gpt-4",
"gpt-4-turbo",
"claude-instant",
@ -29,12 +32,12 @@ class You(AsyncGeneratorProvider, ProviderModelMixin):
"claude-3-sonnet",
"gemini-pro",
"zephyr",
"dall-e",
default_vision_model,
*image_models
]
model_aliases = {
"claude-v2": "claude-2"
}
image_models = ["dall-e"]
_cookies = None
_cookies_used = 0
_telemetry_ids = []
@ -52,7 +55,7 @@ class You(AsyncGeneratorProvider, ProviderModelMixin):
chat_mode: str = "default",
**kwargs,
) -> AsyncResult:
if image is not None:
if image is not None or model == cls.default_vision_model:
chat_mode = "agent"
elif not model or model == cls.default_model:
...
@ -63,13 +66,18 @@ class You(AsyncGeneratorProvider, ProviderModelMixin):
chat_mode = "custom"
model = cls.get_model(model)
async with StreamSession(
proxies={"all": proxy},
proxy=proxy,
impersonate="chrome",
timeout=(30, timeout)
) as session:
cookies = await cls.get_cookies(session) if chat_mode != "default" else None
upload = json.dumps([await cls.upload_file(session, cookies, to_bytes(image), image_name)]) if image else ""
upload = ""
if image is not None:
upload_file = await cls.upload_file(
session, cookies,
to_bytes(image), image_name
)
upload = json.dumps([upload_file])
headers = {
"Accept": "text/event-stream",
"Referer": f"{cls.url}/search?fromSearchBar=true&tbm=youchat",

View File

@ -16,6 +16,7 @@ try:
except ImportError:
pass
from ... import debug
from ...typing import Messages, Cookies, ImageType, AsyncResult
from ..base_provider import AsyncGeneratorProvider
from ..helper import format_prompt, get_cookies
@ -54,6 +55,55 @@ class Gemini(AsyncGeneratorProvider):
needs_auth = True
working = True
image_models = ["gemini"]
default_vision_model = "gemini"
_cookies: Cookies = None
@classmethod
async def nodriver_login(cls) -> Cookies:
try:
import nodriver as uc
except ImportError:
return
try:
from platformdirs import user_config_dir
user_data_dir = user_config_dir("g4f-nodriver")
except:
user_data_dir = None
if debug.logging:
print(f"Open nodriver with user_dir: {user_data_dir}")
browser = await uc.start(user_data_dir=user_data_dir)
page = await browser.get(f"{cls.url}/app")
await page.select("div.ql-editor.textarea", 240)
cookies = {}
for c in await page.browser.cookies.get_all():
if c.domain.endswith(".google.com"):
cookies[c.name] = c.value
await page.close()
return cookies
@classmethod
async def webdriver_login(cls, proxy: str):
driver = None
try:
driver = get_browser(proxy=proxy)
try:
driver.get(f"{cls.url}/app")
WebDriverWait(driver, 5).until(
EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))
)
except:
login_url = os.environ.get("G4F_LOGIN_URL")
if login_url:
yield f"Please login: [Google Gemini]({login_url})\n\n"
WebDriverWait(driver, 240).until(
EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))
)
cls._cookies = get_driver_cookies(driver)
except MissingRequirementsError:
pass
finally:
if driver:
driver.close()
@classmethod
async def create_async_generator(
@ -73,47 +123,30 @@ class Gemini(AsyncGeneratorProvider):
if cookies is None:
cookies = {}
cookies["__Secure-1PSID"] = api_key
cookies = cookies if cookies else get_cookies(".google.com", False, True)
cls._cookies = cookies or cls._cookies or get_cookies(".google.com", False, True)
base_connector = get_connector(connector, proxy)
async with ClientSession(
headers=REQUEST_HEADERS,
connector=base_connector
) as session:
snlm0e = await cls.fetch_snlm0e(session, cookies) if cookies else None
snlm0e = await cls.fetch_snlm0e(session, cls._cookies) if cls._cookies else None
if not snlm0e:
driver = None
try:
driver = get_browser(proxy=proxy)
try:
driver.get(f"{cls.url}/app")
WebDriverWait(driver, 5).until(
EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))
)
except:
login_url = os.environ.get("G4F_LOGIN_URL")
if login_url:
yield f"Please login: [Google Gemini]({login_url})\n\n"
WebDriverWait(driver, 240).until(
EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))
)
cookies = get_driver_cookies(driver)
except MissingRequirementsError:
pass
finally:
if driver:
driver.close()
cls._cookies = await cls.nodriver_login();
if cls._cookies is None:
async for chunk in cls.webdriver_login(proxy):
yield chunk
if not snlm0e:
if "__Secure-1PSID" not in cookies:
if "__Secure-1PSID" not in cls._cookies:
raise MissingAuthError('Missing "__Secure-1PSID" cookie')
snlm0e = await cls.fetch_snlm0e(session, cookies)
snlm0e = await cls.fetch_snlm0e(session, cls._cookies)
if not snlm0e:
raise RuntimeError("Invalid auth. SNlM0e not found")
raise RuntimeError("Invalid cookies. SNlM0e not found")
image_url = await cls.upload_image(base_connector, to_bytes(image), image_name) if image else None
async with ClientSession(
cookies=cookies,
cookies=cls._cookies,
headers=REQUEST_HEADERS,
connector=base_connector,
) as client:

View File

@ -4,4 +4,5 @@ from .OpenaiChat import OpenaiChat
class OpenaiAccount(OpenaiChat):
needs_auth = True
parent = "OpenaiChat"
image_models = ["dall-e"]

View File

@ -44,6 +44,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
supports_message_history = True
supports_system_message = True
default_model = None
default_vision_model = "gpt-4-vision"
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-gizmo"]
model_aliases = {
"text-davinci-002-render-sha": "gpt-3.5-turbo",

View File

@ -45,16 +45,20 @@ class Api():
@staticmethod
def get_image_models() -> list[dict]:
image_models = []
for key, provider in __map__.items():
for provider in __providers__:
if hasattr(provider, "image_models"):
if hasattr(provider, "get_models"):
provider.get_models()
parent = provider
if hasattr(provider, "parent"):
parent = __map__[provider.parent]
for model in provider.image_models:
image_models.append({
"provider": key,
"url": provider.url,
"label": provider.label if hasattr(provider, "label") else None,
"image_model": model
"provider": parent.__name__,
"url": parent.url,
"label": parent.label if hasattr(parent, "label") else None,
"image_model": model,
"vision_model": parent.default_vision_model if hasattr(parent, "default_vision_model") else None
})
return image_models

View File

@ -86,7 +86,7 @@ def is_data_uri_an_image(data_uri: str) -> bool:
if image_format not in ALLOWED_EXTENSIONS and image_format != "svg+xml":
raise ValueError("Invalid image format (from mime file type).")
def is_accepted_format(binary_data: bytes) -> bool:
def is_accepted_format(binary_data: bytes) -> str:
"""
Checks if the given binary data represents an image with an accepted format.
@ -241,6 +241,13 @@ def to_bytes(image: ImageType) -> bytes:
else:
return image.read()
def to_data_uri(image: ImageType) -> str:
# Strings are passed through unchanged — presumably already a data URI or URL; TODO confirm against callers.
if not isinstance(image, str):
# Normalize the image (bytes / file-like / path object per ImageType) to raw bytes.
data = to_bytes(image)
data_base64 = base64.b64encode(data).decode()
# is_accepted_format returns the MIME type string for the binary data (see its -> str signature).
return f"data:{is_accepted_format(data)};base64,{data_base64}"
return image
class ImageResponse:
def __init__(
self,