Merge pull request #1759 from hlohaus/goo

.har files
This commit is contained in:
H Lohaus 2024-03-26 21:50:35 +01:00 committed by GitHub
commit dd08125bb4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 86 additions and 62 deletions

3
.gitignore vendored
View File

@ -53,4 +53,5 @@ info.txt
local.py
*.gguf
image.py
.buildozer
.buildozer
hardir

View File

@ -89,7 +89,7 @@ As per the survey, here is a list of improvements to come
```sh
docker pull hlohaus789/g4f
docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" hlohaus789/g4f:latest
docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" -v ${PWD}/hardir:/app/hardir hlohaus789/g4f:latest
```
3. Open the included client on: [http://localhost:8080/chat/](http://localhost:8080/chat/)
or set the API base in your client to: [http://localhost:1337/v1](http://localhost:1337/v1)
@ -218,9 +218,12 @@ See: [/docs/interference](/docs/interference.md)
### Configuration
##### Cookies / Access Token
#### Cookies
For generating images with Bing and for the OpenAI Chat you need cookies or a token from your browser session. From Bing you need the "_U" cookie and from OpenAI you need the "access_token". You can pass the cookies / the access token in the create function or you use the `set_cookies` setter before you run G4F:
You need cookies for BingCreateImages and the Gemini Provider.
From Bing you need the "_U" cookie and from Gemini you need the "__Secure-1PSID" cookie.
Sometimes you don't need the "__Secure-1PSID" cookie, but some other auth cookies.
You can pass the cookies in the create function or use the `set_cookies` setter before you run G4F:
```python
from g4f.cookies import set_cookies
@ -228,20 +231,32 @@ from g4f.cookies import set_cookies
set_cookies(".bing.com", {
"_U": "cookie value"
})
set_cookies("chat.openai.com", {
"access_token": "token value"
})
set_cookies(".google.com", {
"__Secure-1PSID": "cookie value"
})
...
```
Alternatively, G4F reads the cookies with `browser_cookie3` from your browser
or it starts a browser instance with selenium `webdriver` for logging in.
#### .HAR File for OpenaiChat Provider
##### Using Proxy
##### Generating a .HAR File
To utilize the OpenaiChat provider, a .har file is required from https://chat.openai.com/. Follow the steps below to create a valid .har file:
1. Navigate to https://chat.openai.com/ using your preferred web browser and log in with your credentials.
2. Access the Developer Tools in your browser. This can typically be done by right-clicking the page and selecting "Inspect," or by pressing F12 or Ctrl+Shift+I (Cmd+Option+I on a Mac).
3. With the Developer Tools open, switch to the "Network" tab.
4. Reload the website to capture the loading process within the Network tab.
5. Initiate an action in the chat which can be captured in the .har file.
6. Right-click any of the network activities listed and select "Save all as HAR with content" to export the .har file.
##### Storing the .HAR File
- Place the exported .har file in the `./hardir` directory if you are using Docker. Alternatively, you can store it in any preferred location within your current working directory.
Note: Ensure that your .har file is stored securely, as it may contain sensitive information.
#### Using Proxy
If you want to hide or change your IP address for the providers, you can set a proxy globally via an environment variable:

View File

@ -81,7 +81,14 @@ WORKDIR $G4F_DIR
COPY requirements.txt $G4F_DIR
# Upgrade pip for the latest features and install the project's Python dependencies.
RUN pip install --break-system-packages --upgrade pip && pip install --break-system-packages -r requirements.txt
RUN pip install --break-system-packages --upgrade pip \
&& pip install --break-system-packages -r requirements.txt
# Install selenium driver and uninstall webdriver
RUN pip install --break-system-packages \
undetected-chromedriver selenium-wire \
&& pip uninstall -y --break-system-packages \
webdriver plyer
# Copy the entire package into the container.
ADD --chown=$G4F_USER:$G4F_USER g4f $G4F_DIR/g4f

View File

@ -3,8 +3,9 @@ from __future__ import annotations
from aiohttp import ClientSession
from ...requests import raise_for_status
from ...errors import RateLimitError
from ...providers.conversation import BaseConversation
class Conversation:
class Conversation(BaseConversation):
"""
Represents a conversation with specific attributes.
"""
@ -32,7 +33,7 @@ async def create_conversation(session: ClientSession, headers: dict, tone: str)
Returns:
Conversation: An instance representing the created conversation.
"""
if tone == "copilot":
if tone == "Copilot":
url = "https://copilot.microsoft.com/turing/conversation/create?bundleVersion=1.1634.3-nodesign2"
else:
url = "https://www.bing.com/turing/conversation/create?bundleVersion=1.1626.1"

View File

@ -3,10 +3,10 @@ from __future__ import annotations
import asyncio
import uuid
import json
import os
import base64
import time
from aiohttp import ClientWebSocketResponse
from copy import copy
try:
import webview
@ -22,13 +22,13 @@ except ImportError:
pass
from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
from ..helper import get_cookies
from ...webdriver import get_browser
from ...typing import AsyncResult, Messages, Cookies, ImageType, Union, AsyncIterator
from ...requests import get_args_from_browser, raise_for_status
from ...requests.aiohttp import StreamSession
from ...image import to_image, to_bytes, ImageResponse, ImageRequest
from ...errors import MissingRequirementsError, MissingAuthError, ProviderNotWorkingError
from ...errors import MissingAuthError
from ...providers.conversation import BaseConversation
from ..openai.har_file import getArkoseAndAccessToken
from ... import debug
@ -56,11 +56,6 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
prompt: str = None,
model: str = "",
messages: Messages = [],
history_disabled: bool = False,
action: str = "next",
conversation_id: str = None,
parent_id: str = None,
image: ImageType = None,
**kwargs
) -> Response:
"""
@ -89,12 +84,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
generator = cls.create_async_generator(
model,
messages,
history_disabled=history_disabled,
action=action,
conversation_id=conversation_id,
parent_id=parent_id,
image=image,
response_fields=True,
return_conversation=True,
**kwargs
)
return Response(
@ -209,7 +199,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
} for message in messages]
# Check if there is an image response
if image_request:
if image_request is not None:
# Change content in last user message
messages[-1]["content"] = {
"content_type": "multimodal_text",
@ -308,10 +298,11 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
history_disabled: bool = True,
action: str = "next",
conversation_id: str = None,
conversation: Conversation = None,
parent_id: str = None,
image: ImageType = None,
image_name: str = None,
response_fields: bool = False,
return_conversation: bool = False,
**kwargs
) -> AsyncResult:
"""
@ -330,7 +321,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
conversation_id (str): ID of the conversation.
parent_id (str): ID of the parent message.
image (ImageType): Image to include in the conversation.
response_fields (bool): Flag to include response fields in the output.
return_conversation (bool): Flag to include response fields in the output.
**kwargs: Additional keyword arguments.
Yields:
@ -387,6 +378,8 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
arkose_token, api_key, cookies = await getArkoseAndAccessToken(proxy)
cls._create_request_args(cookies)
cls._set_api_key(api_key)
if arkose_token is None:
raise MissingAuthError("No arkose token found in .har file")
try:
image_request = await cls.upload_image(session, cls._headers, image, image_name) if image else None
@ -396,7 +389,8 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
print(f"{e.__class__.__name__}: {e}")
model = cls.get_model(model).replace("gpt-3.5-turbo", "text-davinci-002-render-sha")
fields = ResponseFields()
fields = Conversation() if conversation is None else copy(conversation)
fields.finish_reason = None
while fields.finish_reason is None:
conversation_id = conversation_id if fields.conversation_id is None else fields.conversation_id
parent_id = parent_id if fields.message_id is None else fields.message_id
@ -409,7 +403,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
"conversation_id": conversation_id,
"parent_message_id": parent_id,
"model": model,
"history_and_training_disabled": history_disabled and not auto_continue,
"history_and_training_disabled": history_disabled and not auto_continue and not return_conversation,
"websocket_request_id": websocket_request_id
}
if action != "continue":
@ -422,8 +416,6 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
}
if need_arkose:
headers["OpenAI-Sentinel-Arkose-Token"] = arkose_token
headers["OpenAI-Sentinel-Chat-Requirements-Token"] = chat_token
async with session.post(
f"{cls.url}/backend-api/conversation",
json=data,
@ -432,15 +424,15 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
cls._update_request_args(session)
await raise_for_status(response)
async for chunk in cls.iter_messages_chunk(response.iter_lines(), session, fields):
if response_fields:
response_fields = False
if return_conversation:
return_conversation = False
yield fields
yield chunk
if not auto_continue:
break
action = "continue"
await asyncio.sleep(5)
if history_disabled and auto_continue:
if history_disabled and auto_continue and not return_conversation:
await cls.delete_conversation(session, cls._headers, fields.conversation_id)
@staticmethod
@ -458,7 +450,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
cls,
messages: AsyncIterator,
session: StreamSession,
fields: ResponseFields
fields: Conversation
) -> AsyncIterator:
last_message: int = 0
async for message in messages:
@ -487,7 +479,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
break
@classmethod
async def iter_messages_line(cls, session: StreamSession, line: bytes, fields: ResponseFields) -> AsyncIterator:
async def iter_messages_line(cls, session: StreamSession, line: bytes, fields: Conversation) -> AsyncIterator:
if not line.startswith(b"data: "):
return
elif line.startswith(b"data: [DONE]"):
@ -618,7 +610,7 @@ this.fetch = async (url, options) => {
@classmethod
def _update_request_args(cls, session: StreamSession):
for c in session.cookie_jar if hasattr(session, "cookie_jar") else session.cookies.jar:
cls._cookies[c.name if hasattr(c, "name") else c.key] = c.value
cls._cookies[c.key if hasattr(c, "key") else c.name] = c.value
cls._update_cookie_header()
@classmethod
@ -631,7 +623,7 @@ this.fetch = async (url, options) => {
def _update_cookie_header(cls):
cls._headers["Cookie"] = cls._format_cookies(cls._cookies)
class ResponseFields:
class Conversation(BaseConversation):
"""
Class to encapsulate response fields.
"""
@ -664,7 +656,7 @@ class Response():
self._generator = None
chunks = []
async for chunk in self._generator:
if isinstance(chunk, ResponseFields):
if isinstance(chunk, Conversation):
self._fields = chunk
else:
yield chunk

View File

@ -11,11 +11,6 @@ from copy import deepcopy
from .crypt import decrypt, encrypt
from ...requests import StreamSession
arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"
sessionUrl = "https://chat.openai.com/api/auth/session"
chatArk = None
accessToken = None
class arkReq:
def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent):
self.arkURL = arkURL
@ -25,21 +20,30 @@ class arkReq:
self.arkCookies = arkCookies
self.userAgent = userAgent
arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"
sessionUrl = "https://chat.openai.com/api/auth/session"
chatArk: arkReq = None
accessToken: str = None
cookies: dict = None
def readHAR():
dirPath = "./"
harPath = []
chatArks = []
accessToken = None
cookies = {}
for root, dirs, files in os.walk(dirPath):
for file in files:
if file.endswith(".har"):
harPath.append(os.path.join(root, file))
if harPath:
break
if not harPath:
raise RuntimeError("No .har file found")
for path in harPath:
with open(path, 'r') as file:
with open(path, 'rb') as file:
try:
harFile = json.load(file)
harFile = json.loads(file.read())
except json.JSONDecodeError:
# Error: not a HAR file!
continue
@ -48,11 +52,12 @@ def readHAR():
chatArks.append(parseHAREntry(v))
elif v['request']['url'] == sessionUrl:
accessToken = json.loads(v["response"]["content"]["text"]).get("accessToken")
if not chatArks:
RuntimeError("No arkose requests found in .har files")
cookies = {c['name']: c['value'] for c in v['request']['cookies']}
if not accessToken:
RuntimeError("No accessToken found in .har files")
return chatArks.pop(), accessToken
if not chatArks:
return None, accessToken, cookies
return chatArks.pop(), accessToken, cookies
def parseHAREntry(entry) -> arkReq:
tmpArk = arkReq(
@ -60,7 +65,7 @@ def parseHAREntry(entry) -> arkReq:
arkBx="",
arkHeader={h['name'].lower(): h['value'] for h in entry['request']['headers'] if h['name'].lower() not in ['content-length', 'cookie'] and not h['name'].startswith(':')},
arkBody={p['name']: unquote(p['value']) for p in entry['request']['postData']['params'] if p['name'] not in ['rnd']},
arkCookies=[{'name': c['name'], 'value': c['value'], 'expires': c['expires']} for c in entry['request']['cookies']],
arkCookies={c['name']: c['value'] for c in entry['request']['cookies']},
userAgent=""
)
tmpArk.userAgent = tmpArk.arkHeader.get('user-agent', '')
@ -81,7 +86,6 @@ def genArkReq(chatArk: arkReq) -> arkReq:
tmpArk.arkBody['bda'] = base64.b64encode(bda.encode()).decode()
tmpArk.arkBody['rnd'] = str(random.random())
tmpArk.arkHeader['x-ark-esync-value'] = bw
tmpArk.arkCookies = {cookie['name']: cookie['value'] for cookie in tmpArk.arkCookies}
return tmpArk
async def sendRequest(tmpArk: arkReq, proxy: str = None):
@ -117,8 +121,10 @@ def getN() -> str:
return base64.b64encode(timestamp.encode()).decode()
async def getArkoseAndAccessToken(proxy: str):
global chatArk, accessToken
global chatArk, accessToken, cookies
if chatArk is None or accessToken is None:
chatArk, accessToken = readHAR()
chatArk, accessToken, cookies = readHAR()
if chatArk is None:
return None, accessToken, cookies
newReq = genArkReq(chatArk)
return await sendRequest(newReq, proxy), accessToken, newReq.arkCookies
return await sendRequest(newReq, proxy), accessToken, cookies

View File

@ -39,9 +39,9 @@ from g4f.errors import VersionNotFoundError
from g4f.Provider import ProviderType, __providers__, __map__
from g4f.providers.base_provider import ProviderModelMixin
from g4f.Provider.bing.create_images import patch_provider
from g4f.Provider.Bing import Conversation
from g4f.providers.conversation import BaseConversation
conversations: dict[str, Conversation] = {}
conversations: dict[str, BaseConversation] = {}
class Api():
@ -230,14 +230,14 @@ class Api():
if first:
first = False
yield self._format_json("provider", get_last_provider(True))
if isinstance(chunk, Conversation):
if isinstance(chunk, BaseConversation):
conversations[conversation_id] = chunk
yield self._format_json("conversation", conversation_id)
elif isinstance(chunk, Exception):
logging.exception(chunk)
yield self._format_json("message", get_error_message(chunk))
else:
yield self._format_json("content", chunk)
yield self._format_json("content", str(chunk))
except Exception as e:
logging.exception(e)
yield self._format_json('error', get_error_message(e))

View File

@ -0,0 +1,2 @@
class BaseConversation:
    """Marker base class for provider conversation-state objects.

    Providers subclass this (e.g. the Bing and OpenaiChat ``Conversation``
    classes) so callers can detect conversation objects yielded by a
    generator with a single ``isinstance(chunk, BaseConversation)`` check.
    """
    ...

0
hardir/.gitkeep Normal file
View File