Merge pull request #1759 from hlohaus/goo

.har files
This commit is contained in:
H Lohaus 2024-03-26 21:50:35 +01:00 committed by GitHub
commit dd08125bb4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 86 additions and 62 deletions

3
.gitignore vendored
View File

@ -53,4 +53,5 @@ info.txt
local.py local.py
*.gguf *.gguf
image.py image.py
.buildozer .buildozer
hardir

View File

@ -89,7 +89,7 @@ As per the survey, here is a list of improvements to come
```sh ```sh
docker pull hlohaus789/g4f docker pull hlohaus789/g4f
docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" hlohaus789/g4f:latest docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" -v ${PWD}/hardir:/app/hardir hlohaus789/g4f:latest
``` ```
3. Open the included client on: [http://localhost:8080/chat/](http://localhost:8080/chat/) 3. Open the included client on: [http://localhost:8080/chat/](http://localhost:8080/chat/)
or set the API base in your client to: [http://localhost:1337/v1](http://localhost:1337/v1) or set the API base in your client to: [http://localhost:1337/v1](http://localhost:1337/v1)
@ -218,9 +218,12 @@ See: [/docs/interference](/docs/interference.md)
### Configuration ### Configuration
##### Cookies / Access Token #### Cookies
For generating images with Bing and for the OpenAI Chat you need cookies or a token from your browser session. From Bing you need the "_U" cookie and from OpenAI you need the "access_token". You can pass the cookies / the access token in the create function or you use the `set_cookies` setter before you run G4F: You need cookies for BingCreateImages and the Gemini Provider.
From Bing you need the "_U" cookie and from Gemini you need the "__Secure-1PSID" cookie.
Sometimes you don't need the "__Secure-1PSID" cookie, but some other auth cookies.
You can pass the cookies in the create function or you use the `set_cookies` setter before you run G4F:
```python ```python
from g4f.cookies import set_cookies from g4f.cookies import set_cookies
@ -228,20 +231,32 @@ from g4f.cookies import set_cookies
set_cookies(".bing.com", { set_cookies(".bing.com", {
"_U": "cookie value" "_U": "cookie value"
}) })
set_cookies("chat.openai.com", {
"access_token": "token value"
})
set_cookies(".google.com", { set_cookies(".google.com", {
"__Secure-1PSID": "cookie value" "__Secure-1PSID": "cookie value"
}) })
... ...
``` ```
Alternatively, G4F reads the cookies with `browser_cookie3` from your browser #### .HAR File for OpenaiChat Provider
or it starts a browser instance with selenium `webdriver` for logging in.
##### Using Proxy ##### Generating a .HAR File
To utilize the OpenaiChat provider, a .har file is required from https://chat.openai.com/. Follow the steps below to create a valid .har file:
1. Navigate to https://chat.openai.com/ using your preferred web browser and log in with your credentials.
2. Access the Developer Tools in your browser. This can typically be done by right-clicking the page and selecting "Inspect," or by pressing F12 or Ctrl+Shift+I (Cmd+Option+I on a Mac).
3. With the Developer Tools open, switch to the "Network" tab.
4. Reload the website to capture the loading process within the Network tab.
5. Initiate an action in the chat which can be captured in the .har file.
6. Right-click any of the network activities listed and select "Save all as HAR with content" to export the .har file.
##### Storing the .HAR File
- Place the exported .har file in the `./hardir` directory if you are using Docker. Alternatively, you can store it in any preferred location within your current working directory.
Note: Ensure that your .har file is stored securely, as it may contain sensitive information.
#### Using Proxy
If you want to hide or change your IP address for the providers, you can set a proxy globally via an environment variable: If you want to hide or change your IP address for the providers, you can set a proxy globally via an environment variable:

View File

@ -81,7 +81,14 @@ WORKDIR $G4F_DIR
COPY requirements.txt $G4F_DIR COPY requirements.txt $G4F_DIR
# Upgrade pip for the latest features and install the project's Python dependencies. # Upgrade pip for the latest features and install the project's Python dependencies.
RUN pip install --break-system-packages --upgrade pip && pip install --break-system-packages -r requirements.txt RUN pip install --break-system-packages --upgrade pip \
&& pip install --break-system-packages -r requirements.txt
# Install selenium driver and uninstall webdriver
RUN pip install --break-system-packages \
undetected-chromedriver selenium-wire \
&& pip uninstall -y --break-system-packages \
webdriver plyer
# Copy the entire package into the container. # Copy the entire package into the container.
ADD --chown=$G4F_USER:$G4F_USER g4f $G4F_DIR/g4f ADD --chown=$G4F_USER:$G4F_USER g4f $G4F_DIR/g4f

View File

@ -3,8 +3,9 @@ from __future__ import annotations
from aiohttp import ClientSession from aiohttp import ClientSession
from ...requests import raise_for_status from ...requests import raise_for_status
from ...errors import RateLimitError from ...errors import RateLimitError
from ...providers.conversation import BaseConversation
class Conversation: class Conversation(BaseConversation):
""" """
Represents a conversation with specific attributes. Represents a conversation with specific attributes.
""" """
@ -32,7 +33,7 @@ async def create_conversation(session: ClientSession, headers: dict, tone: str)
Returns: Returns:
Conversation: An instance representing the created conversation. Conversation: An instance representing the created conversation.
""" """
if tone == "copilot": if tone == "Copilot":
url = "https://copilot.microsoft.com/turing/conversation/create?bundleVersion=1.1634.3-nodesign2" url = "https://copilot.microsoft.com/turing/conversation/create?bundleVersion=1.1634.3-nodesign2"
else: else:
url = "https://www.bing.com/turing/conversation/create?bundleVersion=1.1626.1" url = "https://www.bing.com/turing/conversation/create?bundleVersion=1.1626.1"

View File

@ -3,10 +3,10 @@ from __future__ import annotations
import asyncio import asyncio
import uuid import uuid
import json import json
import os
import base64 import base64
import time import time
from aiohttp import ClientWebSocketResponse from aiohttp import ClientWebSocketResponse
from copy import copy
try: try:
import webview import webview
@ -22,13 +22,13 @@ except ImportError:
pass pass
from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
from ..helper import get_cookies
from ...webdriver import get_browser from ...webdriver import get_browser
from ...typing import AsyncResult, Messages, Cookies, ImageType, Union, AsyncIterator from ...typing import AsyncResult, Messages, Cookies, ImageType, Union, AsyncIterator
from ...requests import get_args_from_browser, raise_for_status from ...requests import get_args_from_browser, raise_for_status
from ...requests.aiohttp import StreamSession from ...requests.aiohttp import StreamSession
from ...image import to_image, to_bytes, ImageResponse, ImageRequest from ...image import to_image, to_bytes, ImageResponse, ImageRequest
from ...errors import MissingRequirementsError, MissingAuthError, ProviderNotWorkingError from ...errors import MissingAuthError
from ...providers.conversation import BaseConversation
from ..openai.har_file import getArkoseAndAccessToken from ..openai.har_file import getArkoseAndAccessToken
from ... import debug from ... import debug
@ -56,11 +56,6 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
prompt: str = None, prompt: str = None,
model: str = "", model: str = "",
messages: Messages = [], messages: Messages = [],
history_disabled: bool = False,
action: str = "next",
conversation_id: str = None,
parent_id: str = None,
image: ImageType = None,
**kwargs **kwargs
) -> Response: ) -> Response:
""" """
@ -89,12 +84,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
generator = cls.create_async_generator( generator = cls.create_async_generator(
model, model,
messages, messages,
history_disabled=history_disabled, return_conversation=True,
action=action,
conversation_id=conversation_id,
parent_id=parent_id,
image=image,
response_fields=True,
**kwargs **kwargs
) )
return Response( return Response(
@ -209,7 +199,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
} for message in messages] } for message in messages]
# Check if there is an image response # Check if there is an image response
if image_request: if image_request is not None:
# Change content in last user message # Change content in last user message
messages[-1]["content"] = { messages[-1]["content"] = {
"content_type": "multimodal_text", "content_type": "multimodal_text",
@ -308,10 +298,11 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
history_disabled: bool = True, history_disabled: bool = True,
action: str = "next", action: str = "next",
conversation_id: str = None, conversation_id: str = None,
conversation: Conversation = None,
parent_id: str = None, parent_id: str = None,
image: ImageType = None, image: ImageType = None,
image_name: str = None, image_name: str = None,
response_fields: bool = False, return_conversation: bool = False,
**kwargs **kwargs
) -> AsyncResult: ) -> AsyncResult:
""" """
@ -330,7 +321,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
conversation_id (str): ID of the conversation. conversation_id (str): ID of the conversation.
parent_id (str): ID of the parent message. parent_id (str): ID of the parent message.
image (ImageType): Image to include in the conversation. image (ImageType): Image to include in the conversation.
response_fields (bool): Flag to include response fields in the output. return_conversation (bool): Flag to include response fields in the output.
**kwargs: Additional keyword arguments. **kwargs: Additional keyword arguments.
Yields: Yields:
@ -387,6 +378,8 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
arkose_token, api_key, cookies = await getArkoseAndAccessToken(proxy) arkose_token, api_key, cookies = await getArkoseAndAccessToken(proxy)
cls._create_request_args(cookies) cls._create_request_args(cookies)
cls._set_api_key(api_key) cls._set_api_key(api_key)
if arkose_token is None:
raise MissingAuthError("No arkose token found in .har file")
try: try:
image_request = await cls.upload_image(session, cls._headers, image, image_name) if image else None image_request = await cls.upload_image(session, cls._headers, image, image_name) if image else None
@ -396,7 +389,8 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
print(f"{e.__class__.__name__}: {e}") print(f"{e.__class__.__name__}: {e}")
model = cls.get_model(model).replace("gpt-3.5-turbo", "text-davinci-002-render-sha") model = cls.get_model(model).replace("gpt-3.5-turbo", "text-davinci-002-render-sha")
fields = ResponseFields() fields = Conversation() if conversation is None else copy(conversation)
fields.finish_reason = None
while fields.finish_reason is None: while fields.finish_reason is None:
conversation_id = conversation_id if fields.conversation_id is None else fields.conversation_id conversation_id = conversation_id if fields.conversation_id is None else fields.conversation_id
parent_id = parent_id if fields.message_id is None else fields.message_id parent_id = parent_id if fields.message_id is None else fields.message_id
@ -409,7 +403,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
"conversation_id": conversation_id, "conversation_id": conversation_id,
"parent_message_id": parent_id, "parent_message_id": parent_id,
"model": model, "model": model,
"history_and_training_disabled": history_disabled and not auto_continue, "history_and_training_disabled": history_disabled and not auto_continue and not return_conversation,
"websocket_request_id": websocket_request_id "websocket_request_id": websocket_request_id
} }
if action != "continue": if action != "continue":
@ -422,8 +416,6 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
} }
if need_arkose: if need_arkose:
headers["OpenAI-Sentinel-Arkose-Token"] = arkose_token headers["OpenAI-Sentinel-Arkose-Token"] = arkose_token
headers["OpenAI-Sentinel-Chat-Requirements-Token"] = chat_token
async with session.post( async with session.post(
f"{cls.url}/backend-api/conversation", f"{cls.url}/backend-api/conversation",
json=data, json=data,
@ -432,15 +424,15 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
cls._update_request_args(session) cls._update_request_args(session)
await raise_for_status(response) await raise_for_status(response)
async for chunk in cls.iter_messages_chunk(response.iter_lines(), session, fields): async for chunk in cls.iter_messages_chunk(response.iter_lines(), session, fields):
if response_fields: if return_conversation:
response_fields = False return_conversation = False
yield fields yield fields
yield chunk yield chunk
if not auto_continue: if not auto_continue:
break break
action = "continue" action = "continue"
await asyncio.sleep(5) await asyncio.sleep(5)
if history_disabled and auto_continue: if history_disabled and auto_continue and not return_conversation:
await cls.delete_conversation(session, cls._headers, fields.conversation_id) await cls.delete_conversation(session, cls._headers, fields.conversation_id)
@staticmethod @staticmethod
@ -458,7 +450,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
cls, cls,
messages: AsyncIterator, messages: AsyncIterator,
session: StreamSession, session: StreamSession,
fields: ResponseFields fields: Conversation
) -> AsyncIterator: ) -> AsyncIterator:
last_message: int = 0 last_message: int = 0
async for message in messages: async for message in messages:
@ -487,7 +479,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
break break
@classmethod @classmethod
async def iter_messages_line(cls, session: StreamSession, line: bytes, fields: ResponseFields) -> AsyncIterator: async def iter_messages_line(cls, session: StreamSession, line: bytes, fields: Conversation) -> AsyncIterator:
if not line.startswith(b"data: "): if not line.startswith(b"data: "):
return return
elif line.startswith(b"data: [DONE]"): elif line.startswith(b"data: [DONE]"):
@ -618,7 +610,7 @@ this.fetch = async (url, options) => {
@classmethod @classmethod
def _update_request_args(cls, session: StreamSession): def _update_request_args(cls, session: StreamSession):
for c in session.cookie_jar if hasattr(session, "cookie_jar") else session.cookies.jar: for c in session.cookie_jar if hasattr(session, "cookie_jar") else session.cookies.jar:
cls._cookies[c.name if hasattr(c, "name") else c.key] = c.value cls._cookies[c.key if hasattr(c, "key") else c.name] = c.value
cls._update_cookie_header() cls._update_cookie_header()
@classmethod @classmethod
@ -631,7 +623,7 @@ this.fetch = async (url, options) => {
def _update_cookie_header(cls): def _update_cookie_header(cls):
cls._headers["Cookie"] = cls._format_cookies(cls._cookies) cls._headers["Cookie"] = cls._format_cookies(cls._cookies)
class ResponseFields: class Conversation(BaseConversation):
""" """
Class to encapsulate response fields. Class to encapsulate response fields.
""" """
@ -664,7 +656,7 @@ class Response():
self._generator = None self._generator = None
chunks = [] chunks = []
async for chunk in self._generator: async for chunk in self._generator:
if isinstance(chunk, ResponseFields): if isinstance(chunk, Conversation):
self._fields = chunk self._fields = chunk
else: else:
yield chunk yield chunk

View File

@ -11,11 +11,6 @@ from copy import deepcopy
from .crypt import decrypt, encrypt from .crypt import decrypt, encrypt
from ...requests import StreamSession from ...requests import StreamSession
arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"
sessionUrl = "https://chat.openai.com/api/auth/session"
chatArk = None
accessToken = None
class arkReq: class arkReq:
def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent): def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent):
self.arkURL = arkURL self.arkURL = arkURL
@ -25,21 +20,30 @@ class arkReq:
self.arkCookies = arkCookies self.arkCookies = arkCookies
self.userAgent = userAgent self.userAgent = userAgent
arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"
sessionUrl = "https://chat.openai.com/api/auth/session"
chatArk: arkReq = None
accessToken: str = None
cookies: dict = None
def readHAR(): def readHAR():
dirPath = "./" dirPath = "./"
harPath = [] harPath = []
chatArks = [] chatArks = []
accessToken = None accessToken = None
cookies = {}
for root, dirs, files in os.walk(dirPath): for root, dirs, files in os.walk(dirPath):
for file in files: for file in files:
if file.endswith(".har"): if file.endswith(".har"):
harPath.append(os.path.join(root, file)) harPath.append(os.path.join(root, file))
if harPath:
break
if not harPath: if not harPath:
raise RuntimeError("No .har file found") raise RuntimeError("No .har file found")
for path in harPath: for path in harPath:
with open(path, 'r') as file: with open(path, 'rb') as file:
try: try:
harFile = json.load(file) harFile = json.loads(file.read())
except json.JSONDecodeError: except json.JSONDecodeError:
# Error: not a HAR file! # Error: not a HAR file!
continue continue
@ -48,11 +52,12 @@ def readHAR():
chatArks.append(parseHAREntry(v)) chatArks.append(parseHAREntry(v))
elif v['request']['url'] == sessionUrl: elif v['request']['url'] == sessionUrl:
accessToken = json.loads(v["response"]["content"]["text"]).get("accessToken") accessToken = json.loads(v["response"]["content"]["text"]).get("accessToken")
if not chatArks: cookies = {c['name']: c['value'] for c in v['request']['cookies']}
RuntimeError("No arkose requests found in .har files")
if not accessToken: if not accessToken:
RuntimeError("No accessToken found in .har files") RuntimeError("No accessToken found in .har files")
return chatArks.pop(), accessToken if not chatArks:
return None, accessToken, cookies
return chatArks.pop(), accessToken, cookies
def parseHAREntry(entry) -> arkReq: def parseHAREntry(entry) -> arkReq:
tmpArk = arkReq( tmpArk = arkReq(
@ -60,7 +65,7 @@ def parseHAREntry(entry) -> arkReq:
arkBx="", arkBx="",
arkHeader={h['name'].lower(): h['value'] for h in entry['request']['headers'] if h['name'].lower() not in ['content-length', 'cookie'] and not h['name'].startswith(':')}, arkHeader={h['name'].lower(): h['value'] for h in entry['request']['headers'] if h['name'].lower() not in ['content-length', 'cookie'] and not h['name'].startswith(':')},
arkBody={p['name']: unquote(p['value']) for p in entry['request']['postData']['params'] if p['name'] not in ['rnd']}, arkBody={p['name']: unquote(p['value']) for p in entry['request']['postData']['params'] if p['name'] not in ['rnd']},
arkCookies=[{'name': c['name'], 'value': c['value'], 'expires': c['expires']} for c in entry['request']['cookies']], arkCookies={c['name']: c['value'] for c in entry['request']['cookies']},
userAgent="" userAgent=""
) )
tmpArk.userAgent = tmpArk.arkHeader.get('user-agent', '') tmpArk.userAgent = tmpArk.arkHeader.get('user-agent', '')
@ -81,7 +86,6 @@ def genArkReq(chatArk: arkReq) -> arkReq:
tmpArk.arkBody['bda'] = base64.b64encode(bda.encode()).decode() tmpArk.arkBody['bda'] = base64.b64encode(bda.encode()).decode()
tmpArk.arkBody['rnd'] = str(random.random()) tmpArk.arkBody['rnd'] = str(random.random())
tmpArk.arkHeader['x-ark-esync-value'] = bw tmpArk.arkHeader['x-ark-esync-value'] = bw
tmpArk.arkCookies = {cookie['name']: cookie['value'] for cookie in tmpArk.arkCookies}
return tmpArk return tmpArk
async def sendRequest(tmpArk: arkReq, proxy: str = None): async def sendRequest(tmpArk: arkReq, proxy: str = None):
@ -117,8 +121,10 @@ def getN() -> str:
return base64.b64encode(timestamp.encode()).decode() return base64.b64encode(timestamp.encode()).decode()
async def getArkoseAndAccessToken(proxy: str): async def getArkoseAndAccessToken(proxy: str):
global chatArk, accessToken global chatArk, accessToken, cookies
if chatArk is None or accessToken is None: if chatArk is None or accessToken is None:
chatArk, accessToken = readHAR() chatArk, accessToken, cookies = readHAR()
if chatArk is None:
return None, accessToken, cookies
newReq = genArkReq(chatArk) newReq = genArkReq(chatArk)
return await sendRequest(newReq, proxy), accessToken, newReq.arkCookies return await sendRequest(newReq, proxy), accessToken, cookies

View File

@ -39,9 +39,9 @@ from g4f.errors import VersionNotFoundError
from g4f.Provider import ProviderType, __providers__, __map__ from g4f.Provider import ProviderType, __providers__, __map__
from g4f.providers.base_provider import ProviderModelMixin from g4f.providers.base_provider import ProviderModelMixin
from g4f.Provider.bing.create_images import patch_provider from g4f.Provider.bing.create_images import patch_provider
from g4f.Provider.Bing import Conversation from g4f.providers.conversation import BaseConversation
conversations: dict[str, Conversation] = {} conversations: dict[str, BaseConversation] = {}
class Api(): class Api():
@ -230,14 +230,14 @@ class Api():
if first: if first:
first = False first = False
yield self._format_json("provider", get_last_provider(True)) yield self._format_json("provider", get_last_provider(True))
if isinstance(chunk, Conversation): if isinstance(chunk, BaseConversation):
conversations[conversation_id] = chunk conversations[conversation_id] = chunk
yield self._format_json("conversation", conversation_id) yield self._format_json("conversation", conversation_id)
elif isinstance(chunk, Exception): elif isinstance(chunk, Exception):
logging.exception(chunk) logging.exception(chunk)
yield self._format_json("message", get_error_message(chunk)) yield self._format_json("message", get_error_message(chunk))
else: else:
yield self._format_json("content", chunk) yield self._format_json("content", str(chunk))
except Exception as e: except Exception as e:
logging.exception(e) logging.exception(e)
yield self._format_json('error', get_error_message(e)) yield self._format_json('error', get_error_message(e))

View File

@ -0,0 +1,2 @@
class BaseConversation:
...

0
hardir/.gitkeep Normal file
View File