phind: major improvement (streaming)

Removed the timeout error and added data streaming. Integration into the GPT clone is coming soon.
2023-04-20 10:22:44 +01:00 · 2023-04-20 10:22:44 +01:00 · b2459a5897
parent b31d053191
commit b2459a5897
3 changed files with 141 additions and 28 deletions
--- a/README.md
+++ b/README.md
@ -102,16 +102,27 @@ print(response.completion.choices[0].text)
 ### Example: `phind` (use like openai pypi package) <a name="example-phind"></a>

 ```python
-# HELP WANTED: tls_client does not accept stream and timeout gets hit with long responses
-
 import phind

-prompt = 'hello world'
+prompt = 'who won the quatar world cup'

+# help needed: newlines are not coming through from the stream; please submit a PR if you know how to fix this
+# stream completion
+for result in phind.StreamingCompletion.create(
+    model  = 'gpt-4',
+    prompt = prompt,
+    results     = phind.Search.create(prompt, actualSearch = True), # create search (set actualSearch to False to disable internet)
+    creative    = False,
+    detailed    = False,
+    codeContext = ''):  # up to 3000 chars of code
+
+    print(result.completion.choices[0].text, end='', flush=True)
+
+# normal completion
 result = phind.Completion.create(
    model  = 'gpt-4',
    prompt = prompt,
-    results     = phind.Search.create(prompt, actualSearch = False), # create search (set actualSearch to False to disable internet)
+    results     = phind.Search.create(prompt, actualSearch = True), # create search (set actualSearch to False to disable internet)
    creative    = False,
    detailed    = False,
    codeContext = '') # up to 3000 chars of code
--- a/phind/init.py
+++ b/phind/init.py
@ -1,24 +1,11 @@
 from urllib.parse import quote
-from tls_client   import Session
 from time         import time
 from datetime     import datetime
+from queue        import Queue, Empty
+from threading    import Thread
+from re           import findall

-client         = Session(client_identifier='chrome110')
-client.headers = {
-    'authority': 'www.phind.com',
-    'accept': '*/*',
-    'accept-language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
-    'content-type': 'application/json',
-    'origin': 'https://www.phind.com',
-    'referer': 'https://www.phind.com/search',
-    'sec-ch-ua': '"Chromium";v="110", "Google Chrome";v="110", "Not:A-Brand";v="99"',
-    'sec-ch-ua-mobile': '?0',
-    'sec-ch-ua-platform': '"macOS"',
-    'sec-fetch-dest': 'empty',
-    'sec-fetch-mode': 'cors',
-    'sec-fetch-site': 'same-origin',
-    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
-}
+from curl_cffi.requests import post

 class PhindResponse:
    
@ -81,11 +68,19 @@ class Search:
                }
            }
        
-        return client.post('https://www.phind.com/api/bing/search', json = { 
+        headers = {
+            'authority'    : 'www.phind.com',
+            'origin'       : 'https://www.phind.com',
+            'referer'      : 'https://www.phind.com/search',
+            'user-agent'   : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
+        }
+        
+        return post('https://www.phind.com/api/bing/search', headers = headers, json = { 
            'q': prompt,
            'userRankList': {},
            'browserLanguage': language}).json()['rawBingResults']

+
 class Completion:
    def create(
        model = 'gpt-4', 
@ -121,8 +116,15 @@ class Completion:
            }
        }
        
+        headers = {
+            'authority'    : 'www.phind.com',
+            'origin'       : 'https://www.phind.com',
+            'referer'      : f'https://www.phind.com/search?q={quote(prompt)}&c=&source=searchbox&init=true',
+            'user-agent'   : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
+        }
+        
        completion = ''
-        response   = client.post('https://www.phind.com/api/infer/answer', json=json_data, timeout_seconds=200)
+        response   = post('https://www.phind.com/api/infer/answer', headers = headers, json = json_data, timeout=99999)
        for line in response.text.split('\r\n\r\n'):
            completion += (line.replace('data: ', ''))
        
@ -143,3 +145,88 @@ class Completion:
                'total_tokens'      : len(prompt) + len(completion)
            }
        })
+        
+
+class StreamingCompletion:
+    message_queue    = Queue()
+    stream_completed = False
+    
+    def request(model, prompt, results, creative, detailed, codeContext, language) -> None:
+        
+        models = {
+            'gpt-4' : 'expert',
+            'gpt-3.5-turbo' : 'intermediate',
+            'gpt-3.5': 'intermediate',
+        }
+
+        json_data = {
+            'question'    : prompt,
+            'bingResults' : results,
+            'codeContext' : codeContext,
+            'options': {
+                'skill'   : models[model],
+                'date'    : datetime.now().strftime("%d/%m/%Y"),
+                'language': language,
+                'detailed': detailed,
+                'creative': creative
+            }
+        }
+        
+        stream_req = post('https://www.phind.com/api/infer/answer', json=json_data, timeout=99999,
+            content_callback = StreamingCompletion.handle_stream_response,
+            headers = {
+                'authority'    : 'www.phind.com',
+                'origin'       : 'https://www.phind.com',
+                'referer'      : f'https://www.phind.com/search?q={quote(prompt)}&c=&source=searchbox&init=true',
+                'user-agent'   : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
+        })
+
+        StreamingCompletion.stream_completed = True
+
+    @staticmethod
+    def create(
+        model       : str = 'gpt-4', 
+        prompt      : str = '', 
+        results     : dict = None, 
+        creative    : bool = False, 
+        detailed    : bool = False, 
+        codeContext : str = '',
+        language    : str = 'en'):
+        
+        if results is None:
+            results = Search.create(prompt, actualSearch = True)
+        
+        if len(codeContext) > 2999:
+            raise ValueError('codeContext must be less than 3000 characters')
+        
+        Thread(target = StreamingCompletion.request, args = [
+            model, prompt, results, creative, detailed, codeContext, language]).start()
+        
+        while StreamingCompletion.stream_completed != True or not StreamingCompletion.message_queue.empty():
+            try:
+                message = StreamingCompletion.message_queue.get(timeout=0)
+                for token in findall(r'(?<=data: )(.+?)(?=\r\n\r\n)', message.decode()):
+                    yield PhindResponse({
+                        'id'     : f'cmpl-1337-{int(time())}', 
+                        'object' : 'text_completion', 
+                        'created': int(time()), 
+                        'model'  : model, 
+                        'choices': [{
+                                'text'          : token, 
+                                'index'         : 0, 
+                                'logprobs'      : None, 
+                                'finish_reason' : 'stop'
+                        }], 
+                        'usage': {
+                            'prompt_tokens'     : len(prompt), 
+                            'completion_tokens' : len(token), 
+                            'total_tokens'      : len(prompt) + len(token)
+                        }
+                    })
+
+            except Empty:
+                pass
+
+    @staticmethod
+    def handle_stream_response(response):
+        StreamingCompletion.message_queue.put(response)
--- a/testing/phind_test.py
+++ b/testing/phind_test.py
@ -2,6 +2,7 @@ import phind

 prompt = 'hello world'

+# normal completion
 result = phind.Completion.create(
    model  = 'gpt-4',
    prompt = prompt,
@ -11,3 +12,17 @@ result = phind.Completion.create(
    codeContext = '') # up to 3000 chars of code

 print(result.completion.choices[0].text)
+
+prompt = 'who won the quatar world cup'
+
+# help needed: newlines are not coming through from the stream; please submit a PR if you know how to fix this
+# stream completion
+for result in phind.StreamingCompletion.create(
+    model  = 'gpt-3.5',
+    prompt = prompt,
+    results     = phind.Search.create(prompt, actualSearch = True), # create search (set actualSearch to False to disable internet)
+    creative    = False,
+    detailed    = False,
+    codeContext = ''):  # up to 3000 chars of code
+
+    print(result.completion.choices[0].text, end='', flush=True)