
add api warm up for localhost

david · 4 weeks ago · commit da7b360a32
1 changed file with 28 additions and 0 deletions

sllm (+28, -0)

@@ -189,12 +189,40 @@ def pretty_print_markdown(markdown_text):
     console.print(md)
 
 
+def warm_up_api(summary_model, api_base, token):
+    """Start loading the summary model while the user types."""
+    prompt = "Hi there"
+
+    client = OpenAI(
+        base_url=api_base,
+        api_key=token
+    )
+
+    history = [{"role": "user", "content": prompt}]
+
+    # Send a short request so the server loads the model into memory
+    try:
+        print(f"Requesting load of the summary model {summary_model}")
+        client.chat.completions.create(
+            model=summary_model,
+            messages=history,
+            temperature=0,
+            max_tokens=10
+        )
+    except Exception as e:
+        print(
+            f"An error occurred while sending the warm-up request to load the summary model: {e}")
+        return
+
 if __name__ == "__main__":
     load_dotenv()
     token = os.getenv("OVH_AI_ENDPOINTS_ACCESS_TOKEN")
     api_base = os.getenv("API_BASE")    
     summary_model = os.getenv("SUMMARY_MODEL")
 
+    if "localhost" in api_base:
+        warm_up_api(summary_model=summary_model, api_base=api_base)
+
     questions = [
         inquirer.Text('query', message="What's your query?"),
         inquirer.Text('explanation', message="Additional context for the query?")
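
Note: the warm-up request above is synchronous, so the inquirer prompts only appear once the local server has answered. A minimal sketch of a non-blocking variant, assuming the warm_up_api(summary_model, api_base, token) helper from this commit, would run the request in a daemon thread instead:

    import threading

    # Sketch only: fire the warm-up in the background so the user can start typing
    # while the local server loads the model. Assumes warm_up_api from this commit.
    if api_base and "localhost" in api_base:
        threading.Thread(
            target=warm_up_api,
            kwargs={"summary_model": summary_model, "api_base": api_base, "token": token},
            daemon=True,  # don't keep the process alive just for the warm-up call
        ).start()

With this, inquirer.prompt would run immediately while the model loads in the background.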