
add api warm up for localhost

david · 4 weeks ago · commit da7b360a32
1 changed file with 28 additions and 0 deletions

sllm (+28, -0)

@@ -189,12 +189,40 @@ def pretty_print_markdown(markdown_text):
     console.print(md)
 
 
+def warm_up_api(summary_model, api_base, token):
+    """Start loading the summary model while the user types."""
+    prompt = "Hi there"
+
+    client = OpenAI(
+        base_url=api_base,
+        api_key=token
+    )
+
+    history = [{"role": "user", "content": prompt}]
+
+    # Send a short request so the server loads the model into memory
+    try:
+        print(f"Requesting load of the summary model {summary_model}")
+        client.chat.completions.create(
+            model=summary_model,
+            messages=history,
+            temperature=0,
+            max_tokens=10
+        )
+    except Exception as e:
+        print(
+            f"An error occurred while sending the warm-up request to load the summary model: {e}")
+        return
+
 if __name__ == "__main__":
     load_dotenv()
     token = os.getenv("OVH_AI_ENDPOINTS_ACCESS_TOKEN")
     api_base = os.getenv("API_BASE")    
     summary_model = os.getenv("SUMMARY_MODEL")
 
+    if "localhost" in api_base:
+        warm_up_api(summary_model=summary_model, api_base=api_base)
+
     questions = [
         inquirer.Text('query', message="What's your query?"),
         inquirer.Text('explanation', message="Additional context for the query?")
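
Note: the warm-up request above is synchronous, so the inquirer prompts only appear once the local server has answered. A minimal sketch of a non-blocking variant, assuming the warm_up_api(summary_model, api_base, token) helper from this commit, would run the request in a daemon thread instead:

    import threading

    # Sketch only: fire the warm-up in the background so the user can start typing
    # while the local server loads the model. Assumes warm_up_api from this commit.
    if api_base and "localhost" in api_base:
        threading.Thread(
            target=warm_up_api,
            kwargs={"summary_model": summary_model, "api_base": api_base, "token": token},
            daemon=True,  # don't keep the process alive just for the warm-up call
        ).start()

With this, inquirer.prompt would run immediately while the model loads in the background.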