initial commit

david, 11 months ago
commit c4fd377f47
2 changed files with 194 additions and 0 deletions

+ 0 - 0
README.md


+ 194 - 0
main.py

@@ -0,0 +1,194 @@
+import requests
+from bs4 import BeautifulSoup
+import argparse
+from urllib.parse import urlparse, parse_qs
+from rich.console import Console
+from rich.markdown import Markdown
+
+def duckduckgo_search(query, num_results=5):
+    # Query DuckDuckGo's HTML endpoint; passing the query through params
+    # lets requests handle the URL encoding
+    url = "https://html.duckduckgo.com/html/"
+    
+    # Send a GET request to the DuckDuckGo search page
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
+    }
+    
+    response = requests.get(url, headers=headers, params={'q': query}, timeout=30)
+    
+    # Check if the request was successful
+    if response.status_code != 200:
+        print(f"Failed to retrieve search results. Status code: {response.status_code}")
+        return []
+    
+    # Parse the HTML content using BeautifulSoup
+    soup = BeautifulSoup(response.content, 'html.parser')
+    
+    # Find all result links (they appear in <a> tags with class "result__a").
+    # The HTML endpoint may return protocol-relative redirect URLs of the form
+    # //duckduckgo.com/l/?uddg=<encoded-target>; unwrap those to the real target
+    result_links = []
+    for a_tag in soup.find_all('a', class_='result__a'):
+        link = a_tag.get('href')
+        if not link:
+            continue
+        if link.startswith('//duckduckgo.com/l/'):
+            link = parse_qs(urlparse(link).query).get('uddg', [link])[0]
+        result_links.append(link)
+        if len(result_links) >= num_results:
+            break
+    
+    return result_links
+
+
+def extract_text_from_links(links):
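+    # Fetch each result page and reduce it to visible text; pages that fail
+    # to load are skipped with a warning instead of aborting the whole run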
+    extracted_texts = []
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
+    }
+    
+    for link in links:
+        try:
+            response = requests.get(link, headers=headers, timeout=30)
+            if response.status_code == 200:
+                soup = BeautifulSoup(response.content, 'html.parser')
+                # Extract text from the page
+                text = soup.get_text(separator='\n', strip=True)
+                extracted_texts.append((link, text))
+            else:
+                print(f"Failed to retrieve content from {link}. Status code: {response.status_code}")
+        except requests.RequestException as e:
+            print(f"An error occurred while fetching {link}: {e}")
+    
+    return extracted_texts
+
+
+def summarize_individual_texts(texts_and_urls, query, ollama_url="http://localhost:11434/api/generate"):
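+    # Map-reduce style summarization: condense each page separately first so
+    # the combined context passed to the final summarization call stays short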
+    summaries = []
+    for url, text in texts_and_urls:
+        prompt = ("Extract the relevant information from the following text "
+                  f"with regards to the original query: '{query}'\n\n{text}\n")
+        payload = {
+            "model": "command-r",
+            "prompt": prompt,
+            "stream": False,
+            # Ollama expects generation options in a nested "options" dict;
+            # num_predict caps the number of tokens generated
+            "options": {"num_predict": 1000}
+        }
+        
+        try:
+            response = requests.post(ollama_url, json=payload, timeout=300)
+            if response.status_code == 200:
+                result = response.json()["response"]
+                summaries.append((url, result))
+            else:
+                print(f"Failed to get summary from Ollama server for {url}. Status code: {response.status_code}")
+        except requests.RequestException as e:
+            print(f"An error occurred while sending request to Ollama server for {url}: {e}")
+    
+    return summaries
+
+
+def summarize_with_ollama(texts_and_urls, query, ollama_url="http://localhost:11434/api/generate"):
+    # Prepare the context and prompt
+    context = "\n".join([f"URL: {url}\nText: {text}" for url, text in texts_and_urls])
+    prompt = (f"Summarize the following search results with regards to the original query: '{query}' "
+              "and include the full URLs as references where appropriate. "
+              f"Use markdown and unicode characters to format your response.\n\n{context}")
+    
+    # Create the payload for the POST request
+    payload = {
+        "model": "command-r",
+        "prompt": prompt,
+        "stream": False,
+        "options": {"num_predict": 1500}
+    }
+    
+    # Send the POST request to the Ollama server
+    try:
+        print("Generating final summary")
+        response = requests.post(ollama_url, json=payload, timeout=300)
+        if response.status_code == 200:
+            result = response.json()["response"]
+            return result
+        else:
+            print(f"Failed to get summary from Ollama server. Status code: {response.status_code}")
+            return None
+    except requests.RequestException as e:
+        print(f"An error occurred while sending request to Ollama server: {e}")
+        return None
+
+
+def optimize_search_query(query, ollama_url="http://localhost:11434/api/generate"):
+    # Prepare the prompt for optimizing the search query
+    prompt = ("Optimize the following natural language query to improve its effectiveness "
+              f"in a web search. Make it very concise. query: '{query}'")
+    
+    # Create the payload for the POST request
+    payload = {
+        "model": "command-r",
+        "prompt": prompt,
+        "stream": False,
+        "options": {"num_predict": 50}
+    }
+    
+    # Send the POST request to the Ollama server
+    try:
+        print("Optimizing search query")
+        response = requests.post(ollama_url, json=payload, timeout=300)
+        if response.status_code == 200:
+            result = response.json()["response"].strip()
+            return result.strip('"')
+        else:
+            print(f"Failed to optimize search query from Ollama server. Status code: {response.status_code}")
+            return query
+    except requests.RequestException as e:
+        print(f"An error occurred while sending request to Ollama server for optimizing the search query: {e}")
+        return query
+
+
+def pretty_print_markdown(markdown_text):
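+    # Render the model's markdown output in the terminal via rich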
+    console = Console()
+    md = Markdown(markdown_text)
+    console.print(md)
+    
+
+if __name__ == "__main__":
+    # Set up argument parser
+    parser = argparse.ArgumentParser(description="Search DuckDuckGo, extract text from results, and summarize with Ollama.")
+    parser.add_argument("query", type=str, help="The search query to use on DuckDuckGo")
+    parser.add_argument("--num_results", type=int, default=5, help="Number of search results to process (default: 5)")
+    
+    # Parse arguments
+    args = parser.parse_args()
+    
+    original_query = args.query
+    # Optimize the search query
+    optimized_query = optimize_search_query(original_query)
+    print(f"Original Query: {original_query}")
+    print(f"Optimized Query: {optimized_query}")
+
+    n = args.num_results  # Number of results to extract
+    links = duckduckgo_search(optimized_query, n)
+    
+    print(f"Top {n} search results:")
+    for i, link in enumerate(links, start=1):
+        print(f"{i}. {link}")
+    
+    texts_and_urls = extract_text_from_links(links)
+    
+    print("Summarizing individual search results")
+    intermediate_summaries = summarize_individual_texts(texts_and_urls, original_query)
+    
+    final_summary = summarize_with_ollama(intermediate_summaries, original_query)
+    
+    if final_summary:
+        print("\nFinal Summary of search results:\n")
+        pretty_print_markdown(final_summary)
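
A minimal usage sketch (assuming a local Ollama server with the command-r model pulled, and the requests, beautifulsoup4, and rich packages installed):

    pip install requests beautifulsoup4 rich
    ollama pull command-r
    python main.py "how do solid state batteries work" --num_results 3

The script prints the optimized query and the result links, then renders the final markdown summary in the terminal. For reference, the non-streaming exchange with Ollama's /api/generate endpoint has roughly this shape (response fields abridged):

    POST http://localhost:11434/api/generate
    {"model": "command-r", "prompt": "...", "stream": false, "options": {"num_predict": 1000}}

    {"model": "command-r", "response": "...generated text...", "done": true}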