Pārlūkot izejas kodu

add requests.get timeout

david 11 mēneši atpakaļ
vecāks
revīzija
18ea2d87a7
1 mainīts fails ar 3 papildinājumiem un 2 dzēšanām
  1. 3 2
      main.py

+ 3 - 2
main.py

@@ -36,15 +36,16 @@ def duckduckgo_search(query, num_results=5):
     return result_links
 
 
-def extract_text_from_links(links):
+def extract_text_from_links(links, timeout=5):
     extracted_texts = []
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
     }
     
     for link in links:
+        print("downloading text from: " + link)
         try:
-            response = requests.get(link, headers=headers)
+            response = requests.get(link, headers=headers, timeout=timeout)
             if response.status_code == 200:
                 soup = BeautifulSoup(response.content, 'html.parser')
                 # Extract text from the page