# app.py
import os
from dotenv import load_dotenv
from streamlit_js_eval import get_geolocation
from pymongo.mongo_client import MongoClient
import pandas as pd
import numpy as np
import matplotlib.colors as mcolors
import streamlit as st
import openai
from langchain.prompts import PromptTemplate
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.prompts import MessagesPlaceholder
from langchain.memory import ConversationSummaryBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.tools import BaseTool
from langchain.schema import SystemMessage
from langchain.callbacks import get_openai_callback
from pydantic import BaseModel, Field
from typing import Type
from bs4 import BeautifulSoup
import requests
import json
from fastapi import FastAPI
import tweepy
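
# The app reads its credentials from a local .env file. Expected keys (taken
# from the os.getenv calls below; values shown are illustrative placeholders,
# not real credentials):
#
#   OPENAI_API_KEY=sk-...
#   BROWSERLESS_API_KEY=...
#   SERP_API_KEY=...
#   MONGODB_ACCESS=mongodb+srv://user:pass@cluster.example.mongodb.net
#   BEARER_TOKEN=...
#   TWITTER_API_KEY=...
#   TWITTER_API_KEY_SECRET=...
#   ACCESS_TOKEN=...
#   ACCESS_TOKEN_SECRET=...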
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.Model.list()  # sanity check: fails fast if the OpenAI key is invalid
browserless_api_key = os.getenv("BROWSERLESS_API_KEY")
serper_api_key = os.getenv("SERP_API_KEY")
uri = os.getenv("MONGODB_ACCESS")
cluster = MongoClient(uri)
# Twitter API set-up
client = tweepy.Client(bearer_token=str(os.getenv("BEARER_TOKEN")),
                       consumer_key=str(os.getenv("TWITTER_API_KEY")),
                       consumer_secret=str(os.getenv("TWITTER_API_KEY_SECRET")),
                       access_token=str(os.getenv("ACCESS_TOKEN")),
                       access_token_secret=str(os.getenv("ACCESS_TOKEN_SECRET")),
                       wait_on_rate_limit=False)
# Define the shared LLM (used by the agent, the summary chain, and the memory)
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k")

# 2. Tool for search (Serper.dev Google Search API)
def search(query):
    url = "https://google.serper.dev/search"
    payload = json.dumps({"q": query})
    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload)
    print(response.text)
    return response.text
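
# Illustrative call (hypothetical query): search("WebGPT paper OpenAI") returns
# the raw Serper JSON string; the agent reads it to find relevant links.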

# 3. Tool for scraping (Browserless renders the page, BeautifulSoup extracts text)
def scrape_website(objective: str, url: str):
    """Scrape a website and summarize the content based on the objective if it is too large.

    objective is the original objective & task that the user gives to the agent;
    url is the url of the website to be scraped.
    """
    print("Scraping website...")
    # Define the headers for the request
    headers = {
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/json',
    }
    # Define the data to be sent in the request
    data = {"url": url}
    # Convert Python object to JSON string
    data_json = json.dumps(data)
    # Send the POST request
    post_url = f"https://chrome.browserless.io/content?token={browserless_api_key}"
    response = requests.post(post_url, headers=headers, data=data_json)
    # Check the response status code
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        text = soup.get_text()
        print("THIS CONTENT:", text)
        if len(text) > 10000:
            return summary(objective, text)
        return text
    else:
        message = f"HTTP request failed with status code {response.status_code}"
        print(message)
        return message  # return the error so the agent can see why scraping failed

def summary(objective, content):
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500)
    docs = text_splitter.create_documents([content])
    map_prompt = """
    Write a summary of the following text for {objective}:
    "{text}"
    SUMMARY:
    """
    map_prompt_template = PromptTemplate(
        template=map_prompt, input_variables=["text", "objective"])
    summary_chain = load_summarize_chain(
        llm=llm,
        chain_type='map_reduce',
        map_prompt=map_prompt_template,
        combine_prompt=map_prompt_template,
        verbose=True
    )
    output = summary_chain.run(input_documents=docs, objective=objective)
    return output
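
# With chunk_size=10000 and chunk_overlap=500, a long page is split into
# overlapping chunks, each chunk is summarized with map_prompt, and the partial
# summaries are then combined with the same prompt (map_reduce).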

class ScrapeWebsiteInput(BaseModel):
    """Inputs for scrape_website"""
    objective: str = Field(
        description="The objective & task that users give to the agent")
    url: str = Field(description="The url of the website to be scraped")


class ScrapeWebsiteTool(BaseTool):
    name = "scrape_website"
    description = ("Useful when you need to get data from a website url; pass both the objective "
                   "and the url to the function. DO NOT make up any url; the url shall only come "
                   "from the search results.")
    args_schema: Type[BaseModel] = ScrapeWebsiteInput

    def _run(self, objective: str, url: str):
        return scrape_website(objective, url)

    def _arun(self, objective: str, url: str):
        raise NotImplementedError("scrape_website does not support async")

def location():
    loc = get_geolocation()
    return loc

def split_string(s, limit):
    lines = s.split("\n")
    strings = []
    for line in lines:
        words = line.split()
        if not words:  # If the line is empty, continue to the next one
            continue
        current_string = words[0]
        for word in words[1:]:
            if len(current_string) + len(word) + 1 > limit:  # +1 for the space
                strings.append(current_string)
                current_string = word
            else:
                current_string += ' ' + word
        strings.append(current_string)  # append the last string
    return strings
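
# Worked example: split_string("a b c", 3) -> ["a b", "c"]. Words are packed
# greedily up to the limit (a single word longer than the limit is kept whole);
# the caller below uses limit=280 to fit each chunk into one tweet.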

tools = [
    Tool(
        name="Search",
        func=search,
        description="Useful for when you need to answer questions regarding all topics. You shall ask targeted questions."
    ),
    ScrapeWebsiteTool(),
]

memory = ConversationSummaryBufferMemory(
    memory_key="memory", return_messages=True, llm=llm, max_token_limit=1000)

system_message = SystemMessage(
    content="""You are a top world-class researcher who works for the United States of America and is cleared to receive any internal information on any topic in every company or organization on Earth. You will conduct extremely detailed research on any topic and produce facts based on results;
    you do not hallucinate, and you will try as hard as possible to gather facts & data to back up and prove the research.
    Please make sure you complete the objective above with the following rules:
    1/ You shall do enough research to gather as much information as possible about the objective.
    2/ If there are urls of relevant links & articles, you will scrape them to gather more information about the objective.
    3/ After scraping & searching, you shall think: "Is there any new information I should search & scrape based on the data I collected to increase the research quality?" If the answer is yes, continue; but do not do this for more than 5 iterations.
    4/ You shall not hallucinate; you shall only write facts based on the data that you have gathered.
    5/ In the final output, you shall include citations of all reference data & links to back up and prove your research."""
)

agent_kwargs = {
    "extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
    "system_message": system_message,
}

# Initialize chat history (module level; functions refer to it via `global`)
chat_history = []

agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
    agent_kwargs=agent_kwargs,
    memory=memory,
)
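
# The OPENAI_FUNCTIONS agent type lets gpt-3.5-turbo pick between the Search
# and scrape_website tools via function calling on each step, while
# ConversationSummaryBufferMemory keeps recent messages verbatim and summarizes
# older ones once the buffer exceeds max_token_limit=1000 tokens.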

def run_agent(query):
    global chat_history
    total = 0.0
    # get_openai_callback tracks the OpenAI API cost of everything run inside it
    with get_openai_callback() as cb:
        result = agent({"input": query})
        total += cb.total_cost
    return result, total
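
# Illustrative call (hypothetical query):
#   result, cost = run_agent("Summarize the WebGPT paper")
# result["output"] holds the agent's final answer; cost is the USD spend.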

# 4. Use streamlit to create a web app
def main():
    global chat_history
    st.set_page_config(page_title="AI Research Agent", page_icon=":robot_face:")

    with st.sidebar:
        st.title(":robot_face: AI Research Agent")
        st.write("As an AI Research Agent, I will try my very best to answer all of your queries!")
        st.write(":man-bowing: Note: this AI agent uses OpenAI's gpt-3.5-turbo-16k LLM, so queries are limited to 16,384 tokens.")
        with open("WebGPT.pdf", "rb") as pdf_file:
            PDFbyte = pdf_file.read()
        st.write(":open_book: This project was inspired by and builds upon the methodologies and findings presented in the WebGPT research paper published by OpenAI.")
        st.download_button(label=":page_facing_up: Download WebGPT Paper as PDF",
                           data=PDFbyte,
                           file_name="WebGPT.pdf",
                           mime='application/octet-stream')
        st.divider()
        db = cluster["user_location"]
        collection = db["location"]
        latitude_array = []
        longitude_array = []
        if st.checkbox("See people using our Research Agents"):
            this_location = location()
            try:
                latitude = this_location["coords"]["latitude"]
                longitude = this_location["coords"]["longitude"]
                collection.insert_one({"latitude": latitude, "longitude": longitude})
            except TypeError:
                # get_geolocation() returns None until the user grants permission
                st.info(":rotating_light: Couldn't get your location, try allowing permissions for accessing location :rotating_light:")
            finally:
                st.subheader("Users currently using our Research Agents:")
                database_data = collection.find({})
                for this_data in database_data:
                    latitude_array.append(this_data["latitude"])
                    longitude_array.append(this_data["longitude"])
                location_dict = {"col1": latitude_array,
                                 "col2": longitude_array,
                                 "col3": 5,
                                 "col4": np.random.rand(len(latitude_array), 4).tolist()}
                df1 = pd.DataFrame(location_dict)
                st.map(df1, latitude="col1", longitude="col2", size="col3", color="col4")
        st.divider()
        st.caption("Made with :white_heart: by Caleb Kan")
        st.caption(":email: email: [email protected]")
        st.caption(":bird: twitter: @calebkan_")
    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = [{"role": "assistant", "content": "Hi, how can I help you with your research?", "message_cost": 0.0}]

    # Replay the stored conversation on each rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
            if "twitter_message" in message:
                st.info(message["twitter_message"])
            if "message_cost" in message:
                cost_message = f"This response cost: ${message['message_cost']} USD"
                st.code(cost_message, language='python')
    # React to user input
    if query := st.chat_input("Send a message"):
        # Display user message in chat message container
        st.chat_message("user").markdown(query)
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": query})
        result, this_cost = run_agent(query)
        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            # Simulate streaming by revealing the answer one character at a time
            full_response = ""
            for char in result["output"]:
                full_response += char
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(result["output"])
            # Post the answer to Twitter in 280-character chunks
            tweet_array = split_string(str(result["output"]), 280)
            count = 0
            for tweet in tweet_array:
                try:
                    client.create_tweet(text=tweet)
                    count += 1
                except tweepy.TweepyException:
                    twitter_message = ":rotating_light: Twitter has reached its rate limit. Please wait for available quota to view the full response on twitter. :rotating_light:"
                    st.info(twitter_message)
                    break
            if count == len(tweet_array):
                twitter_message = ":bird: View full response on twitter via @calebkan_"
                st.info(twitter_message)
            cost_message = f"This response cost: ${this_cost} USD"
            st.code(cost_message, language='python')
        st.session_state.messages.append({"role": "assistant", "content": result["output"], "twitter_message": twitter_message, "message_cost": this_cost})
        # Accumulate the total cost and build a downloadable transcript of assistant replies
        file_chain = ""
        total = 0.0
        for message in st.session_state.messages:
            if message["role"] == "assistant":
                try:
                    total += float(message["message_cost"])
                except (KeyError, ValueError):
                    pass
                finally:
                    file_chain += str(message["content"]) + "\n"
        cost = f"Total accumulated response cost: ${total} USD"
        st.code(cost, language='python')
        st.download_button(label=':page_facing_up: Download Chat Response(s) as TXT',
                           data=file_chain,
                           file_name='Chat_Response(s).txt')

# FastAPI stub (currently unused; the UI is served entirely through Streamlit)
app = FastAPI()

if __name__ == "__main__":
    main()