🌐 AI搜索 & 代理 主页
Skip to content

Commit 5f1a2dc

Browse files
committed
Added rag-retrieval-timing-tests
1 parent 0842673 commit 5f1a2dc

File tree

11 files changed

+555
-0
lines changed

11 files changed

+555
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
PINECONE_API_KEY=
2+
QDRANT_API_KEY=
3+
ZILLIZ_API_KEY=
4+
WCS_API_KEY=
5+
OPENAI_API_KEY=
6+
HF_TOKEN=
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Rag Timing Tests
2+
3+
This script runs timing tests for common RAG systems.
4+
5+
To run it, copy `.env.development` to `.env`, set the appropriate variables in the `.env` file, install the dependencies in `requirements.txt`, and run `python3 __main__.py`.
6+
7+
Notice that this script assumes certain actions to create databases or setup "collections" have been performed for each cloud provider. See the script for more details.
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import time
2+
import asyncio
3+
4+
import postgresml as pgl
5+
import zilliz_local as zl
6+
import pinecone_local as pl
7+
import qdrant_local as ql
8+
import openai_local as al
9+
import huggingface as hf
10+
import weaviate_local as wl
11+
12+
TRIAL_COUNT = 2
13+
14+
# The pairs we are testing with
15+
# Each entry describes one system under test. Besides "name" and
# "vector_store", an entry may carry:
#   "embedding_service" - module used to embed text before it reaches the store
#   "chatbot_service"   - module used to produce the final LLM answer
#   "rag+"              - the store is handed raw text/queries (no separate embedding step)
#   "rag++"             - the store is also asked for the LLM response itself
#   "async"             - the store's functions are coroutines run via asyncio
tests = [
    {
        "name": "PostgresML",
        "vector_store": pgl,
        "rag+": True,
        "chatbot_service": al,
        "async": True,
    },
    {
        "name": "Weaviate",
        "vector_store": wl,
        "chatbot_service": al,
        "rag++": True,
    },
    # The remaining stores all share the same shape: external HuggingFace
    # embeddings plus the OpenAI chatbot.
    *(
        {
            "name": store_name,
            "vector_store": store_module,
            "embedding_service": hf,
            "chatbot_service": al,
        }
        for store_name, store_module in (
            ("Zilliz", zl),
            ("Pinecone", pl),
            ("Qdrant", ql),
        )
    ),
]
43+
44+
45+
# Our documents
46+
# We only really need to test on 2. When we search we are trying to get the first document back
47+
# Document ids are the string form of each text's position in this tuple.
_DOCUMENT_TEXTS = (
    "The hidden value is 1000",
    "This is just some random text",
)
documents = [
    {"id": str(position), "metadata": {"text": text}}
    for position, text in enumerate(_DOCUMENT_TEXTS)
]
54+
55+
56+
def maybe_do_async(func, check_dict, *args):
    """Call ``func(*args)``, running it through ``asyncio.run`` when flagged.

    ``check_dict`` is consulted for a truthy ``"async"`` entry; when present,
    the object returned by ``func`` is run to completion with ``asyncio.run``
    and its result returned. Otherwise the plain return value is passed back.
    """
    # Guard clause: the synchronous path is the common one.
    if "async" not in check_dict or not check_dict["async"]:
        return func(*args)
    return asyncio.run(func(*args))
61+
62+
63+
def do_data_upsert(name, vector_store, **kwargs):
    """Load the module-level ``documents`` into ``vector_store``.

    Args:
        name: Display name of the system, used only in progress output.
        vector_store: Module exposing ``upsert_data``.
        **kwargs: Remaining test configuration. A truthy ``"rag+"`` or
            ``"rag++"`` flag means the store receives the raw documents;
            otherwise ``kwargs["embedding_service"]`` embeds the texts first
            and the embeddings are passed along with the documents.
    """
    print(f"Doing Data Upsert For: {name}")
    # Check the flag values, not just key presence, so `"rag+": False`
    # disables the mode the same way `"async": False` does.
    if kwargs.get("rag++") or kwargs.get("rag+"):
        maybe_do_async(vector_store.upsert_data, kwargs, documents)
    else:
        texts = [d["metadata"]["text"] for d in documents]
        # Embedding time is not reported during setup, so it is discarded.
        embeddings, _ = kwargs["embedding_service"].get_embeddings(texts)
        maybe_do_async(vector_store.upsert_data, kwargs, documents, embeddings)
    print(f"Done Doing Data Upsert For: {name}\n")
72+
73+
74+
def do_normal_rag_test(name, vector_store, **kwargs):
    """Run one retrieval + completion round trip and report its timings.

    Three modes are supported, selected by flags in ``kwargs``:

    * truthy ``"rag++"``: ``vector_store.get_llm_response(query)`` does
      everything; embed and search times are reported as 0.
    * truthy ``"rag+"``: ``vector_store.do_search(query)`` receives the raw
      query (embed time reported as 0), then ``kwargs["chatbot_service"]``
      answers from the returned context.
    * otherwise: ``kwargs["embedding_service"]`` embeds the query,
      ``vector_store.do_search`` receives the first embedding, and
      ``kwargs["chatbot_service"]`` answers from the returned context.

    Args:
        name: Display name of the system, used only in progress output.
        vector_store: Module or object exposing the functions named above.
        **kwargs: Remaining test configuration (flags and service modules).

    Returns:
        Dict with ``time_to_embed``, ``time_to_search``,
        ``time_for_retrieval`` (embed + search), ``time_to_complete`` and
        ``total_time`` (sum of all three stages).
    """
    print(f"Doing RAG Test For: {name}")
    query = "What is the hidden value?"
    # Flags are read by value rather than key presence, so an explicit
    # `False` turns a mode off (matching how "async" is interpreted).
    if kwargs.get("rag++"):
        (result, time_to_complete) = maybe_do_async(
            vector_store.get_llm_response, kwargs, query
        )
        time_to_embed = 0
        time_to_search = 0
    elif kwargs.get("rag+"):
        time_to_embed = 0
        (context, time_to_search) = maybe_do_async(
            vector_store.do_search, kwargs, query
        )
        (result, time_to_complete) = kwargs["chatbot_service"].get_llm_response(
            query, context
        )
    else:
        (embeddings, time_to_embed) = kwargs["embedding_service"].get_embeddings(
            [query]
        )
        (context, time_to_search) = vector_store.do_search(embeddings[0])
        (result, time_to_complete) = kwargs["chatbot_service"].get_llm_response(
            query, context
        )
    print(f"\tThe LLM Said: {result}")
    time_for_retrieval = time_to_embed + time_to_search
    total_time = time_to_embed + time_to_search + time_to_complete
    print(f"Done Doing RAG Test For: {name}")
    print(f"- Time to Embed: {time_to_embed}")
    print(f"- Time to Search: {time_to_search}")
    print(f"- Total Time for Retrieval: {time_for_retrieval}")
    print(f"- Time for Chatbot Completion: {time_to_complete}")
    print(f"- Total Time Taken: {total_time}\n")
    return {
        "time_to_embed": time_to_embed,
        "time_to_search": time_to_search,
        "time_for_retrieval": time_for_retrieval,
        "time_to_complete": time_to_complete,
        "total_time": total_time,
    }
115+
116+
117+
if __name__ == "__main__":
    # Phase 1: load the shared documents into every system under test.
    print("----------Doing Data Setup-------------------------\n")
    for test in tests:
        do_data_upsert(**test)
    print("\n----------Done Doing Data Setup------------------\n\n")

    # Phase 2: run every test TRIAL_COUNT times, grouping timings by name.
    print("----------Doing Rag Tests-------------------------\n")
    stats = {}
    for _ in range(TRIAL_COUNT):
        for test in tests:
            times = do_normal_rag_test(**test)
            stats.setdefault(test["name"], []).append(times)
    print("\n----------Done Doing Rag Tests---------------------\n")

    # Phase 3: print the per-system average of each recorded timing.
    print("------------Final Results---------------------------\n")
    # (timing key, output template) in the order the lines are printed.
    report_lines = (
        ("time_to_embed", "- Average Time to Embed: {:0.4f}"),
        ("time_to_search", "- Average Time to Search: {:0.4f}"),
        ("time_for_retrieval", "- Average Total Time for Retrieval: {:0.4f}"),
        ("time_to_complete", "- Average Time for Chatbot Completion: {:0.4f}"),
        ("total_time", "- Average Total Time Taken: {:0.4f}\n"),
    )
    for test in tests:
        trials = stats[test["name"]]
        # Total every metric up front, before anything is printed.
        totals = [
            (template, sum(trial[key] for trial in trials))
            for key, template in report_lines
        ]
        print(f'Done Doing RAG Test For: {test["name"]}')
        for template, total in totals:
            print(template.format(total / TRIAL_COUNT))
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import requests
2+
import time
3+
import os
4+
import sys
5+
from dotenv import load_dotenv
6+
7+
# Load our environment variables
8+
load_dotenv()
# Token sent as the Bearer credential on embedding requests; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
10+
11+
12+
# Get the embedding from HuggingFace
13+
def get_embeddings(inputs, timeout=60):
    """Request embeddings for ``inputs`` from the HuggingFace inference API.

    Posts to the hosted ``intfloat/e5-small`` feature-extraction pipeline,
    authenticating with ``HF_TOKEN``.

    Args:
        inputs: The texts to embed (sent as the ``"inputs"`` JSON field).
        timeout: Seconds ``requests`` waits before giving up, so a stalled
            connection cannot hang the whole benchmark. Defaults to 60.

    Returns:
        ``(response, time_taken)``: the decoded JSON body and the seconds
        spent on the HTTP request.

    Exits the process via ``sys.exit`` with the decoded body when the API
    answers with an ``"error"`` object.
    """
    print("\tGetting embeddings from HuggingFace")
    # Build the request before starting the clock so only the HTTP round
    # trip is timed.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {"inputs": inputs}
    tic = time.perf_counter()
    response = requests.post(
        "https://api-inference.huggingface.co/pipeline/feature-extraction/intfloat/e5-small",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    toc = time.perf_counter()
    time_taken = toc - tic
    print(f"\tDone getting embeddings: {time_taken:0.4f}\n")
    response = response.json()
    # Only a JSON object can carry an "error" key; the success payload is
    # passed through untouched.
    if isinstance(response, dict) and "error" in response:
        sys.exit(response)
    return (response, time_taken)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from openai import OpenAI
2+
import time
3+
4+
# Create our OpenAI client
5+
# NOTE(review): no API key is passed here, so the client presumably reads
# OPENAI_API_KEY from the environment. This module never calls load_dotenv()
# itself, so that relies on another module having loaded .env first - confirm.
client = OpenAI()
6+
7+
8+
# Get LLM response from OpenAI
9+
def get_llm_response(query, context):
    """Ask gpt-3.5-turbo to answer ``query`` given ``context``.

    ``context`` is interpolated into the system prompt and ``query`` is sent
    as the user message. Returns ``(response, time_taken)``: the content of
    the first completion choice and the seconds the request took.
    """
    print("\tGetting LLM response from OpenAI")
    started = time.perf_counter()
    system_message = {
        "role": "system",
        "content": f"You are a helpful assistant. Given the context, provide an answer to the user: \n{context}",
    }
    user_message = {"role": "user", "content": query}
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
    )
    time_taken = time.perf_counter() - started
    print(f"\tDone getting the LLM response: {time_taken:0.4f}")
    first_choice = completion.choices[0]
    return (first_choice.message.content, time_taken)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from pinecone import Pinecone, ServerlessSpec
2+
from dotenv import load_dotenv
3+
import time
4+
import os
5+
6+
# Load our environment variables
7+
load_dotenv()
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

# Create our Pinecone client and get a handle on the "test" index
# Note we created their default index using their gcp-start region and us-central1 region
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("test")
14+
15+
16+
# Store some initial documents to retrieve
17+
def upsert_data(documents, embeddings):
    """Upsert ``documents`` with their ``embeddings`` into namespace "ns1".

    Args:
        documents: Dicts describing each record; each is sent to Pinecone
            with an added ``"values"`` entry.
        embeddings: One vector per document, in the same order.

    Returns:
        Seconds spent in the ``index.upsert`` call.
    """
    # Build new records rather than writing "values" into the caller's
    # dicts, since the caller may reuse those document objects elsewhere.
    vectors = [
        {**document, "values": embedding}
        for document, embedding in zip(documents, embeddings)
    ]
    print("\tStarting PineCone upsert")
    tic = time.perf_counter()
    index.upsert(vectors, namespace="ns1")
    toc = time.perf_counter()
    time_taken_to_upsert = toc - tic
    print(f"\tDone PineCone upsert: {time_taken_to_upsert:0.4f}")
    return time_taken_to_upsert
27+
28+
29+
# Do cosine similarity search over our pinecone index
30+
def do_search(vector):
    """Query namespace "ns1" for the single closest match to ``vector``.

    Returns ``(text, time_done)``: the ``"text"`` metadata of the top match
    and the seconds spent in the query call.
    """
    print("\tDoing cosine similarity search with PineCone")
    started = time.perf_counter()
    response = index.query(
        namespace="ns1", vector=vector, top_k=1, include_metadata=True
    )
    time_done = time.perf_counter() - started
    print(f"\tDone doing cosine similarity search: {time_done:0.4f}\n")
    top_match = response["matches"][0]
    return (top_match["metadata"]["text"], time_done)
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from pgml import Collection, Pipeline
2+
from dotenv import load_dotenv
3+
import time
4+
5+
# Load our environment variables
6+
load_dotenv()

# Initialize our Collection and Pipeline
collection = Collection("test_collection")
# Pipeline schema: semantic search over the "text" field with intfloat/e5-small
_PIPELINE_SCHEMA = {
    "text": {
        "semantic_search": {"model": "intfloat/e5-small"},
    },
}
pipeline = Pipeline("test_pipeline", _PIPELINE_SCHEMA)
20+
21+
22+
# Add the Pipeline to our collection
23+
# We only need to do this once
24+
async def setup_pipeline():
    """Attach the module-level ``pipeline`` to the module-level ``collection``."""
    await collection.add_pipeline(pipeline)
26+
27+
28+
async def upsert_data(documents):
    """Upsert ``documents`` into the PostgresML collection.

    Args:
        documents: Dicts with an ``"id"`` and a ``"metadata"`` dict holding
            ``"text"``; each is flattened to ``{"id", "text"}`` before upload.

    Returns:
        Seconds spent in ``collection.upsert_documents``, matching what the
        Pinecone and Qdrant ``upsert_data`` functions return.
    """
    # Use a separate name so the caller's argument is not shadowed.
    rows = [
        {"id": document["id"], "text": document["metadata"]["text"]}
        for document in documents
    ]
    print("Starting PostgresML upsert")
    tic = time.perf_counter()
    await collection.upsert_documents(rows)
    toc = time.perf_counter()
    time_taken = toc - tic
    print(f"Done PostgresML upsert: {time_taken:0.4f}\n")
    return time_taken
39+
40+
41+
async def do_search(query):
    """Run a one-result vector search for ``query`` against the "text" field.

    Returns ``(chunk, time_taken)``: the ``"chunk"`` entry of the first
    result and the seconds spent in ``collection.vector_search``.
    """
    print(
        "\tDoing embedding and cosine similarity search over our PostgresML Collection"
    )
    started = time.perf_counter()
    text_field_query = {"text": {"query": query}}
    search_request = {"query": {"fields": text_field_query}, "limit": 1}
    results = await collection.vector_search(search_request, pipeline)
    time_taken = time.perf_counter() - started
    print(f"\tDone doing embedding and cosine similarity search: {time_taken:0.4f}\n")
    best_hit = results[0]
    return (best_hit["chunk"], time_taken)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from qdrant_client import QdrantClient
2+
from qdrant_client.models import Distance, VectorParams, PointStruct
3+
from dotenv import load_dotenv
4+
import time
5+
import os
6+
7+
# Load our environment variables
8+
load_dotenv()
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
# The cluster endpoint can be overridden with QDRANT_URL; the default is the
# cluster that was previously hard-coded here.
QDRANT_URL = os.getenv(
    "QDRANT_URL",
    "https://059364f6-62c5-4f80-9f19-cf6d6394caae.us-east4-0.gcp.cloud.qdrant.io:6333",
)

# Create our Qdrant client
qdrant = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)

# Create our Qdrant collection
# NOTE(review): this runs on import; recreate_collection presumably drops any
# existing "test" collection first - confirm before pointing at real data.
# The vector size of 384 presumably matches the intfloat/e5-small embeddings
# produced by the embedding service - confirm if the model changes.
qdrant.recreate_collection(
    collection_name="test",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)
22+
23+
24+
# Store some initial documents to retrieve
25+
def upsert_data(documents, embeddings):
    """Upsert ``documents`` with their ``embeddings`` into the "test" collection.

    Each document becomes a point whose id is the integer form of its
    ``"id"``, whose vector is the matching embedding and whose payload is
    its ``"metadata"``. Returns the seconds spent in ``qdrant.upsert``.
    """
    points = []
    for document, embedding in zip(documents, embeddings):
        point = PointStruct(
            id=int(document["id"]),
            vector=embedding,
            payload=document["metadata"],
        )
        points.append(point)
    print("\tStarting Qdrant upsert")
    started = time.perf_counter()
    qdrant.upsert(collection_name="test", points=points)
    time_taken_to_upsert = time.perf_counter() - started
    print(f"\tDone Qdrant upsert: {time_taken_to_upsert:0.4f}")
    return time_taken_to_upsert
39+
40+
41+
# Do cosine similarity search over our Qdrant collection
42+
def do_search(vector):
    """Search the "test" collection for the single closest point to ``vector``.

    Returns ``(results, time_done)``: the result object from
    ``qdrant.search`` exactly as received, and the seconds the call took.
    """
    print("\tDoing cosine similarity search with Qdrant")
    started = time.perf_counter()
    results = qdrant.search(
        collection_name="test",
        query_vector=vector,
        limit=1,
    )
    time_done = time.perf_counter() - started
    print(f"\tDone doing cosine similarity search: {time_done:0.4f}\n")
    # NOTE(review): the raw search results are handed back as the context,
    # not an extracted text field - confirm that is what callers expect.
    return (results, time_done)

0 commit comments

Comments
 (0)