From 721345f2856c2b334f67520d21de5822990354a3 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Fri, 18 Oct 2024 10:27:41 -0400 Subject: [PATCH] Remove scripts committed accidentally --- app.py | 31 ----------------------------- app2.py | 16 --------------- app3.py | 62 --------------------------------------------------------- 3 files changed, 109 deletions(-) delete mode 100644 app.py delete mode 100644 app2.py delete mode 100644 app3.py diff --git a/app.py b/app.py deleted file mode 100644 index 562241c8..00000000 --- a/app.py +++ /dev/null @@ -1,31 +0,0 @@ -from pinecone.grpc import PineconeGRPC, GRPCClientConfig - -# Initialize a client. An API key must be passed, but the -# value does not matter. -pc = PineconeGRPC(api_key="test_api_key") - -# Target the indexes. Use the host and port number along with disabling tls. -index1 = pc.Index(host="localhost:5081", grpc_config=GRPCClientConfig(secure=False)) -index2 = pc.Index(host="localhost:5082", grpc_config=GRPCClientConfig(secure=False)) - -# You can now perform data plane operations with index1 and index2 - -dimension = 3 - - -def upserts(): - vectors = [] - for i in range(0, 100): - vectors.append((f"vec{i}", [i] * dimension)) - - print(len(vectors)) - - index1.upsert(vectors=vectors, namespace="ns2") - index2.upsert(vectors=vectors, namespace="ns2") - - -upserts() -print(index1.describe_index_stats()) - -print(index1.query(id="vec1", top_k=2, namespace="ns2", include_values=True)) -print(index1.query(id="vec1", top_k=10, namespace="", include_values=True)) diff --git a/app2.py b/app2.py deleted file mode 100644 index c8349e70..00000000 --- a/app2.py +++ /dev/null @@ -1,16 +0,0 @@ -from pinecone.grpc import PineconeGRPC -from pinecone import Pinecone - -pc = Pinecone(api_key="b1cb8ba4-b3d1-458f-9c32-8dd10813459a") -pcg = PineconeGRPC(api_key="b1cb8ba4-b3d1-458f-9c32-8dd10813459a") - -index = pc.Index("jen2") -indexg = pcg.Index(name="jen2", use_asyncio=True) - -# Rest call fails -# print(index.upsert(vectors=[("vec1", [1, 2])])) - -# GRPC succeeds -print(indexg.upsert(vectors=[("vec1", [1, 2])])) - -# print(index.fetch(ids=['vec1'])) diff --git a/app3.py b/app3.py deleted file mode 100644 index 5e49daff..00000000 --- a/app3.py +++ /dev/null @@ -1,62 +0,0 @@ -import asyncio -from pinecone.grpc import PineconeGRPC as Pinecone, Vector - -import time -import random -import pandas as pd - - -# Enable gRPC tracing and verbosity for more detailed logs -# os.environ["GRPC_VERBOSITY"] = "DEBUG" -# os.environ["GRPC_TRACE"] = "all" - - -# Generate a large set of vectors (as an example) -def generate_vectors(num_vectors, dimension): - return [ - Vector(id=f"vector_{i}", values=[random.random()] * dimension) for i in range(num_vectors) - ] - - -def load_vectors(): - df = pd.read_parquet("test_records_100k_dim1024.parquet") - df["values"] = df["values"].apply(lambda x: [float(v) for v in x]) - - vectors = [Vector(id=row.id, values=list(row.values)) for row in df.itertuples()] - return vectors - - -async def main(): - # Create a semaphore to limit concurrency (e.g., max 5 concurrent requests) - s = time.time() - # all_vectors = load_vectors() - all_vectors = generate_vectors(1000, 1024) - f = time.time() - print(f"Loaded {len(all_vectors)} vectors in {f-s:.2f} seconds") - - start_time = time.time() - - # Same setup as before... - pc = Pinecone(api_key="b1cb8ba4-b3d1-458f-9c32-8dd10813459a") - index = pc.Index( - # index_host="jen2-dojoi3u.svc.aped-4627-b74a.pinecone.io" - host="jen1024-dojoi3u.svc.apw5-4e34-81fa.pinecone.io", - use_asyncio=True, - ) - - batch_size = 150 - namespace = "asyncio-py7" - res = await index.upsert( - vectors=all_vectors, batch_size=batch_size, namespace=namespace, show_progress=True - ) - - print(res) - - end_time = time.time() - - total_time = end_time - start_time - print(f"All tasks completed in {total_time:.2f} seconds") - print(f"Namespace: {namespace}") - - -asyncio.run(main())