RAG-AI/EmbeddingModels/PineconeEmbeddingModel.py

26 lines
551 B
Python
Raw Permalink Normal View History

2025-11-24 15:26:15 -06:00
import os
import cohere
import torch
from transformers import AutoModel, AutoTokenizer
from datasets import load_dataset
# Initialize the Cohere client.
# The API key is read from the environment instead of being hard-coded:
# a secret committed to source control is visible to everyone with access
# to the repository and has to be treated as leaked (rotate it).
_cohere_api_key = os.environ.get("COHERE_API_KEY") or os.environ.get("CO_API_KEY")
if not _cohere_api_key:
    raise RuntimeError(
        "Cohere API key not found: set the COHERE_API_KEY (or CO_API_KEY) "
        "environment variable before running this script."
    )
# Pass the key explicitly so the client does not depend on which
# environment-variable name the installed SDK version looks up by default.
co = cohere.Client(api_key=_cohere_api_key)
# Initialize the E5 embedding model.
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# The E5 checkpoints live under the "intfloat" organization on the
# Hugging Face Hub; the previous id ("infloat/...") was a typo that
# from_pretrained cannot resolve.
model_id = "intfloat/e5-base-v2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id).to(device)
# Switch to evaluation mode: the model is only used for inference here.
model.eval()
# Load the ready-made "jamescalam/ai-arxiv-chunked" dataset from
# Hugging Face, keeping only its "train" split.
data = load_dataset("jamescalam/ai-arxiv-chunked", split="train")