import os

import cohere
import torch
from datasets import load_dataset
from transformers import AutoModel, AutoTokenizer

# Initialize Cohere.
# NOTE(review): an API key is committed in source here. It is kept only as a
# fallback so the script keeps running as before; rotate this key and provide
# COHERE_API_KEY through the environment instead. `setdefault` lets a value
# that is already set in the environment take precedence over the literal.
os.environ.setdefault("COHERE_API_KEY", "Ef4JbSQqzi4")
# Pass the key explicitly instead of depending on which environment variable
# name the installed SDK version looks up on its own.
co = cohere.Client(os.environ["COHERE_API_KEY"])

# Initialize E5 on the GPU when CUDA is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "intfloat/e5-base-v2"  # Hub organization is "intfloat"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id).to(device)
model.eval()  # switch the module to evaluation mode

# Load the premade dataset from the Hugging Face Hub (train split only).
data = load_dataset(
    "jamescalam/ai-arxiv-chunked",
    split="train",
)