Using ONNX Models
Supported Models
Enum Variant |
Description |
AllMiniLML6V2 |
sentence-transformers/all-MiniLM-L6-v2 |
AllMiniLML6V2Q |
Quantized sentence-transformers/all-MiniLM-L6-v2 |
AllMiniLML12V2 |
sentence-transformers/all-MiniLM-L12-v2 |
AllMiniLML12V2Q |
Quantized sentence-transformers/all-MiniLM-L12-v2 |
BGEBaseENV15 |
BAAI/bge-base-en-v1.5 |
BGEBaseENV15Q |
Quantized BAAI/bge-base-en-v1.5 |
BGELargeENV15 |
BAAI/bge-large-en-v1.5 |
BGELargeENV15Q |
Quantized BAAI/bge-large-en-v1.5 |
BGESmallENV15 |
BAAI/bge-small-en-v1.5 - Default |
BGESmallENV15Q |
Quantized BAAI/bge-small-en-v1.5 |
NomicEmbedTextV1 |
nomic-ai/nomic-embed-text-v1 |
NomicEmbedTextV15 |
nomic-ai/nomic-embed-text-v1.5 |
NomicEmbedTextV15Q |
Quantized nomic-ai/nomic-embed-text-v1.5 |
ParaphraseMLMiniLML12V2 |
sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 |
ParaphraseMLMiniLML12V2Q |
Quantized sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 |
ParaphraseMLMpnetBaseV2 |
sentence-transformers/paraphrase-mpnet-base-v2 |
BGESmallZHV15 |
BAAI/bge-small-zh-v1.5 |
MultilingualE5Small |
intfloat/multilingual-e5-small |
MultilingualE5Base |
intfloat/multilingual-e5-base |
MultilingualE5Large |
intfloat/multilingual-e5-large |
MxbaiEmbedLargeV1 |
mixedbread-ai/mxbai-embed-large-v1 |
MxbaiEmbedLargeV1Q |
Quantized mixedbread-ai/mxbai-embed-large-v1 |
GTEBaseENV15 |
Alibaba-NLP/gte-base-en-v1.5 |
GTEBaseENV15Q |
Quantized Alibaba-NLP/gte-base-en-v1.5 |
GTELargeENV15 |
Alibaba-NLP/gte-large-en-v1.5 |
GTELargeENV15Q |
Quantized Alibaba-NLP/gte-large-en-v1.5 |
JINAV2SMALLEN |
jinaai/jina-embeddings-v2-small-en |
JINAV2BASEEN |
jinaai/jina-embeddings-v2-base-en |
JINAV3 |
jinaai/jina-embeddings-v3 |
Example Usage
import heapq
from embed_anything import (
    EmbeddingModel,
    TextEmbedConfig,
    WhichModel,
    embed_query,
    ONNXModel,
)
import os
from time import time
import numpy as np

# Load the quantized BGE-small English model from its ONNX weights.
model = EmbeddingModel.from_pretrained_onnx(WhichModel.Bert, ONNXModel.BGESmallENV15Q)
# model = EmbeddingModel.from_pretrained_hf(
#     WhichModel.Bert, "BAAI/bge-small-en-v1.5"
# )

sentences = [
    "The quick brown fox jumps over the lazy dog",
    "The cat is sleeping on the mat",
    "The dog is barking at the moon",
    "I love pizza",
    "I like to have pasta",
    "The dog is sitting in the park",
]

# Embed all sentences in one call and stack the vectors into a matrix.
embeddings = embed_query(sentences, embeder=model)
embed_vector = np.array([e.embedding for e in embeddings])
print("shape of embed_vector", embed_vector.shape)

# Pairwise dot-product similarity between every pair of sentence vectors.
similarities = embed_vector @ embed_vector.T

# Keep only the upper triangle (each unordered pair once, diagonal excluded).
pair_scores = [
    (similarities[i, j], i, j)
    for i in range(len(sentences))
    for j in range(i + 1, len(sentences))
]

# Report the five most similar sentence pairs with their scores.
for score, i, j in heapq.nlargest(5, pair_scores, key=lambda t: t[0]):
    print(f"Score: {score:.2} | {sentences[i]} | {sentences[j]}")
from embed_anything import (
    EmbeddingModel,
    ONNXModel,
    TextEmbedConfig,
    WhichModel,
    embed_query,
)
import os
from time import time

import pymupdf
from semantic_text_splitter import TextSplitter

# FIX: the original example used ONNXModel and time() without importing them
# in this snippet, and imported os twice.
model = EmbeddingModel.from_pretrained_onnx(WhichModel.Bert, ONNXModel.BGESmallENV15Q)
splitter = TextSplitter(1000)  # max chunk size passed to the splitter
config = TextEmbedConfig(batch_size=128)


def embed_anything():
    """Chunk and embed the text of every PDF in the local ``bench`` directory.

    NOTE(review): this function shadows the ``embed_anything`` module name;
    the from-imports above keep the example working, but a distinct name
    (e.g. ``embed_bench_pdfs``) would be clearer.
    """
    for file in os.listdir("bench"):
        # Extract every page's text and join into a single string per PDF.
        doc = pymupdf.open(os.path.join("bench", file))
        text = " ".join(page.get_text() for page in doc)
        chunks = splitter.chunks(text)
        embed_query(chunks, model, config)


start = time()
embed_anything()
print(time() - start)