Skip to content

Using ONNX Models

Supported Models

Enum Variant Description
AllMiniLML6V2 sentence-transformers/all-MiniLM-L6-v2
AllMiniLML6V2Q Quantized sentence-transformers/all-MiniLM-L6-v2
AllMiniLML12V2 sentence-transformers/all-MiniLM-L12-v2
AllMiniLML12V2Q Quantized sentence-transformers/all-MiniLM-L12-v2
BGEBaseENV15 BAAI/bge-base-en-v1.5
BGEBaseENV15Q Quantized BAAI/bge-base-en-v1.5
BGELargeENV15 BAAI/bge-large-en-v1.5
BGELargeENV15Q Quantized BAAI/bge-large-en-v1.5
BGESmallENV15 BAAI/bge-small-en-v1.5 - Default
BGESmallENV15Q Quantized BAAI/bge-small-en-v1.5
NomicEmbedTextV1 nomic-ai/nomic-embed-text-v1
NomicEmbedTextV15 nomic-ai/nomic-embed-text-v1.5
NomicEmbedTextV15Q Quantized nomic-ai/nomic-embed-text-v1.5
ParaphraseMLMiniLML12V2 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
ParaphraseMLMiniLML12V2Q Quantized sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
ParaphraseMLMpnetBaseV2 sentence-transformers/paraphrase-multilingual-mpnet-base-v2
BGESmallZHV15 BAAI/bge-small-zh-v1.5
MultilingualE5Small intfloat/multilingual-e5-small
MultilingualE5Base intfloat/multilingual-e5-base
MultilingualE5Large intfloat/multilingual-e5-large
MxbaiEmbedLargeV1 mixedbread-ai/mxbai-embed-large-v1
MxbaiEmbedLargeV1Q Quantized mixedbread-ai/mxbai-embed-large-v1
GTEBaseENV15 Alibaba-NLP/gte-base-en-v1.5
GTEBaseENV15Q Quantized Alibaba-NLP/gte-base-en-v1.5
GTELargeENV15 Alibaba-NLP/gte-large-en-v1.5
GTELargeENV15Q Quantized Alibaba-NLP/gte-large-en-v1.5
JINAV2SMALLEN jinaai/jina-embeddings-v2-small-en
JINAV2BASEEN jinaai/jina-embeddings-v2-base-en
JINAV2LARGEEN jinaai/jina-embeddings-v2-large-en

Example Usage

import heapq
from embed_anything import EmbeddingModel, TextEmbedConfig, WhichModel, embed_query, ONNXModel
import os
from time import time
import numpy as np

# Load the default English model (BGE-small v1.5) as quantized ONNX weights.
# NOTE(review): presumably downloads the weights from the Hugging Face hub on
# first use — confirm network access is available where this runs.
model = EmbeddingModel.from_pretrained_onnx(WhichModel.Bert, ONNXModel.BGESmallENV15Q)

# Alternative: load the same model as regular (non-ONNX) Hugging Face weights.
# model = EmbeddingModel.from_pretrained_hf(
#     WhichModel.Bert, "BAAI/bge-small-en-v1.5"
# )

sentences = [
    "The quick brown fox jumps over the lazy dog",
    "The cat is sleeping on the mat",
    "The dog is barking at the moon",
    "I love pizza",
    "I like to have pasta",
    "The dog is sitting in the park",
]

# Embed all sentences in a single batched call.
# (Fixed: variable was misspelled "embedddings" in the original.)
embeddings = embed_query(sentences, embeder=model)

# Stack the per-sentence vectors into an (n_sentences, dim) matrix.
embed_vector = np.array([e.embedding for e in embeddings])

print("shape of embed_vector", embed_vector.shape)
# Pairwise dot products. NOTE(review): this equals cosine similarity only if
# the model emits L2-normalized embeddings — confirm for BGESmallENV15Q.
similarities = np.matmul(embed_vector, embed_vector.T)

# Score every unordered sentence pair — the upper triangle of the similarity
# matrix, diagonal (self-similarity) excluded.
pair_scores = []
for i in range(len(sentences)):
    for j in range(i + 1, len(sentences)):
        pair_scores.append((similarities[i, j], i, j))

# Keep the five best-scoring pairs, highest score first.
top_pairs = heapq.nlargest(5, pair_scores, key=lambda t: t[0])

# Report each top pair alongside its score.
for score, i, j in top_pairs:
    print(f"Score: {score:.2} | {sentences[i]} | {sentences[j]}")


# Fixed: ONNXModel (used below) and time (used by the timing code) were
# missing from this example's imports, so it failed when run standalone.
from embed_anything import EmbeddingModel, WhichModel, embed_query, TextEmbedConfig, ONNXModel
import os
import pymupdf
from semantic_text_splitter import TextSplitter
import os  # NOTE(review): duplicate of the import above — harmless but redundant
from time import time

# Quantized BGE-small v1.5 English model as ONNX weights.
model = EmbeddingModel.from_pretrained_onnx(WhichModel.Bert, ONNXModel.BGESmallENV15Q)
# Chunker with a capacity of 1000 — presumably characters per chunk; confirm
# against semantic_text_splitter's docs.
splitter = TextSplitter(1000)
# Embed up to 128 chunks per batch.
config = TextEmbedConfig(batch_size=128)


def embed_anything():
    """Embed every PDF in ./bench: extract text, chunk it, embed the chunks.

    Embeddings are computed for their side effect (benchmark timing) only;
    the results are discarded.

    NOTE(review): assumes every entry in ./bench is a PDF that pymupdf can
    open — confirm the directory contents.
    """
    for file in os.listdir("bench"):
        doc = pymupdf.open(os.path.join("bench", file))
        try:
            # Concatenate the text of all pages into one string.
            text = " ".join(page.get_text() for page in doc)
        finally:
            # Release the file handle even if extraction fails
            # (the original leaked it).
            doc.close()
        chunks = splitter.chunks(text)
        embed_query(chunks, model, config)


# Time one full embedding pass over the bench/ directory.
t0 = time()
embed_anything()

# Elapsed wall-clock seconds.
print(time() - t0)