-
Voiceprints can also be vectorized using the
VectorizeVoiceprints API, which returns a vector representation of each voiceprint that can be used for downstream tasks such as clustering, custom scoring, other machine learning models, or even semantic search in vector databases. -
See the API reference for more details.
-
The following example shows how to use the
VectorizeVoiceprints API to vectorize voiceprints. The voiceprints can be loaded from files on disk or obtained from previous enrollment sessions.
Info
Voiceprints provided in VectorizeVoiceprints requests must be generated using the
same or a compatible model via StreamingEnroll.
import numpy as np
import grpc
import cobaltspeech.voicebio.v1.voicebio_pb2_grpc as stub
import cobaltspeech.voicebio.v1.voicebio_pb2 as voicebio
# Example client: connect to a VoiceBio server, vectorize three stored
# voiceprints, and print their pairwise cosine similarity matrix.
serverAddress = "localhost:2727"

# Using a channel without TLS enabled.
channel = grpc.insecure_channel(serverAddress)
client = stub.VoiceBioServiceStub(channel)

# Get server version.
versionResp = client.Version(voicebio.VersionRequest())
print(versionResp)

# Get list of models on the server.
modelResp = client.ListModels(voicebio.ListModelsRequest())
print("Models:")
for model in modelResp.models:
    print(model)

# Indexing models[0] below would raise IndexError on an empty list, so fail
# with a clear message instead.
if len(modelResp.models) == 0:
    raise SystemExit("no models available on the server")

# Select a model ID from the list above. Going with the first model
# in this example. The model ID should be the same as the one used to
# generate the voiceprints being vectorized.
modelID = modelResp.models[0].id

# Loading voiceprints.
voiceprints = []
for p in ["user1.bin", "user2.bin", "user3.bin"]:
    with open(p, 'r') as f:
        voiceprints.append(voicebio.Voiceprint(data=f.read().strip()))

# Set the vectorization config.
req = voicebio.VectorizeVoiceprintsRequest(
    model_id=modelID,
    voiceprints=voiceprints,
)

# Vectorize voiceprints.
result = client.VectorizeVoiceprints(req)

# The server returns a list of vectorized voiceprints in the same order as the input voiceprints.
#
# In most cases, the vectorized voiceprints can be compared using simple distance metrics such as
# cosine similarity or euclidean distance. This is not guaranteed, however, and depends on the model
# used to generate the voiceprints and vectorize them.

# Example using cosine similarity.
n = len(result.voiceprints)
similarity = np.zeros((n, n), dtype=np.float32)
for i, vi in enumerate(result.voiceprints):
    for j, vj in enumerate(result.voiceprints):
        similarity[i, j] = np.dot(vi.data, vj.data) / (np.linalg.norm(vi.data) * np.linalg.norm(vj.data))

print("Cosine Similarity Matrix:")
print(similarity)
package main
import (
	"context"
	"fmt"
	"math"
	"os"

	voicebio "github.com/cobaltspeech/go-genproto/cobaltspeech/voicebio/v1"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)
// main connects to a VoiceBio server without TLS, prints the server version
// and the available models, vectorizes the voiceprints stored in user1.bin,
// user2.bin and user3.bin using the first listed model, and prints the
// pairwise cosine similarity matrix of the returned vectors.
//
// Any failure is reported on stdout and the process exits with status 1.
func main() {
	const serverAddress = "localhost:2727"

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	opts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()), // Using a channel without TLS enabled.
		grpc.WithBlock(),
		grpc.WithReturnConnectionError(),
		grpc.FailOnNonTempDialError(true),
	}

	conn, err := grpc.DialContext(ctx, serverAddress, opts...)
	if err != nil {
		fmt.Printf("failed to dial gRPC connection: %v\n", err)
		os.Exit(1)
	}
	// Release the connection on normal return. The os.Exit paths below skip
	// deferred calls, but the process is terminating there anyway.
	defer conn.Close()

	client := voicebio.NewVoiceBioServiceClient(conn)

	// Get server version.
	versionResp, err := client.Version(ctx, &voicebio.VersionRequest{})
	if err != nil {
		fmt.Printf("failed to get server version: %v\n", err)
		os.Exit(1)
	}

	fmt.Printf("%v\n", versionResp)

	// Get the list of models on the server.
	modelResp, err := client.ListModels(ctx, &voicebio.ListModelsRequest{})
	if err != nil {
		fmt.Printf("failed to get model list: %v\n", err)
		os.Exit(1)
	}

	fmt.Println("Models:")
	for _, m := range modelResp.Models {
		fmt.Println(m)
	}
	fmt.Println()

	// The first model is selected below; indexing an empty list would panic.
	if len(modelResp.Models) == 0 {
		fmt.Printf("no models available on the server\n")
		os.Exit(1)
	}

	// Reading voiceprint data.
	paths := []string{"user1.bin", "user2.bin", "user3.bin"}
	voiceprints := make([]*voicebio.Voiceprint, 0, len(paths))
	for i, p := range paths {
		data, err := os.ReadFile(p)
		if err != nil {
			fmt.Printf("\nfailed to read voiceprint[%d] data: %v\n", i, err)
			os.Exit(1)
		}

		voiceprints = append(voiceprints, &voicebio.Voiceprint{Data: string(data)})
	}

	// Selecting the first model. The model ID should be the same as the one used to generate the
	// voiceprints being vectorized.
	req := &voicebio.VectorizeVoiceprintsRequest{
		ModelId:     modelResp.Models[0].Id,
		Voiceprints: voiceprints,
	}

	// Vectorize voiceprints.
	result, err := client.VectorizeVoiceprints(ctx, req)
	if err != nil {
		fmt.Printf("failed to vectorize voiceprints: %v\n", err)
		os.Exit(1)
	}

	// The server returns a list of vectorized voiceprints in the same order as the input voiceprints.
	//
	// In most cases, the vectorized voiceprints can be compared using simple distance metrics such as
	// cosine similarity or euclidean distance. This is not guaranteed, however, and depends on the model
	// used to generate the voiceprints and vectorize them.

	// Example using cosine similarity.
	n := len(result.Voiceprints)
	similarity := make([][]float32, n)
	for i := range similarity {
		similarity[i] = make([]float32, n)
	}

	for i, vi := range result.Voiceprints {
		for j, vj := range result.Voiceprints {
			s, err := cosineSimilarity(vi.Data, vj.Data)
			if err != nil {
				fmt.Printf("failed to compare voiceprints %d and %d: %v\n", i, j, err)
				os.Exit(1)
			}

			similarity[i][j] = s
		}
	}

	fmt.Printf("Cosine Similarity Matrix:\n")
	for i := range similarity {
		for j := range similarity[i] {
			fmt.Printf("%1.3f ", similarity[i][j])
		}
		fmt.Println()
	}
}

// cosineSimilarity returns dot(a, b) / (|a| * |b|), accumulating in float32.
//
// It returns an error when the vectors differ in length, since indexing them
// pairwise would otherwise panic. If either vector has zero norm, the
// division yields NaN rather than an error.
func cosineSimilarity(a, b []float32) (float32, error) {
	if len(a) != len(b) {
		return 0, fmt.Errorf("vector length mismatch: %d != %d", len(a), len(b))
	}

	var dot, normA, normB float32
	for k := range a {
		dot += a[k] * b[k]
		normA += a[k] * a[k]
		normB += b[k] * b[k]
	}

	denom := float32(math.Sqrt(float64(normA)) * math.Sqrt(float64(normB)))

	return dot / denom, nil
}