Documentation

Face ID

  • Accepts a JPG (JPEG) or PNG image (up to 3 MB), detects the largest face, and returns a 512-dimensional face embedding that can be used for verification or identification. Cosine similarity should be used as the comparison metric, with a recommended threshold range of 0.4 – 0.8.

    The optimal cosine similarity threshold depends on how constrained the face capture process is (e.g., pose, lighting, glasses, masks, etc.) and the desired balance between convenience and security (i.e., minimizing the False Rejection Rate (FRR) vs. the False Acceptance Rate (FAR)). For the Equal Error Rate (EER), the recommended thresholds are 0.55 for unconstrained capture and 0.66 for constrained capture comparisons. However, the optimal value may vary depending on your data.

    The size of the facial area can affect model performance. For optimal accuracy, a minimum facial area of 300×300 pixels is recommended.

    Although the model is trained for unconstrained face recognition, recognizing faces in a constrained capture setting is naturally an easier task and yields better performance.

    Check out the example below:

    import requests
    import base64
    import json
     
    def encode_image(file_path):
        with open(file_path, "rb") as f:
            encoded_string = base64.b64encode(f.read()).decode('utf-8')
        return encoded_string
     
    def encode_face(api_key, endpoint, file_path):
        encoded_image = encode_image(file_path)
        payload = json.dumps({
            "image_data": encoded_image
        })
        headers = {
            "x-api-key": api_key,
            "Content-Type": "application/json"
        }
        response = requests.post(endpoint,
                                 data=payload,
                                 headers=headers)
        return response
     
    url = "https://api.biomodality.com/encode_face"
    image_path = "./samples/example_image.jpg"
    response = encode_face(YOUR_API_KEY, url, image_path)
    print(response.json())
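
    Once you have embeddings, verification itself is a local cosine-similarity comparison. A minimal sketch (pure Python; the `embedding` response field name is an assumption, so confirm it against the actual response schema):

```python
import math

def cosine_similarity(a, b):
    # Cosine similarity between two equal-length embedding vectors.
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

def is_match(embedding_1, embedding_2, threshold=0.55):
    # 0.55 is the recommended EER threshold for unconstrained face capture.
    return cosine_similarity(embedding_1, embedding_2) >= threshold

# Hypothetical usage, assuming the response JSON exposes an "embedding" field:
# emb_1 = encode_face(YOUR_API_KEY, url, "./samples/a.jpg").json()["embedding"]
# emb_2 = encode_face(YOUR_API_KEY, url, "./samples/b.jpg").json()["embedding"]
# matched = is_match(emb_1, emb_2)
```

    In production you would typically use a vectorized implementation (e.g., NumPy) over 512-dimensional embeddings, but the decision rule is the same.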

  • Accepts two JPG (JPEG) or PNG images (up to 3 MB each), along with an optional cosine similarity threshold (0.4 – 0.8, defaults to 0.5), and returns the cosine similarity and match result between the two largest faces in the images.

    The optimal cosine similarity threshold depends on how constrained the face capture process is (e.g., pose, lighting, glasses, masks, etc.) and the desired balance between convenience and security (i.e., minimizing the False Rejection Rate (FRR) vs. the False Acceptance Rate (FAR)). For the Equal Error Rate (EER), the recommended thresholds are 0.55 for unconstrained capture and 0.66 for constrained capture comparisons. However, the optimal value may vary depending on your data.

    The size of the facial area can affect model performance. For optimal accuracy, a minimum facial area of 300×300 pixels is recommended.

    Although the model is trained for unconstrained face recognition, recognizing faces in a constrained capture setting is naturally an easier task and yields better performance.

    Check out the example below:

    import requests
    import base64
    import json
     
    def encode_image(file_path):
        with open(file_path, "rb") as f:
            encoded_string = base64.b64encode(f.read()).decode('utf-8')
        return encoded_string
     
    def compare_faces(api_key, endpoint, file_path_1, file_path_2, threshold=0.5):
        encoded_image_1 = encode_image(file_path_1)
        encoded_image_2 = encode_image(file_path_2)
        payload_1 = {
            "image_data": encoded_image_1
        }
        payload_2 = {
            "image_data": encoded_image_2
        }
        payload = json.dumps({
            "payload_1": payload_1,
            "payload_2": payload_2,
            "cosine_similarity_threshold": threshold
        })
        headers = {
            "x-api-key": api_key,
            "Content-Type": "application/json"
        }
        response = requests.post(endpoint,
                                 data=payload,
                                 headers=headers)
        return response
     
    url = "https://api.biomodality.com/compare_faces"
    image_path_1 = "./samples/example_image_1.jpg"
    image_path_2 = "./samples/example_image_2.jpg"
    response = compare_faces(YOUR_API_KEY, url, image_path_1, image_path_2, threshold=0.4)
    print(response.json())
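
    To tune the threshold on your own data, collect cosine similarities for genuine (same-person) and impostor (different-person) pairs, then sweep candidate thresholds and find where FRR and FAR are closest — the Equal Error Rate point. A rough sketch on illustrative synthetic scores (the score values below are made up for demonstration):

```python
def frr_far(genuine, impostor, threshold):
    # FRR: fraction of genuine pairs rejected; FAR: fraction of impostor pairs accepted.
    frr = sum(s < threshold for s in genuine) / len(genuine)
    far = sum(s >= threshold for s in impostor) / len(impostor)
    return frr, far

def eer_threshold(genuine, impostor, candidates):
    # Candidate threshold where |FRR - FAR| is smallest (first one on ties).
    return min(candidates,
               key=lambda t: abs(frr_far(genuine, impostor, t)[0]
                                 - frr_far(genuine, impostor, t)[1]))

genuine_scores = [0.82, 0.74, 0.69, 0.55, 0.58]   # same-person pairs (illustrative)
impostor_scores = [0.21, 0.33, 0.46, 0.52, 0.60]  # different-person pairs (illustrative)
candidates = [t / 100 for t in range(40, 81)]     # the documented 0.4 - 0.8 range
best = eer_threshold(genuine_scores, impostor_scores, candidates)
```

    With real data you would use many more pairs; shifting the threshold above `best` trades FAR for FRR (more security, less convenience), and below it the reverse.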

Voice ID

  • Accepts 16 kHz waveform audio (up to 10 seconds) and returns a 192-dimensional speaker embedding that can be used for verification or identification. Cosine similarity should be used as the comparison metric, with a recommended threshold range of 0.1 – 0.5.

    The optimal cosine similarity threshold depends on the application (text-dependent vs. text-independent), the quality of the captured audio, and the desired balance between convenience and security (i.e., minimizing the False Rejection Rate (FRR) vs. the False Acceptance Rate (FAR)). For the Equal Error Rate (EER), the recommended thresholds are 0.30 for text-independent and 0.38 for text-dependent comparisons. However, the optimal value may vary depending on the dataset.

    Audio duration can impact model performance. For optimal accuracy, input audio of 3 – 10 seconds is recommended, as longer audio provides the model with more information (e.g., the end user’s date of birth, phone number, or a 6-digit speech validation token).

    Although the model is trained for text-independent voice recognition, recognizing voices in a text-dependent setting is naturally an easier task and yields better performance.

    Check out the example below:

    import requests
    import base64
    import json
     
    def encode_wav(file_path):
        with open(file_path, "rb") as f:
            encoded_wav = base64.b64encode(f.read()).decode("utf-8")
        return encoded_wav
     
    def encode_voice(api_key, endpoint, file_path):
        encoded_wav = encode_wav(file_path)
        payload = json.dumps({
            "audio_data": encoded_wav
        })
        headers = {
            "x-api-key": api_key,
            "Content-Type": "application/json"
        }
        response = requests.post(endpoint,
                                 data=payload,
                                 headers=headers)
        return response
     
    url = "https://api.biomodality.com/encode_voice"
    wave_path = "./samples/example_audio.wav"
    response = encode_voice(YOUR_API_KEY, url, wave_path)
    print(response.json())
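
    Since the endpoint expects 16 kHz waveform audio of at most 10 seconds, it can be worth validating files locally before spending an API call. A minimal sketch using the standard-library wave module (the 16 kHz and 10-second limits are the ones stated on this page):

```python
import wave

def validate_wav(file_path, expected_rate=16000, max_seconds=10.0):
    # Returns (ok, reason); checks sample rate and duration before upload.
    with wave.open(file_path, "rb") as wf:
        rate = wf.getframerate()
        duration = wf.getnframes() / rate
    if rate != expected_rate:
        return False, f"sample rate is {rate} Hz, expected {expected_rate} Hz"
    if duration > max_seconds:
        return False, f"duration is {duration:.2f} s, maximum is {max_seconds} s"
    return True, "ok"
```

    For example, call `validate_wav("./samples/example_audio.wav")` before `encode_voice` and only post files that pass.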

  • Accepts two 16 kHz waveform audios (up to 10 seconds each), along with an optional cosine similarity threshold (0.1 – 0.5, defaults to 0.25), and returns the cosine similarity and match result between the two speakers in the audios.

    The optimal cosine similarity threshold depends on the application (text-dependent vs. text-independent), the quality of the captured audio, and the desired balance between convenience and security (i.e., minimizing the False Rejection Rate (FRR) vs. the False Acceptance Rate (FAR)). For the Equal Error Rate (EER), the recommended thresholds are 0.30 for text-independent and 0.38 for text-dependent comparisons. However, the optimal value may vary depending on the dataset.

    Audio duration can impact model performance. For optimal accuracy, input audio of 3 – 10 seconds is recommended, as longer audio provides the model with more information (e.g., the end user’s date of birth, phone number, or a 6-digit speech validation token).

    Although the model is trained for text-independent voice recognition, recognizing voices in a text-dependent setting is naturally an easier task and yields better performance.

    Check out the example below:

    import requests
    import base64
    import json
     
    def encode_wav(file_path):
        with open(file_path, "rb") as f:
            encoded_wav = base64.b64encode(f.read()).decode("utf-8")
        return encoded_wav
     
    def compare_voices(api_key, endpoint, file_path_1, file_path_2, threshold=0.25):
        encoded_wav_1 = encode_wav(file_path_1)
        encoded_wav_2 = encode_wav(file_path_2)
        payload_1 = {
            "audio_data": encoded_wav_1
        }
        payload_2 = {
            "audio_data": encoded_wav_2
        }
        payload = json.dumps({
            "payload_1": payload_1,
            "payload_2": payload_2,
            "cosine_similarity_threshold": threshold
        })
        headers = {
            "x-api-key": api_key,
            "Content-Type": "application/json"
        }
        response = requests.post(endpoint,
                                 data=payload,
                                 headers=headers)
        return response
     
    url = "https://api.biomodality.com/compare_voices"
    wave_path_1 = "./samples/example_audio_1.wav"
    wave_path_2 = "./samples/example_audio_2.wav"
    response = compare_voices(YOUR_API_KEY, url, wave_path_1, wave_path_2, threshold=0.25)
    print(response.json())

  • Accepts a length parameter (4 – 6) and returns a hyphen-separated token of the same length that is compatible with the validate_speech resource. You can use this resource to obtain example tokens and then generate similar ones locally for faster, more cost-efficient processing.

    Check out the example below:

    import requests
    import json
     
    def get_token(api_key, endpoint, length):
        payload = json.dumps({
            "length": length
        })
        headers = {
            "x-api-key": api_key,
            "Content-Type": "application/json"
        }
        response = requests.post(endpoint,
                                 data=payload,
                                 headers=headers)
        return response
     
    url = "https://api.biomodality.com/get_token"
    response = get_token(YOUR_API_KEY, url, 6)
    print(response.json())
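
    Once you have confirmed the format against tokens returned by get_token, generating them locally is straightforward. A sketch assuming tokens are hyphen-separated single digits, matching the "1-2-3-4-5-6" shape used in the validate_speech example:

```python
import secrets

def generate_token(length=6):
    # Hyphen-separated random digits, e.g. "4-0-7-2-9-1" for length 6.
    # Format is assumed from the validate_speech example; verify against get_token.
    if not 4 <= length <= 6:
        raise ValueError("length must be between 4 and 6")
    return "-".join(secrets.choice("0123456789") for _ in range(length))
```

    The `secrets` module is used rather than `random` because the token serves as a liveness challenge and should not be predictable.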

  • Accepts 16 kHz waveform audio (up to 10 seconds) and a 4-to-6 digit token, extracts the spoken digits from the audio (110 languages/regions supported), and returns the Levenshtein distance between the extracted digits and the provided token. The Levenshtein distance measures the minimum number of single-digit insertions, deletions, or substitutions required to transform the extracted digits into the provided token. The length of the token and the acceptable Levenshtein distance depend on how constrained the voice capture process is and the desired balance between security and convenience for your application. For improved convenience, using longer tokens with some tolerance for the Levenshtein distance is recommended (e.g., a 5-digit token with an acceptable Levenshtein distance of 1).

    Check out the example below:

    import requests
    import base64
    import json
     
    def encode_wav(file_path):
        with open(file_path, "rb") as f:
            encoded_wav = base64.b64encode(f.read()).decode("utf-8")
        return encoded_wav
     
    def validate_speech(api_key, endpoint, file_path, language_code, token):  
        encoded_wav = encode_wav(file_path)
        payload = json.dumps({
            "audio_data": encoded_wav,
            "language_code": language_code,
            "token": token
        })
        headers = {
            "x-api-key": api_key,
            "Content-Type": "application/json"
        }
        response = requests.post(endpoint,
                                 data=payload,
                                 headers=headers)
        return response
     
    url = "https://api.biomodality.com/validate_speech"
    wave_path = "./samples/example_speech.wav"
    language_code = "en-US"
    token = "1-2-3-4-5-6"
    response = validate_speech(YOUR_API_KEY, url, wave_path, language_code, token)
    print(response.json())
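
    The Levenshtein metric described above can also be computed locally, which is handy for testing acceptance rules (e.g., "distance ≤ 1 passes") before integrating. A standard dynamic-programming implementation:

```python
def levenshtein(a, b):
    # Minimum number of single-character insertions, deletions, or
    # substitutions required to transform string a into string b.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        curr = [i]
        for j, cb in enumerate(b, start=1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return prev[-1]

# Compare digits only, stripping the hyphens from the token format:
extracted = "123456"
token = "1-2-3-4-5-6".replace("-", "")
distance = levenshtein(extracted, token)  # 0 here: an exact match
```

    With a 5-digit token and an acceptable distance of 1, a single misheard digit still passes while two or more errors are rejected.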

Account

  • Accepts an optional days-back parameter (0 – 90, defaults to 30) and returns details about the usage plan associated with the API key, along with daily usage. Please note that this resource is not intended for real-time usage checks: actual usage data may take several minutes, and in some cases hours, to be reflected. It is better suited to billing and reporting purposes.

    Check out the example below:

    import requests
    import json
     
    def check_usage(api_key, endpoint, days_back=30):
        payload = json.dumps({
            "days_back": days_back
        })
        headers = {
            "x-api-key": api_key,
            "Content-Type": "application/json"
        }
        response = requests.post(endpoint,
                                 data=payload,
                                 headers=headers)
        return response
     
    url = "https://api.biomodality.com/check_usage"
    response = check_usage(YOUR_API_KEY, url, days_back=2)
    print(response.json())
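
    For reporting, you may want to aggregate the per-day figures returned by check_usage. The response schema is not shown on this page, so the `daily_usage` list of `{"date", "requests"}` objects below is a hypothetical shape used purely to illustrate the idea:

```python
def total_requests(usage_report):
    # Sum per-day request counts.
    # "daily_usage" and "requests" are assumed field names, not documented ones.
    return sum(day.get("requests", 0) for day in usage_report.get("daily_usage", []))

# Hypothetical report, e.g. usage_report = check_usage(...).json():
example_report = {
    "daily_usage": [
        {"date": "2024-05-01", "requests": 120},
        {"date": "2024-05-02", "requests": 95},
    ]
}
```

    Adjust the field names once you have inspected a real response.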
