Skip to content

Commit

Permalink
support Web Speech API
Browse files Browse the repository at this point in the history
  • Loading branch information
Mahmoudz committed May 10, 2024
1 parent 0b97b51 commit 51209be
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 28 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Voice AI Assistant (React JS)
# AI Voice Assistant (React JS)

Turn your App smart with a conversational AI assistant and interactive voice UI **in less than 10 minutes**!
Give your App a voice with a conversational AI assistant and an interactive voice UI **in less than 10 minutes**!

**No Code Changes! No Intent Definitions!** _Just add our magic button `<AiAssistantButton />`._

Expand Down Expand Up @@ -367,7 +367,7 @@ Change AI assistant's voice via the [Admin Panel](https://admin.sista.ai/applica
<img src="./assets/sista-icon.png" alt="Sista Logo" width="100"/>
</a>

Unlock the Future with our advacned **Voice AI Assistant**: Embrace top-tier components:
Unlock the Future with our advanced **AI Voice Assistant**: Embrace top-tier components:

- Conversational AI Agents
- Interactive Voice UI
Expand Down
99 changes: 76 additions & 23 deletions src/core/AiAssistantEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import Scraper from './Scraper';
import config from './config';
import { VoiceFunction } from './commonTypes';
import User from './User';
import SpeechToText from './SpeechToText';

interface ApiResponse {
data: {
Expand All @@ -22,17 +23,24 @@ interface ApiResponse {
message: string;
}

// How a spoken user command is captured before being sent to the API:
// - AUDIO: record raw audio (AudioRecorder) and upload the blob for
//   server-side transcription.
// - TEXT:  transcribe locally via the browser's Web Speech API
//   (SpeechToText) and upload the resulting string.
enum UserInputMethod {
    AUDIO = 'AUDIO',
    TEXT = 'TEXT',
}

class AiAssistantEngine extends EventEmitter {
private readonly apiKey: string;
private readonly apiUrl: string;
private readonly scrapeContent: boolean;
private readonly sdkVersion: string;
private readonly audioPlayer: AudioPlayer;
private readonly audioRecorder: AudioRecorder;
private readonly speechToText: SpeechToText;
private readonly functionExecutor: FunctionExecutor;
private readonly scraper: Scraper;
private readonly user: User;
private readonly pageContent: Record<string, string[]> | null;
private userInputMethod: UserInputMethod;

constructor(
apiKey: string,
Expand All @@ -42,33 +50,45 @@ class AiAssistantEngine extends EventEmitter {
debugMode: boolean = false,
) {
super();
Logger.setDebugMode(debugMode);

if (!apiKey) {
throw new Error(
'Missing API Key for AiAssistantProvider. Get your FREE Key from https://admin.sista.ai/applications',
);
}

Logger.setDebugMode(debugMode);
// Control the user input method. TEXT = try to convert speech to text using
// browser's SpeechRecognition API first and fallback to Audio recording if it fails.
this.userInputMethod = UserInputMethod.TEXT;
this.sdkVersion = pkg.version;
Logger.log(
`--[SISTA]-- Initializing AiAssistantEngine Version: ${this.sdkVersion}`,
);
this.scrapeContent = scrapeContent;
this.apiKey = apiKey;
Logger.log(
'--[SISTA]-- Using Access Key:',
'...' + this.apiKey.slice(-8),
);
this.apiUrl = apiUrl;
Logger.log('--[SISTA]-- Using Base URL:', this.apiUrl);

this.audioPlayer = new AudioPlayer();
this.scrapeContent = scrapeContent;
this.user = new User(userId);
this.speechToText = new SpeechToText();
this.audioRecorder = new AudioRecorder();
this.audioPlayer = new AudioPlayer();
this.functionExecutor = new FunctionExecutor();
this.scraper = new Scraper();
this.pageContent = this.scrapeContent ? this.scraper.getText() : null;
this.user = new User(userId);

// Log the custom object
Logger.log(
'--[SISTA]-- Initialize Ai Assistant Engine:',
JSON.stringify(
{
Version: this.sdkVersion,
APIKey: '...' + this.apiKey.slice(-8),
APIUrl: this.apiUrl,
AutoScrapeContent: this.scrapeContent,
ConfiguredUserInputMethod: this.userInputMethod,
User: this.user,
},
null,
2,
),
);
}

registerFunctions(voiceFunctions: VoiceFunction[]): void {
Expand All @@ -79,40 +99,72 @@ class AiAssistantEngine extends EventEmitter {
Logger.log('--[SISTA]-- startProcessing');

this.emitStateChange(EventEmitter.STATE_LISTENING_START);

this.audioPlayer.playStartTone();

let userAudioCommand: Blob | undefined;
let inputUserCommand: string | Blob;

try {
userAudioCommand = await this.audioRecorder.startRecording();
inputUserCommand = await this.getUserInput();
Logger.log(
`--[SISTA]-- Used "User Input Method" = ${this.userInputMethod}`,
);
} catch (err) {
Logger.error('Error accessing the microphone:', err);
Logger.error('Error getting user input:', err);
this.emitStateChange(EventEmitter.STATE_IDLE);
return;
}

if (userAudioCommand) {
if (inputUserCommand) {
try {
await this._makeAPIRequest(userAudioCommand);
await this._makeAPIRequest(inputUserCommand);
} catch (err) {
Logger.error('Error making API request:', err);
this.emitStateChange(EventEmitter.STATE_IDLE);
}
}
};

private _makeAPIRequest = async (audioBlob: Blob): Promise<void> => {
/**
 * Captures one user command using the currently configured input method.
 *
 * TEXT uses the browser's SpeechRecognition API (returns a transcript
 * string); AUDIO records the microphone (returns an audio Blob). If the
 * active method fails, we switch to the other method once and retry.
 *
 * @param isFallbackAttempt internal flag — true when this call is already
 *        the one-time fallback retry. Defaults to false, so external
 *        callers are unaffected.
 * @returns the transcript string (TEXT) or recorded audio Blob (AUDIO).
 * @throws the underlying capture error when both input methods fail.
 */
private async getUserInput(
    isFallbackAttempt: boolean = false,
): Promise<string | Blob> {
    try {
        switch (this.userInputMethod) {
            case UserInputMethod.AUDIO:
                return await this.audioRecorder.startRecording();
            case UserInputMethod.TEXT:
                return await this.speechToText.convertSpeechToText();
            default:
                throw new Error('Invalid user input method!');
        }
    } catch (err) {
        // BUG FIX: the original recursed unconditionally, flipping the
        // method each time — if both AUDIO and TEXT kept failing it would
        // recurse forever. Allow exactly one fallback, then surface the
        // error to the caller (startProcessing already catches it).
        if (isFallbackAttempt) {
            throw err;
        }
        Logger.error('Error getting user input, switching method:', err);
        this.userInputMethod =
            this.userInputMethod === UserInputMethod.AUDIO
                ? UserInputMethod.TEXT
                : UserInputMethod.AUDIO;
        Logger.log(
            `--[SISTA]-- FALLBACK: Switching "User Input Method" To = ${this.userInputMethod}`,
        );
        return this.getUserInput(true);
    }
}

private _makeAPIRequest = async (
userInput: Blob | string,
): Promise<void> => {
Logger.log('--[SISTA]-- _makeAPIRequest');
this.emitStateChange(EventEmitter.STATE_THINKING_START);

const formData = new FormData();

if (this.userInputMethod === UserInputMethod.AUDIO) {
formData.append('userInputAsAudio', userInput as Blob);
} else if (this.userInputMethod === UserInputMethod.TEXT) {
formData.append('userInputAsText', userInput as string);
}

formData.append('sdkVersion', this.sdkVersion);
formData.append(
'endUser',
JSON.stringify(this.user.getEndUserDetails()),
);
formData.append('audio', audioBlob);
formData.append(
'functionsSignatures',
JSON.stringify(this.functionExecutor.functionSignatures),
Expand Down Expand Up @@ -154,7 +206,9 @@ class AiAssistantEngine extends EventEmitter {
// ----[ Step 1: Display User Input Command ]----
// Handle user command as text first. This is useful for debugging
if (response.data.inputVoiceCommandAsText) {
this._handleInputVoiceCommandAsText(response.data.inputVoiceCommandAsText);
this._handleInputVoiceCommandAsText(
response.data.inputVoiceCommandAsText,
);
}

// ----[ Step 2: Display AI Text Reply ]----
Expand All @@ -180,7 +234,6 @@ class AiAssistantEngine extends EventEmitter {
if (response.data.outputAudioReply) {
this._handleAudioResponse(response.data.outputAudioReply);
}

};

private _handleAudioResponse = (audioFile: string): void => {
Expand Down
109 changes: 109 additions & 0 deletions src/core/SpeechToText.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import Logger from './Logger';

// Minimal structural types for the browser's Web Speech API, declared
// locally because the API is not covered by all TS DOM lib configs.
// NOTE(review): these model only the members this SDK touches; the real
// SpeechRecognitionEvent / SpeechRecognitionErrorEvent interfaces are richer.
interface SpeechRecognitionEvent {
    results: Array<{
        // Indexed alternatives; only [0].transcript is read by this SDK.
        [index: number]: { transcript: string };
        isFinal: boolean;
    }>;
    resultIndex: number;
    // Present on error events only (consumed by onerror handlers).
    error?: string;
}

// Combined constructor + instance shape: `new ()` lets the same type be
// used both for the constructor value and for the created instance.
interface SpeechRecognitionObject {
    new (): SpeechRecognitionObject;
    prototype: SpeechRecognitionObject;
    continuous: boolean;
    interimResults: boolean;
    lang: string;
    onresult?: (event: SpeechRecognitionEvent) => void;
    onerror?: (event: SpeechRecognitionEvent) => void;
    onend?: () => void;
    start: () => void;
    stop: () => void;
}

// Ambient globals: browsers expose either the standard name or the
// webkit-prefixed one (e.g. Chrome/Safari), possibly neither.
declare const SpeechRecognition: SpeechRecognitionObject | undefined;
declare const webkitSpeechRecognition: SpeechRecognitionObject | undefined;

/**
 * Thin wrapper around the browser's Web Speech API that converts one
 * spoken utterance into a text transcript.
 */
class SpeechToText {
    private recognition: SpeechRecognitionObject;
    private finalTranscript: string = '';
    // True while a recognition session is active; guards against
    // overlapping start() calls (start() throws if already running).
    private isListening: boolean = false;

    /**
     * Configures a single-utterance, final-results-only recognizer using
     * the standard or webkit-prefixed SpeechRecognition implementation.
     * @throws Error when the browser supports neither.
     */
    constructor() {
        const SpeechRecognition =
            window.SpeechRecognition || window.webkitSpeechRecognition;
        if (!SpeechRecognition) {
            throw new Error(
                'Speech recognition API not supported in this browser.',
            );
        }
        this.recognition = new SpeechRecognition();
        this.recognition.continuous = false; // stop after one utterance
        this.recognition.interimResults = false; // final results only
        this.recognition.lang = navigator.language || 'en-US';

        this.recognition.onresult = (event: SpeechRecognitionEvent) => {
            // Append only final (non-interim) results to the transcript.
            for (let i = event.resultIndex; i < event.results.length; ++i) {
                const result = event.results[i];
                if (result.isFinal) {
                    this.finalTranscript += result[0].transcript.trim();
                }
            }
            Logger.log(`Final result: ${this.finalTranscript}`);
        };

        this.recognition.onerror = (event: SpeechRecognitionEvent) => {
            Logger.error(`Speech recognition error: ${event.error}`);
            this.isListening = false;
        };

        this.recognition.onend = () => {
            Logger.log('Speech recognition stopped.');
            this.isListening = false;
        };
    }

    /**
     * Runs one recognition session and resolves with the accumulated final
     * transcript (empty string when nothing was recognized).
     * @returns the transcript once the recognition service ends.
     * @throws (rejects) when a session is already running or on a
     *         recognition error.
     */
    public async convertSpeechToText(): Promise<string> {
        return new Promise((resolve, reject) => {
            // BUG FIX: the original tested `this.recognition.continuous`,
            // a configuration flag permanently set to false in the
            // constructor, so this re-entrancy guard could never fire.
            // Track the actual session state instead, and reject with an
            // Error object rather than a bare string.
            if (this.isListening) {
                reject(new Error('Recognition is already in progress'));
                return;
            }

            this.finalTranscript = '';

            this.recognition.onend = () => {
                Logger.log('Speech recognition service has ended.');
                this.isListening = false;
                resolve(this.finalTranscript);
            };

            this.recognition.onerror = (event: SpeechRecognitionEvent) => {
                Logger.error(`Speech recognition error: ${event.error}`);
                this.isListening = false;
                reject(new Error(event.error));
            };

            this.recognition.start();
            // BUG FIX: the original never set isListening to true anywhere,
            // leaving every isListening guard in this class dead.
            this.isListening = true;
            Logger.log('Speech recognition started.');
        });
    }

    /**
     * Starts a fire-and-forget recognition session; results accumulate in
     * finalTranscript via the constructor's onresult handler.
     */
    public startListening(): void {
        if (!this.isListening) {
            this.finalTranscript = '';
            this.recognition.start();
            this.isListening = true; // BUG FIX: was never set (see above)
            Logger.log('Speech recognition started.');
        }
    }

    /** Stops the active recognition session, if any. */
    public stopListening(): void {
        if (this.isListening) {
            this.recognition.stop();
            Logger.log('Speech recognition stopped.');
        }
    }
}

export default SpeechToText;
5 changes: 3 additions & 2 deletions src/core/User.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,17 @@ interface EndUserDetails {

class User {
private providedUserId: string | null;
private generatedUserId: string;

// providedUserId: the integrator-supplied user id, or null when the host
// app did not identify the user.
constructor(providedUserId: string | null) {
    this.providedUserId = providedUserId;
    // Generated once at construction so repeated getEndUserDetails() calls
    // report a stable id for the lifetime of this User instance.
    this.generatedUserId = this._generateEndUserId();
}

/**
 * Builds the end-user identification payload sent alongside API requests:
 * the browser's user agent, the id generated at construction time, and
 * the integrator-provided id (null when none was supplied).
 *
 * @returns a fresh EndUserDetails object on every call.
 */
public getEndUserDetails(): EndUserDetails {
    // Property order is preserved deliberately: callers JSON.stringify
    // this object, and reordering would change the serialized form.
    const details: EndUserDetails = {
        endUserAgent: navigator.userAgent,
        generatedEndUserId: this.generatedUserId,
        providedEndUserId: this.providedUserId,
    };
    return details;
}
Expand All @@ -34,5 +36,4 @@ class User {
return endUserId;
}
}

export default User;

0 comments on commit 51209be

Please sign in to comment.