useSpeechRecognition

Hook that provides a streamlined interface for incorporating speech-to-text functionality

browser · low · test coverage

Api not supported, make sure to check for compatibility with different browsers when using this api

import { useSpeechRecognition } from '@siberiacancode/reactuse';
import { MicIcon, SearchIcon } from 'lucide-react';
import { useEffect, useRef, useState } from 'react';

import { cn } from '@/utils/lib';

/** How long (ms) without any recognition result before the "can't hear you" hint is shown. */
const SILENCE_TIMEOUT_MS = 2000;

/**
 * Voice-search demo for `useSpeechRecognition`.
 *
 * Renders a search input with a microphone button. While listening, every
 * final recognition result is appended to the query. If no result (interim or
 * final) arrives for `SILENCE_TIMEOUT_MS`, a "can't hear you" hint is shown.
 * Recognition is restarted automatically when the browser ends the session
 * while the user has not pressed stop.
 */
const Demo = () => {
  const [query, setQuery] = useState('');
  const [silent, setSilent] = useState(false);

  // Tracks the user's intent (start/stop pressed) independently of the hook's
  // `listening` state, so the `onend` handler can tell a user stop from a
  // browser-initiated end of the session.
  const listeningRef = useRef(false);
  const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  /** Cancels a pending silence timeout (if any) and hides the hint. */
  const clearSilenceTimer = () => {
    if (silenceTimerRef.current !== null) clearTimeout(silenceTimerRef.current);
    silenceTimerRef.current = null;
    setSilent(false);
  };

  /** Restarts the silence countdown; always clears the previous timeout first. */
  const armSilenceTimer = () => {
    clearSilenceTimer();
    silenceTimerRef.current = setTimeout(() => setSilent(true), SILENCE_TIMEOUT_MS);
  };

  const speechRecognition = useSpeechRecognition({
    language: 'en-US',
    continuous: true,
    interimResults: true,
    onResult: (event) => {
      // Any result, interim or final, proves speech is being picked up, so the
      // silence countdown restarts here rather than only on final results.
      // Results delivered after the user pressed stop must not re-arm it.
      if (listeningRef.current) armSilenceTimer();

      const result = event.results[event.results.length - 1];
      if (!result?.isFinal) return;

      const transcript = result[0]?.transcript ?? '';
      setQuery((prev) => `${prev} ${transcript}`.trim());
    }
  });

  const recognition = speechRecognition.recognition;

  // On unmount: drop the pending timeout and mark the session as stopped so
  // the `onend` handler installed in `onStart` does not restart recognition.
  useEffect(
    () => () => {
      listeningRef.current = false;
      if (silenceTimerRef.current !== null) clearTimeout(silenceTimerRef.current);
      silenceTimerRef.current = null;
    },
    []
  );

  const onStart = () => {
    listeningRef.current = true;
    if (recognition) {
      // NOTE(review): this replaces whatever `onend` handler is already set on
      // the instance (possibly the hook's own) — confirm that is intended.
      recognition.onend = () => {
        // The browser may end a session by itself; keep listening until the
        // user explicitly stops.
        if (listeningRef.current) recognition.start();
        else clearSilenceTimer();
      };
    }
    speechRecognition.start();
    armSilenceTimer();
  };

  const onStop = () => {
    listeningRef.current = false;
    clearSilenceTimer();
    speechRecognition.stop();
  };

  if (!speechRecognition.supported)
    return (
      <p>
        Api not supported, make sure to check for compatibility with different browsers when using
        this{' '}
        <a
          href='https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition'
          rel='noreferrer'
          target='_blank'
        >
          api
        </a>
      </p>
    );

  return (
    <section className='flex w-full max-w-sm flex-col gap-2 p-4'>
      <span className='text-muted-foreground text-xs'>Voice search use English (US)</span>

      <div className='relative'>
        <SearchIcon className='text-muted-foreground pointer-events-none absolute top-1/2 left-4 size-4 -translate-y-1/2' />
        <input
          className='h-11! rounded-full! pr-12! pl-10!'
          placeholder={speechRecognition.listening ? 'Listening…' : 'Search or speak'}
          value={query}
          onChange={(event) => setQuery(event.target.value)}
        />
        <button
          aria-label={speechRecognition.listening ? 'Stop' : 'Search by voice'}
          className='absolute top-1/2 right-2 -translate-y-1/2 rounded-full!'
          data-size='icon-sm'
          data-variant={speechRecognition.listening ? 'default' : 'ghost'}
          type='button'
          onClick={() => (speechRecognition.listening ? onStop() : onStart())}
        >
          <MicIcon className={cn('size-4', speechRecognition.listening && 'animate-pulse')} />
        </button>
      </div>

      {silent && (
        <p className='text-muted-foreground min-h-4 text-xs'>Can't hear you — try speaking up</p>
      )}
    </section>
  );
};

export default Demo;

This hook uses the `window.SpeechRecognition` browser API to provide enhanced functionality. Make sure to check for compatibility with different browsers when using this API.

Installation

pnpm add @siberiacancode/reactuse

Usage

// Destructures the fields declared on UseSpeechRecognitionReturn (`transcript` and `final`; there is no `value` field).
const { supported, transcript, final, recognition, listening, error, start, stop, toggle } = useSpeechRecognition();

Type Declarations

/** Options accepted by `useSpeechRecognition`; every field is optional. */
interface UseSpeechRecognitionOptions {
  /** If true, recognition continues even after pauses in speech. Default is false */
  continuous?: SpeechRecognition['continuous'];
  /** A list of grammar rules (a `SpeechGrammarList`). No default */
  grammars?: SpeechRecognition['grammars'];
  /** If true, interim (non-final) results are provided as the user speaks. Default is false */
  interimResults?: SpeechRecognition['interimResults'];
  /** The language in which recognition should occur. Must be a valid BCP 47 language tag (e.g., "en-US", "ru-RU"). Default is "en-US" */
  language?: SpeechRecognition['lang'];
  /** The maximum number of alternative transcripts returned for a given recognition result. Must be a positive integer. Default is 1 */
  maxAlternatives?: SpeechRecognition['maxAlternatives'];
  /** Callback invoked when speech recognition ends */
  onEnd?: () => void;
  /** Callback invoked when an error occurs during recognition; receives the error event */
  onError?: (error: SpeechRecognitionErrorEvent) => void;
  /** Callback invoked when recognition produces a result; receives the result event */
  onResult?: (event: SpeechRecognitionEvent) => void;
  /** Callback invoked when speech recognition starts */
  onStart?: () => void;
}

/** Value returned by `useSpeechRecognition`: recognition state plus control functions. */
interface UseSpeechRecognitionReturn {
  /** The last recognition error event, or null when there is none */
  error: SpeechRecognitionErrorEvent | null;
  /** Boolean flag, not a transcript — presumably true when the current transcript is a final (non-interim) result; confirm against the hook implementation */
  final: boolean;
  /** Whether the hook is currently listening for speech */
  listening: boolean;
  /** The speech recognition instance; optional — presumably undefined when the API is not supported */
  recognition?: SpeechRecognition;
  /** Whether the current browser supports the Web Speech API */
  supported: boolean;
  /** The current transcript */
  transcript: string;
  /** Begins speech recognition */
  start: () => void;
  /** Ends speech recognition, finalizing results */
  stop: () => void;
  /** Toggles the listening state; the optional `value` presumably forces a specific state instead of flipping it */
  toggle: (value?: boolean) => void;
}

API

Parameters

Name	Type	Default	Note
options.continuous	boolean	false	Whether recognition should continue after pauses
options.interimResults	boolean	false	Whether interim results should be provided
options.language	string	"en-US"	The language for recognition, as a valid BCP 47 tag
options.maxAlternatives	number	1	The maximum number of alternative transcripts to return
options.grammars	`SpeechGrammarList`	-	A list of grammar rules
options.onStart	`() => void`	-	Callback invoked when speech recognition starts
options.onEnd	`() => void`	-	Callback invoked when speech recognition ends
options.onError	`(error: SpeechRecognitionErrorEvent) => void`	-	Callback invoked when an error occurs during recognition
options.onResult	`(event: SpeechRecognitionEvent) => void`	-	Callback invoked when recognition produces a result

Returns

UseSpeechRecognitionReturn

Contributors

debabin · Artem Dereviago

Last updated on

useShare useSpeechSynthesis