Generate & stream a tone:

ffmpeg -re -f lavfi -i aevalsrc="sin(400*2*PI*t)" -ar 8000 -f mulaw -f rtp rtp://127.0.0.1:1234
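
As a quick sanity check of the aevalsrc expression, a small Python sketch (standard library only, hypothetical tone.wav output) that writes the same 400 Hz sine at 8 kHz to a wav file:

import math
import struct
import wave

RATE = 8000      # matches -ar 8000
FREQ = 400       # matches sin(400*2*PI*t)
DURATION = 2     # seconds

# Build 16-bit PCM samples for a 400 Hz sine tone.
frames = b"".join(
    struct.pack("<h", int(32767 * math.sin(2 * math.pi * FREQ * n / RATE)))
    for n in range(RATE * DURATION)
)

with wave.open("tone.wav", "wb") as wav:
    wav.setnchannels(1)       # mono
    wav.setsampwidth(2)       # 16-bit PCM
    wav.setframerate(RATE)
    wav.writeframes(frames)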

To stream an audio file:

ffmpeg -stream_loop -1 -re -i /Users/harims/code/archived/vosk_code/audiofiles/hellohowareyou.mp3  -ar 8000 -f mulaw -f rtp rtp://127.0.0.1:1234

To receive:

Take the SDP description printed by the streaming command above, save it to a file such as something.sdp, and open that file with VLC, or with ffplay:

ffplay -protocol_whitelist file,udp,rtp -i /<fullpath>/something.sdp

Only one receiver at a time.

Example something.sdp file:

v=0
o=- 0 0 IN IP4 127.0.0.1
s=No Name
c=IN IP4 127.0.0.1
t=0 0
a=tool:libavformat 60.3.100
m=audio 1234 RTP/AVP 97
b=AS:128
a=rtpmap:97 PCMU/8000/2
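
To script the receive step, a minimal Python sketch (assuming ffplay is on PATH and reusing the SDP text above verbatim):

import subprocess
from pathlib import Path

# SDP copied from the streaming command's output (see above).
SDP_TEXT = """\
v=0
o=- 0 0 IN IP4 127.0.0.1
s=No Name
c=IN IP4 127.0.0.1
t=0 0
a=tool:libavformat 60.3.100
m=audio 1234 RTP/AVP 97
b=AS:128
a=rtpmap:97 PCMU/8000/2
"""

sdp_path = Path("something.sdp")
sdp_path.write_text(SDP_TEXT)

# Same as the ffplay command above; the whitelist is required for
# ffplay to accept udp/rtp referenced from a local sdp file.
subprocess.run(["ffplay", "-protocol_whitelist", "file,udp,rtp",
                "-i", str(sdp_path)])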

Transcribing

python vosk-transcriber --input /Users/harims/code/archived/vosk_code/audiofiles/hellohowareyou.mp3

The vosk-transcriber entry point installed by the vosk package is just a thin wrapper around vosk.transcriber.cli:
#!/Users/harims/code/venvs/voicerecognition/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from vosk.transcriber.cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())

Using the Kaldi recognizer

#!/usr/bin/env python3

import subprocess
import sys

from vosk import Model, KaldiRecognizer, SetLogLevel

SAMPLE_RATE = 16000

SetLogLevel(0)

model = Model(lang="en-us")
rec = KaldiRecognizer(model, SAMPLE_RATE)

# Decode the RTP stream described by foo.sdp with ffmpeg and pipe raw
# 16 kHz mono 16-bit PCM to stdout for the recognizer.
with subprocess.Popen(["ffmpeg", "-loglevel", "quiet",
                       "-protocol_whitelist", "file,udp,rtp",
                       "-i", "/Users/harims/code/archived/vosk_code/foo.sdp",
                       "-ar", str(SAMPLE_RATE), "-ac", "1",
                       "-f", "s16le", "-"],
                      stdout=subprocess.PIPE) as process:

    while True:
        data = process.stdout.read(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            print(rec.Result())
        # else:
        #     print(rec.PartialResult())

    print(rec.FinalResult())
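
rec.Result(), rec.PartialResult() and rec.FinalResult() all return JSON strings, so the transcript has to be pulled out of the "text" (or "partial") field. A small helper, assuming the recognizer loop above:

import json

def extract_text(result_json):
    # Vosk results look like '{"text": "hello how are you"}'.
    return json.loads(result_json).get("text", "")

# e.g. inside the loop above:
#     if rec.AcceptWaveform(data):
#         print(extract_text(rec.Result()))
# and after the loop:
#     print(extract_text(rec.FinalResult()))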

**Working Input from Microphone & Transcribing**

#!/usr/bin/env python3

import subprocess
import sys

from vosk import Model, KaldiRecognizer, SetLogLevel

SAMPLE_RATE = 16000

SetLogLevel(0)
model = Model(lang="en-us")
# model = Model(model_path="/Users/harims/code/vosk-model-en-us-0.42-gigaspeech", lang="en-us")  # big model
rec = KaldiRecognizer(model, SAMPLE_RATE)

# FROM MIC

import pyaudio # Soundcard audio I/O access library
import wave # Python 3 module for reading / writing simple .wav files

# Setup channel info
FORMAT = pyaudio.paInt16 # data type format
CHANNELS = 1 # Adjust to your number of channels
RATE = SAMPLE_RATE # Sample Rate
CHUNK = 1024 # Block Size
RECORD_SECONDS = 10 # Record time
WAVE_OUTPUT_FILENAME = "file.wav"

# Startup pyaudio instance
audio = pyaudio.PyAudio()

# start Recording
stream = audio.open(format=FORMAT, channels=CHANNELS,
                rate=RATE, input=True,
                frames_per_buffer=CHUNK)

print("recording...")

frames = []

# Record for RECORD_SECONDS
# for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
#     data = stream.read(CHUNK)
#     frames.append(data)
# print ("finished recording")

# frames = []

# Record for RECORD_SECONDS
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    # frames.append(data)
    if len(data) == 0:
        break
    if rec.AcceptWaveform(data):
        print(rec.Result())
    else:
        print(rec.PartialResult())
        
print ("finished recording")


# Stop Recording
stream.stop_stream()
stream.close()
audio.terminate()

###########################
# Write to file
###########################
# Write your new .wav file with built in Python 3 Wave module
# waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
# waveFile.setnchannels(CHANNELS)
# waveFile.setsampwidth(audio.get_sample_size(FORMAT))
# waveFile.setframerate(RATE)
# waveFile.writeframes(b''.join(frames))
# waveFile.close()

#####################################################################
# Working code - From rtp stream (via sdp file)
#####################################################################

# with subprocess.Popen(["ffmpeg", "-loglevel", "quiet",
#                        "-protocol_whitelist", "file,udp,rtp",
#                        "-i", "/Users/harims/code/archived/vosk_code/foo.sdp",
#                        "-ar", str(SAMPLE_RATE), "-ac", "1",
#                        "-f", "s16le", "-"],
#                       stdout=subprocess.PIPE) as process:

#     while True:
#         data = process.stdout.read(4000)
#         if len(data) == 0:
#             break
#         if rec.AcceptWaveform(data):
#             print(rec.Result())
#         # else:
#         #     print(rec.PartialResult())

#     print(rec.FinalResult())

Text to speech

tts --text "Text for TTS" \
    --model_name "tts_models/en/ek1/tacotron2" \
    --vocoder_name "vocoder_models/universal/libri-tts/wavegrad" \
    --out_path ~/sound.wav
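
The same synthesis can be scripted through Coqui TTS's Python API. A minimal sketch, assuming the TTS package behind the tts command above is installed (the model's default vocoder is used here instead of the wavegrad one):

from TTS.api import TTS

# Load the Tacotron2 model from the CLI example above
# (downloaded on first use).
tts = TTS(model_name="tts_models/en/ek1/tacotron2")

# Synthesize straight to a wav file.
tts.tts_to_file(text="Text for TTS", file_path="sound.wav")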

#sideprojects