Skip to content

Commit

Permalink
fix hallucination using threading and queue
Browse files Browse the repository at this point in the history
  • Loading branch information
Epic-Eric committed Aug 29, 2024
1 parent 86638df commit ccfaaa5
Showing 1 changed file with 37 additions and 28 deletions.
65 changes: 37 additions & 28 deletions simuleval/evaluator/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,29 +71,37 @@ def __init__(self, evaluator: SentenceLevelEvaluator) -> None:
self.finished = False
self.queue = Queue(maxsize=0)

# NOTE(review): this span is a rendered commit diff whose +/- markers and
# indentation were stripped, so pre-commit and post-commit lines sit side by
# side. Which line of each adjacent pair is the removed one is inferred from
# ordering only -- confirm against the repository before relying on it.
#
# Purpose (from the visible lines): open a PyAudio input stream, write every
# chunk that is read to a WAV file, hand the accumulated raw bytes to
# self.queue about every 0.5 s, and set self.finished when recording ends.
#
# Two signatures follow; presumably the first (taking `counter`) is the old one.
def record_audio(self, counter):
def record_audio(self):
# Size argument passed to each stream.read() call below.
CHUNK = 1024
# Sample format: 16-bit integer samples.
FORMAT = pyaudio.paInt16
# One channel when sys.platform is "darwin", two channels otherwise.
CHANNELS = 1 if sys.platform == "darwin" else 2
RATE = self.sample_rate
# Two assignments follow; presumably the first (segment size in ms converted
# to seconds) is the old line and the fixed 100 is its replacement.
RECORD_SECONDS = self.source_segment_size / 1000
RECORD_SECONDS = 100

# Two wave.open() lines follow; presumably the per-counter file name is the
# old line and the single "output.wav" is its replacement.
with wave.open(f"output{counter}.wav", "wb") as wf:
with wave.open(f"output.wav", "wb") as wf:
p = pyaudio.PyAudio()
# WAV header parameters mirror the capture settings above.
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)

stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True)

# Bytes captured since the last hand-off to self.queue.
all_data = bytearray()
# Wall-clock time of the last hand-off.
start = time.time()
# NOTE(review): `RATE // CHUNK` floor-divides before the multiplication, so
# the iteration count is slightly below RATE * RECORD_SECONDS / CHUNK
# whenever RATE is not a multiple of CHUNK.
for _ in range(0, round(RATE // CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
wf.writeframes(data)
all_data += data
# Once more than 0.5 s has elapsed, publish the accumulated bytes and start
# a fresh buffer and timer.
if time.time() - start > 0.5:
self.queue.put(all_data)
all_data = bytearray()
start = time.time()

# Converts only the most recently read chunk (`data`) to a float list;
# presumably a removed line from the pre-commit version -- TODO confirm.
self.float_array = byte_to_float(data).tolist()
# Presumably runs after the loop to flush the remaining bytes; nesting cannot
# be determined from this rendering -- TODO confirm.
self.queue.put(all_data)

stream.close()
p.terminate()
# Flag read by the `while not self.finished ...` loop in remote_eval.
self.finished = True

def read_from_audio(self):
CHUNK = 1024
Expand Down Expand Up @@ -134,45 +142,46 @@ def read_from_audio(self):
# NOTE(review): this span is a rendered commit diff whose +/- markers and
# indentation were stripped; removed and added lines are interleaved, and
# which is which is inferred from ordering only -- confirm against the
# repository.
#
# Purpose (from the visible lines): reset the system, start an audio thread,
# then repeatedly take raw audio from self.queue, wrap it in a SpeechSegment,
# send it, receive a prediction and print it.
def remote_eval(self):
# Initialization
self.system_reset()
# Two thread constructions follow; presumably the read_from_audio target is
# the old line and record_audio is its replacement.
recording = threading.Thread(target=self.read_from_audio)
recording = threading.Thread(target=self.record_audio)
recording.start()

# The commented-out block below duplicates the live loop further down;
# presumably it is the pre-commit (disabled) form of that loop.
# while not self.finished or not self.queue.empty():
# # print(self.queue.qsize())
# data = byte_to_float(self.queue.get()).tolist()
# # print(self.queue.qsize())
# segment = SpeechSegment(
# index=self.source_segment_size,
# content=data,
# sample_rate=self.sample_rate,
# finished=False,
# )
# self.send_source(segment)
# output_segment = self.receive_prediction()
# # import pdb

# # pdb.set_trace()
# prediction_list = str(output_segment.content.replace(" ", ""))
# print(prediction_list, end=" ")
# sys.stdout.flush()
# # time.sleep(1)

print("Recording...")
# Keep consuming while the recorder has not set self.finished or while
# items are still queued.
# NOTE(review): self.queue.get() below is called with no timeout. If the
# queue is drained while self.finished is still False and the recorder then
# finishes without another put(), this call would presumably block forever
# (assuming self.queue is a standard queue.Queue) -- TODO confirm.
while not self.finished or not self.queue.empty():
# print(self.queue.qsize())
# Raw bytes from the queue are converted to a list of floats.
data = byte_to_float(self.queue.get()).tolist()
# print(self.queue.qsize())
# Every segment is sent with index=self.source_segment_size and
# finished=False.
segment = SpeechSegment(
index=self.source_segment_size,
content=data,
sample_rate=self.sample_rate,
finished=False,
)
# Send to VAD
# 1. At the beginning, if sound, send to model. If no sound, skip segment (if not then hallucinations)
# 2. At the end of sentence (VAD), reset model (finished=True)
self.send_source(segment)
output_segment = self.receive_prediction()
# import pdb

# pdb.set_trace()
# Spaces are removed from the prediction content before printing; each
# print ends with a single space instead of a newline.
prediction_list = str(output_segment.content.replace(" ", ""))
print(prediction_list, end=" ")
# Flush so the partial output appears immediately.
sys.stdout.flush()
# time.sleep(1)

# The commented-out block below is a disabled variant that called
# self.record_audio(counter) in an endless loop and sent self.float_array.
# print("Recording...")
# counter = 0
# while True:
# self.record_audio(counter)
# counter += 1
# segment = SpeechSegment(
# index=self.source_segment_size,
# content=self.float_array,
# sample_rate=self.sample_rate,
# finished=False,
# )
# self.send_source(segment)
# output_segment = self.receive_prediction()
# prediction_list = str(output_segment.content.replace(" ", ""))
# print(prediction_list, end=" ")
# sys.stdout.flush()


def pcm2float(sig, dtype="float32"):
Expand Down

0 comments on commit ccfaaa5

Please sign in to comment.