Back to Templates
🎯 Purpose:
Generate audio files from text scripts stored in Google Drive.
🔁 Flow: read the .txt script → split it into sentence-sized chunks → generate audio for each chunk with Bark → concatenate the chunks → write the final .wav file.
📦 Dependencies:
/scripts/generate_voice.py
✏️ Notes:
Input scripts must be plain-text (.txt) files.
📦 /scripts/generate_voice.py:
import sys
import torch
import numpy
import re
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
# Allowlist the NumPy scalar global referenced by Bark checkpoints so that
# torch.load can deserialize them under the safe-globals mechanism.
# NumPy >= 2.0 moved the implementation module to numpy._core; fall back to
# the pre-2.0 numpy.core path so the script works on both versions.
try:
    _np_scalar = numpy._core.multiarray.scalar
except AttributeError:
    _np_scalar = numpy.core.multiarray.scalar
torch.serialization.add_safe_globals([_np_scalar])

# Monkey patch torch.load to default weights_only=False, because Bark's
# checkpoints contain pickled Python objects beyond plain tensors.
# SECURITY NOTE: weights_only=False runs arbitrary pickle code at load time —
# only use checkpoints downloaded from a trusted source.
_original_torch_load = torch.load

def patched_torch_load(f, *args, **kwargs):
    """torch.load wrapper that forces weights_only=False unless the caller set it."""
    kwargs.setdefault('weights_only', False)
    return _original_torch_load(f, *args, **kwargs)

torch.load = patched_torch_load

# Download/initialize the Bark models once at startup (can take a while on first run).
preload_models()
def split_text(text, max_len=300):
    """Split *text* into chunks of roughly at most *max_len* characters.

    Splits only at sentence boundaries (after ., ? or !) so no sentence is
    cut in half. A single sentence longer than *max_len* becomes its own
    chunk. Returns a list of non-empty, stripped chunk strings (empty for
    whitespace-only input).
    """
    # Split on punctuation to avoid mid-sentence cuts.
    sentences = re.split(r'(?<=[.?!])\s+', text)
    chunks = []
    current = ""
    for sentence in sentences:
        if len(current) + len(sentence) < max_len:
            current += sentence + " "
        else:
            # Guard: when the very first sentence is already >= max_len,
            # `current` is empty — appending it would emit an empty chunk
            # (the original code did, wasting a generate_audio("") call).
            if current.strip():
                chunks.append(current.strip())
            current = sentence + " "
    tail = current.strip()
    if tail:
        chunks.append(tail)
    return chunks
# --- CLI entry point: read the input script, synthesize per chunk, write one .wav ---

# Input text file and output path; fail with a usage message instead of an
# IndexError traceback when arguments are missing.
if len(sys.argv) < 3:
    print("Usage: generate_voice.py <input_text_path> <output_wav_path>")
    sys.exit(1)
input_text_path = sys.argv[1]
output_wav_path = sys.argv[2]

with open(input_text_path, 'r', encoding='utf-8') as f:
    full_text = f.read()

voice_preset = "v2/en_speaker_7"  # fixed Bark speaker preset
chunks = split_text(full_text)
# Guard: numpy.concatenate([]) raises ValueError, so bail out early on an
# empty/whitespace-only input file with a clear message.
if not chunks:
    print("Input file contains no text; nothing to generate.")
    sys.exit(1)

# Generate and concatenate audio chunks
audio_arrays = []
for chunk in chunks:
    print(f"Generating audio for chunk: {chunk[:50]}...")
    audio = generate_audio(chunk, history_prompt=voice_preset)
    audio_arrays.append(audio)

# Merge all audio chunks
final_audio = numpy.concatenate(audio_arrays)
# Write final .wav file at Bark's native sample rate.
write_wav(output_wav_path, SAMPLE_RATE, final_audio)
print(f"Full audio generated at: {output_wav_path}")