I am using the Azure Speech to Text API to transcribe Arabic audio, but the text returned by the API does not match the speech in the audio files. How can I improve the result so that it matches the speech?
I also have a problem separating the two speakers, Guest1 and Guest2: the text that the API attributes to each speaker does not match what is actually said in the audio. How should I handle this?
I am calling the Azure Speech to Text API from an ASP.NET Core MVC app, and the audio files are call recordings from a call center.
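For quick reference, the transcription configuration I use boils down to the following condensed sketch (the full ConversationTranscriber method is further down; the subscription key and file name here are placeholders):

using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.Transcription;

// Condensed from the ConversationTranscriber method below.
// "<subscription-key>" and "call.wav" are placeholders.
var speechConfig = SpeechConfig.FromSubscription("<subscription-key>", "eastus");
var autoDetect = AutoDetectSourceLanguageConfig.FromLanguages(new[] { "ar-EG", "en-US" });
speechConfig.OutputFormat = OutputFormat.Detailed;
speechConfig.EnableDictation();

using var audioConfig = AudioConfig.FromWavFileInput("call.wav");
using var transcriber = new ConversationTranscriber(speechConfig, autoDetect, audioConfig);
// The Transcribed / Canceled / SessionStopped handlers and the
// Start/StopTranscribingAsync calls are wired up exactly as in the full method below.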
This is my UpladAudio action, which takes the uploaded audio file and sends it to the Azure Speech API for conversion to text:
using Microsoft.AspNetCore.Mvc;
using Microsoft.EntityFrameworkCore;
using SpeechAnalytics.Backend.Services;
using SpeechAnalytics.Backend.Services.backgroundTask;
using SpeechAnalytics.Backend.ViewModel;
using SpeechAnalytics.Core.Entities;

namespace SpeechAnalytics.Backend.Controllers
{
    public class AudioController : BaseController
    {
        private readonly IWebHostEnvironment _webHostEnvironment;
        private readonly SpeechWordsCalculateService _speechWordsService;
        // private TranscriptionConvertService _transcription;
        private AudioTranscriptionTask _audioTranscriptionTask;

        public AudioController(AudioTranscriptionTask audioTranscriptionTask, IWebHostEnvironment webHostEnvironment, SpeechWordsCalculateService speechWordsService)
        {
            // _transcription = transcription;
            _audioTranscriptionTask = audioTranscriptionTask;
            _webHostEnvironment = webHostEnvironment;
            _speechWordsService = speechWordsService;
        }

        public IActionResult UpladAudio()
        {
            AudioVM model = new() { Date = DateTime.Now };
            return View(model);
        }

        // Done action
        [HttpPost]
        public async Task<IActionResult> UpladAudio(AudioVM audio)
        {
            #region PreviousNotEnqueueCode
            // Save the audio information to the database
            //var model = new Audio
            //{
            //    FilePath = audio.FilePath,
            //    AudioName = audio.AudioName,
            //    audioStatus = AudioStatus.WaitTranscribing,
            //    UserId = CurrentUserData.UserId,
            //    Date = audio.Date
            //};
            //await _context.Audios.AddAsync(model);
            //await _context.SaveChangesAsync();
            //string attachmentFolderPath = Path.Combine(_webHostEnvironment.WebRootPath, "Attachments");
            //string audioFilePath = Path.Combine(attachmentFolderPath, audio.FilePath);
            //var res = await _transcription.ConversationTranscriber(audioFilePath);
            //if (res.Success == true)
            //{
            //    model.audioStatus = AudioStatus.Transcribed;
            //    _context.Audios.Update(model);
            //    var TranscriptionModel = new AudioTranscription()
            //    {
            //        AudioId = model.Id,
            //        Transcription = JsonConvert.SerializeObject(res.Transcriptions),
            //        IsDeleted = false
            //    };
            //    _context.AudioTranscriptions.Add(TranscriptionModel);
            //}
            //else
            //{
            //    model.audioStatus = AudioStatus.NotTranscribed;
            //    _context.Audios.Update(model);
            //}
            //_context.SaveChanges();
            #endregion

            // Quota check: remaining seconds = system quota minus seconds already transcribed for this user
            var StandreadQuota = TimeSpan.FromSeconds(_context.Settings.FirstOrDefault().QuotaSystemSeconds);
            var audioDurationSum = (int)_context.Audios.Where(a => a.UserId == CurrentUserData.UserId && a.audioStatus == AudioStatus.Transcribed && a.IsDeleted != true).Sum(a => a.audioDuration);
            var ReminingQuota = (int)Math.Round(StandreadQuota.Subtract(TimeSpan.FromSeconds(audioDurationSum)).TotalSeconds);
            var audioDuration = (int)GetAudioDuration(Path.Combine(_webHostEnvironment.WebRootPath, "Attachments", audio.FilePath));

            // Save the audio information to the database
            var model = new Audio
            {
                FilePath = audio.FilePath,
                AudioName = audio.AudioName,
                audioDuration = GetAudioDuration(Path.Combine(_webHostEnvironment.WebRootPath, "Attachments", audio.FilePath)),
                audioStatus = AudioStatus.WaitTranscribing,
                UserId = CurrentUserData.UserId,
                Date = audio.Date
            };

            if (audioDuration > ReminingQuota)
            {
                return Json(-1);
            }

            await _context.Audios.AddAsync(model);
            await _context.SaveChangesAsync();

            // Enqueue the audio transcription task as a background task
            //Task.Run(() => _audioTranscriptionTask.ExecuteAsync(Path.Combine(_webHostEnvironment.WebRootPath, "Attachments", audio.FilePath), model.Id , model.UserId , audioDuration));
            Task.Run(async () =>
            {
                await _audioTranscriptionTask.ExecuteAsync(Path.Combine(_webHostEnvironment.WebRootPath, "Attachments", audio.FilePath), model.Id, model.UserId, audioDuration);
                await _speechWordsService.CalculateSpeechWordsInAllTranscription(model.UserId);
            });

            return Json(1);
        }
    }
}
The UpladAudio action enqueues ExecuteAsync, which in turn calls the method that connects to the Azure Speech to Text API. This is its code:
public async Task ExecuteAsync(string audioFilePath, int audioId, int userId, int audioDuration)
{
    Audio audio = new();
    var result = await _transcriptionService.ConversationTranscriber(audioFilePath);

    using (var scope = new TransactionScope(TransactionScopeAsyncFlowOption.Enabled))
    {
        var dbContextOptions = new DbContextOptionsBuilder<SpeechAnalyticsDbContext>().UseSqlServer("Server=10.1.1.210;Database=SpeechAnalyticsDB;User Id=sa;Password=sa_2014;TrustServerCertificate=true;").Options;
        using (var context = new SpeechAnalyticsDbContext(dbContextOptions))
        {
            audio = await context.Audios.FindAsync(audioId);

            #region Calculate Quota
            var StandreadQuota = TimeSpan.FromSeconds(context.Settings.FirstOrDefault().QuotaSystemSeconds);
            var audioDurationSum = (int)context.Audios.Where(a => a.UserId == userId && a.audioStatus == AudioStatus.Transcribed && a.IsDeleted != true).Sum(a => a.audioDuration);
            var ReminingQuota = (int)Math.Round(StandreadQuota.Subtract(TimeSpan.FromSeconds(audioDurationSum)).TotalSeconds);
            #endregion

            //var User = context.Users.Find(UserId);
            if (result.Success)
            {
                if (audioDuration <= ReminingQuota)
                {
                    audio.audioStatus = AudioStatus.Transcribed;
                    var transcriptionModel = new AudioTranscription()
                    {
                        AudioId = audio.Id,
                        Transcription = JsonConvert.SerializeObject(result.Transcriptions),
                        IsDeleted = false
                    };
                    context.AudioTranscriptions.Add(transcriptionModel);
                }
                else
                    audio.audioStatus = AudioStatus.OutOfQuota;
            }
            else
                audio.audioStatus = AudioStatus.NotTranscribed;

            context.Audios.Update(audio);
            await context.SaveChangesAsync();
            scope.Complete();
        }

        // Notify User that the audio status has been updated
        await _hubContext.Clients.All.SendAsync("AudioListUpdated", audioId, audio.audioStatus);
    }
}
The ConversationTranscriber method is the one that connects to the Azure Speech API directly. This is its code:
public async Task<dynamic> ConversationTranscriber(string path)
{
    try
    {
        // not clean
        var speechConfig = SpeechConfig.FromSubscription("", "eastus");
        var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "ar-EG", "en-US" });
        speechConfig.OutputFormat = OutputFormat.Detailed;
        speechConfig.EnableDictation();
        //speechConfig.SetProperty("Profanity", "masked"); // Handle profanity masking if required
        //speechConfig.SetProperty("NoiseSuppression", "Auto"); // Enable automatic noise suppression

        var stopRecognition = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);

        // Create an audio stream from a WAV file or from the default microphone
        TranscriptionVM transcription = new TranscriptionVM();

        // Begin transcribing the audio
        using (var audioConfig = AudioConfig.FromWavFileInput(path))
        {
            using (var conversationTranscriber = new ConversationTranscriber(speechConfig, autoDetectSourceLanguageConfig, audioConfig))
            {
                var transcriptions = new List<TranscriptionVM>();

                conversationTranscriber.Transcribed += (s, e) =>
                {
                    if (e.Result.Reason == ResultReason.RecognizedSpeech)
                    {
                        var detailedResults = e.Result.Best();
                        var words = new List<WordTimestampVM>();
                        if (detailedResults != null && detailedResults.Any())
                        {
                            var bestResults = detailedResults.ToList()[0];
                            transcription = new TranscriptionVM
                            {
                                SpeakerId = e.Result.SpeakerId,
                                Text = e.Result.Text,
                                StartTime = e.Result.OffsetInTicks / (10_000_000d * 60),
                                Duration = e.Result.Duration.Ticks / (10_000_000d * 60),
                                Words = bestResults.Words?.Select(a => new WordTimestampVM()
                                {
                                    Word = a?.Word
                                }).ToList(),
                            };
                        }
                        transcriptions.Add(transcription);
                    }
                };

                conversationTranscriber.Canceled += (s, e) =>
                {
                    // Release the wait if transcription is canceled (e.g. auth or audio-format errors)
                    stopRecognition.TrySetResult(0);
                };

                conversationTranscriber.SessionStopped += (s, e) =>
                {
                    // Handle session stopped scenario if needed
                    stopRecognition.TrySetResult(0);
                };

                await conversationTranscriber.StartTranscribingAsync();

                // Wait for completion
                await stopRecognition.Task;
                await conversationTranscriber.StopTranscribingAsync();

                // Check if transcriptions were generated
                if (transcriptions.Count > 0)
                {
                    var response = new
                    {
                        Success = true,
                        Transcriptions = transcriptions
                    };
                    return response;
                }
                else
                {
                    var response = new
                    {
                        Success = false,
                        Message = "Transcription failed. No transcriptions were generated."
                    };
                    return response;
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Handle any exceptions that occur during transcription
        var response = new
        {
            Success = false,
            Message = "Transcription failed: " + ex.Message
        };
        return response;
    }
}
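One direction I am considering, but have not confirmed, is pinning the recognition locale to ar-EG instead of auto-detecting between ar-EG and en-US, and dropping dictation mode for conversational call audio. Below is a minimal sketch of what I mean; the key is a placeholder, and I am assuming ar-EG is the right locale for these calls and that the WAV files are in a format the service accepts (e.g. 16 kHz, 16-bit, mono PCM):

using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.Transcription;

public static class ArabicTranscriptionSketch
{
    // Transcribe a WAV file with the locale fixed to Arabic (Egypt) and return
    // one "speaker: text" line per recognized utterance.
    public static async Task<List<string>> TranscribeAsync(string wavPath)
    {
        var speechConfig = SpeechConfig.FromSubscription("<subscription-key>", "eastus"); // placeholder key
        speechConfig.SpeechRecognitionLanguage = "ar-EG"; // force Arabic (Egypt) instead of auto-detect
        speechConfig.OutputFormat = OutputFormat.Detailed;

        var lines = new List<string>();
        var stopped = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);

        using var audioConfig = AudioConfig.FromWavFileInput(wavPath);
        using var transcriber = new ConversationTranscriber(speechConfig, audioConfig);

        transcriber.Transcribed += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.RecognizedSpeech)
                lines.Add($"{e.Result.SpeakerId}: {e.Result.Text}");
        };
        // Release the wait on both cancellation and normal session end.
        transcriber.Canceled += (s, e) => stopped.TrySetResult(0);
        transcriber.SessionStopped += (s, e) => stopped.TrySetResult(0);

        await transcriber.StartTranscribingAsync();
        await stopped.Task;
        await transcriber.StopTranscribingAsync();

        return lines;
    }
}

I have also read that a phrase list (PhraseListGrammar) can bias recognition toward domain-specific vocabulary, and that Custom Speech can be trained for a specific domain, but I have not verified either of these for Arabic call-center audio. Would any of this be the right way to improve the accuracy and the speaker separation?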