基于C#的百度AI和科大讯飞语音合成SDK

一、百度语音合成

百度语音合成 C# SDK 基于 REST API,需要联网调用 HTTP 接口。REST API 单次最多支持 512 个汉字,合成的音频文件格式仅为 MP3,不支持其它格式。如果想离线使用,需下载 Android 或 iOS 版的离线 SDK。

1、安装语音合成 C# SDK

C# SDK 现已开源! https://github.com/Baidu-AIP/dotnet-sdk

** 支持平台:.Net Framework 3.5 4.0 4.5, .Net Core 2.0 **

2、方法一:使用Nuget管理依赖 (推荐)

在NuGet中搜索 Baidu.AI,安装最新版即可。

package 地址: https://www.nuget.org/packages/Baidu.AI/

3、源程序界面及代码

基于C#百度AI和科大汛飞语音合成SDK

#region 百度语音
/// <summary>
/// Click handler for the Baidu text-to-speech button: validates the UI inputs,
/// calls the Baidu TTS service and saves the returned audio to a file chosen by the user.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void simpleButton1_Click(object sender, EventArgs e)
{
    // Input validation: send the focus back to the first control holding an unusable value.
    if (spinEdit1.Value <= 0)
    {
        spinEdit1.Focus();
        return;
    }
    if (trackBarControl1.Value <= 0)
    {
        trackBarControl1.Focus();
        return;
    }
    if (string.IsNullOrEmpty(textBox1.Text.Trim()))
    {
        textBox1.Focus();
        textBox1.Select();
        return;
    }

    // Baidu credentials (API key / secret key). The client constructor used here
    // does not take the application id, so no APP_ID local is declared.
    var API_KEY = "******";
    var SECRET_KEY = "*****";
    var client = new Baidu.Aip.Speech.Tts(API_KEY, SECRET_KEY);
    client.Timeout = 60000;  // request timeout, in milliseconds

    // Optional synthesis parameters.
    var option = new Dictionary<string, object>()
    {
        {"spd", spinEdit1.Value}, // speaking speed
        {"vol", trackBarControl1.Value}, // volume
        {"per", comboBoxEdit1.SelectedIndex}  // voice; e.g. 4 = emotional child voice "Yaya"
    };
    var result = client.Synthesis(textBox1.Text, option);

    // Report a failed synthesis instead of silently showing a save dialog
    // that would never write anything.
    if (result.ErrorCode != 0)
    {
        System.Windows.Forms.MessageBox.Show("语音合成失败,错误码:" + result.ErrorCode);
        return;
    }

    if (xtraSaveFileDialog1.ShowDialog() == System.Windows.Forms.DialogResult.OK)
    {
        File.WriteAllBytes(xtraSaveFileDialog1.FileName, result.Data);
    }
}
#endregion

接口参数说明:

基于C#百度AI和科大汛飞语音合成SDK

二、科大讯飞语音合成 

科大讯飞没有提供 C# SDK,需要以 Web API 的形式调用。不过请注意,该接口使用的 HTTP API 协议不支持跨域。

1、接口调用流程

注: 调用接口前需配置IP白名单,IP白名单规则请参照官方文档中的“IP白名单”说明。(我之前由于没有设置正确的IP,导致接口调用不成功。)在百度中搜索“IP”即可查看本机的公网IP。

  1. 通过接口密钥基于MD5计算签名,将签名以及其他参数放在Http Request Header中 。

  2. 将文本数据放在Http Request Body中 。

  3. 向服务器端发送Http请求后,接收服务器端的返回结果。

基于C#百度AI和科大汛飞语音合成SDK

注: 在控制台添加服务后,点击“发音人管理”可自行添加并试用发音人,添加后会显示该发音人参数值,设置参数voice_name为相应的发音人参数值即可。

2、程序界面及源代码

基于C#百度AI和科大汛飞语音合成SDK

源代码

/// <summary>
/// Synthesis parameters for the iFlytek TTS web API. In this file an instance is
/// serialized to JSON with Newtonsoft.Json, Base64-encoded and sent in the
/// "X-Param" request header. Property names are the JSON keys and must not be renamed.
/// </summary>
public class Rootobject
{
    /// <summary>Audio sample format; the caller in this file sets "audio/L16;rate=16000".</summary>
    public string auf { get; set; }
    /// <summary>Audio encoding: "raw" yields wav/pcm output, "lame" yields mp3.</summary>
    public string aue { get; set; }
    /// <summary>Speaker parameter value, as shown in the console's speaker management page.</summary>
    public string voice_name { get; set; }
    /// <summary>Speaking speed, passed as the string form of the UI control's value.</summary>
    public string speed { get; set; }
    /// <summary>Volume, passed as the string form of the UI control's value.</summary>
    public string volume { get; set; }
    /// <summary>Pitch, passed as the string form of the UI control's value.</summary>
    public string pitch { get; set; }
    /// <summary>Engine type; the caller uses the part of the UI selection before the first '-'.</summary>
    public string engine_type { get; set; }
    /// <summary>Text type; the caller in this file always sends "text".</summary>
    public string text_type { get; set; }
}

/// <summary>
/// Computes the MD5 digest of the UTF-8 bytes of <paramref name="s"/> and returns it
/// as a 32-character lowercase hexadecimal string.
/// </summary>
/// <param name="s">Text to hash; must not be null.</param>
/// <returns>The digest as 32 lowercase hex characters.</returns>
String Md5(string s)
{
    byte[] digest;
    // "using" releases the hasher even if ComputeHash throws.
    using (System.Security.Cryptography.MD5 md5 = System.Security.Cryptography.MD5.Create())
    {
        digest = md5.ComputeHash(System.Text.Encoding.UTF8.GetBytes(s));
    }

    // Two hex characters per byte; an MD5 digest is 16 bytes, so the result is
    // always 32 characters and needs no further padding.
    System.Text.StringBuilder hex = new System.Text.StringBuilder(digest.Length * 2);
    foreach (byte b in digest)
    {
        hex.Append(b.ToString("x2"));
    }
    return hex.ToString();
}

#region 把流转换成缓存流
/// <summary>
/// Reads <paramref name="instream"/> to its end and returns the bytes in a new
/// <see cref="MemoryStream"/>. The returned stream's position is left at its end.
/// </summary>
/// <param name="instream">Source stream, read from its current position.</param>
/// <returns>A new in-memory stream holding everything that was read.</returns>
MemoryStream StreamToMemoryStream(Stream instream)
{
    MemoryStream copy = new MemoryStream();
    byte[] chunk = new byte[4096];

    // Pull fixed-size chunks until Read reports that nothing more is available.
    for (int read = instream.Read(chunk, 0, chunk.Length);
         read > 0;
         read = instream.Read(chunk, 0, chunk.Length))
    {
        copy.Write(chunk, 0, read);
    }

    return copy;
}
#endregion

#region 把缓存流转换成字节组
/// <summary>
/// Returns the whole content of <paramref name="memoryStream"/> as a byte array,
/// regardless of the stream's current position. The stream is rewound first and
/// is left positioned at its end.
/// </summary>
/// <param name="memoryStream">In-memory stream to copy.</param>
/// <returns>A new array with all bytes of the stream.</returns>
public static byte[] streamTobyte(MemoryStream memoryStream)
{
    // Rewind so the copy starts at the first byte.
    memoryStream.Position = 0;

    byte[] content = new byte[memoryStream.Length];
    memoryStream.Read(content, 0, content.Length);
    return content;
}
#endregion

/// <summary>
/// Click handler for the iFlytek text-to-speech button: builds the signed HTTP request
/// described by the iFlytek TTS web API, posts the text, and saves the returned MP3
/// audio to a file chosen by the user. Server-side and network errors are shown to the user.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void simpleButton2_Click(object sender, EventArgs e)
{
    // Text to synthesize; nothing to do when it is empty.
    string text = memoEdit1.Text;
    if (string.IsNullOrEmpty(text) || text.Trim().Length == 0)
    {
        memoEdit1.Focus();
        return;
    }

    // Application id. It must belong to a "webapi" type application with the TTS service
    // enabled; see http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=36481
    string appID = "****";
    // API key of the TTS service (console -> my applications -> speech synthesis).
    string APIKey = "****";
    // TTS web API endpoint.
    String url = "http://api.xfyun.cn/v1/service/v1/tts";

    // The body is the UTF-8 text, URL-encoded, as a form field.
    String bodys = string.Format("text={0}", HttpUtility.UrlEncode(Encoding.UTF8.GetBytes(text)));

    // aue = raw  -> audio is saved as wav or pcm
    // aue = lame -> audio is saved as mp3
    string AUE = "lame";

    Rootobject root = new Rootobject();
    root.aue = AUE;
    root.auf = "audio/L16;rate=16000";
    root.speed = speed.Value.ToString();
    root.pitch = pitch.Value.ToString();
    root.volume = volume.Value.ToString();
    // The UI entries look like "value-description"; only the value part is sent.
    root.voice_name = voice_name.Text.Split('-')[0];
    root.engine_type = engine_type.Text.Split('-')[0];
    root.text_type = "text";
    string param = Newtonsoft.Json.JsonConvert.SerializeObject(root);

    // Ten-digit Unix timestamp. Truncate rather than round so the value is never
    // ahead of the current time.
    TimeSpan ts = DateTime.UtcNow - new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
    string curTime = ((long)ts.TotalSeconds).ToString();

    // Parameters: UTF-8 JSON, then Base64.
    string paraBase64 = Convert.ToBase64String(Encoding.UTF8.GetBytes(param));
    // Signature: MD5 over API key + timestamp + encoded parameters.
    string checkSum = Md5(APIKey + curTime + paraBase64);

    // Assemble the request headers.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";
    request.Headers.Add("X-Param", paraBase64);
    request.Headers.Add("X-CurTime", curTime);
    request.Headers.Add("X-Appid", appID);
    request.Headers.Add("X-CheckSum", checkSum);

    // The URL-encoded body contains ASCII characters only, so ASCII produces the same
    // bytes as any ASCII-compatible code page without depending on one being installed.
    byte[] bodyBytes = Encoding.ASCII.GetBytes(bodys);
    request.ContentLength = bodyBytes.Length;

    try
    {
        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(bodyBytes, 0, bodyBytes.Length);
        }

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        {
            // Audio comes back as audio/mpeg; anything else is treated as an error
            // description. Prefix match tolerates parameters after the media type.
            string header_type = response.Headers["Content-Type"];
            bool isAudio = header_type != null
                && header_type.StartsWith("audio/mpeg", StringComparison.OrdinalIgnoreCase);

            if (isAudio)
            {
                using (MemoryStream memoryStream = StreamToMemoryStream(responseStream))
                {
                    if (xtraSaveFileDialog2.ShowDialog() == System.Windows.Forms.DialogResult.OK)
                    {
                        // Save the audio under the file name chosen by the user.
                        File.WriteAllBytes(xtraSaveFileDialog2.FileName, streamTobyte(memoryStream));
                    }
                }
                Console.WriteLine(response.Headers);
            }
            else
            {
                using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    String htmlStr = reader.ReadToEnd();
                    Console.WriteLine(htmlStr);
                    // Console output is normally invisible in a GUI application,
                    // so also show the server's answer to the user.
                    System.Windows.Forms.MessageBox.Show(htmlStr);
                }
            }
        }
    }
    catch (WebException ex)
    {
        // Network failure or non-success HTTP status.
        System.Windows.Forms.MessageBox.Show("语音合成请求失败:" + ex.Message);
    }
}
    #endregion
}

通过尝试百度和科大讯飞两个语音合成接口,发现科大讯飞语音比较好一些,百度有些词语会读错。