An SDK (Software Development Kit) is the collection of development tools that software engineers use to build applications for a specific software package, software framework, hardware platform, operating system, and so on. The Android SDK is the software development kit dedicated to Android.
Requesting the recording permission

// Request the RECORD_AUDIO runtime permission
private static final int GET_RECODE_AUDIO = 1;
private static String[] PERMISSION_AUDIO = {
        Manifest.permission.RECORD_AUDIO
};

public static void verifyAudioPermissions(Activity activity) {
    int permission = ActivityCompat.checkSelfPermission(activity,
            Manifest.permission.RECORD_AUDIO);
    if (permission != PackageManager.PERMISSION_GRANTED) {
        ActivityCompat.requestPermissions(activity, PERMISSION_AUDIO,
                GET_RECODE_AUDIO);
    }
}

protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);
    verifyAudioPermissions(this); // request the recording permission
}
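If the app needs to react to the user's decision, the result of the request is delivered to onRequestPermissionsResult. A minimal sketch (the handling and the toast text here are illustrative, not part of the original code):

@Override
public void onRequestPermissionsResult(int requestCode, String[] permissions, int[] grantResults) {
    super.onRequestPermissionsResult(requestCode, permissions, grantResults);
    if (requestCode == GET_RECODE_AUDIO) {
        if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
            // Permission granted: recording and recognition can start
        } else {
            // Permission denied: recognition cannot work without RECORD_AUDIO
            Toast.makeText(this, "Recording permission denied", Toast.LENGTH_SHORT).show();
        }
    }
}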
iFlytek SDK
iFlytek Open Platform
Register an account on the iFlytek Open Platform, download the demo, and import the corresponding jar packages and resource files into your own project.
Initializing the speech configuration object
SpeechUtility.createUtility(this, SpeechConstant.APPID + "=your APPID");
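createUtility only needs to run once per process, before any recognizer or synthesizer is created. A common place for it is a custom Application subclass registered in AndroidManifest.xml via android:name; a minimal sketch (MyApplication is an assumed name, and the APPID is still the one obtained from the platform):

public class MyApplication extends Application {
    @Override
    public void onCreate() {
        super.onCreate();
        // Initialize the iFlytek SDK once for the whole process
        SpeechUtility.createUtility(this, SpeechConstant.APPID + "=your APPID");
    }
}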
Speech recognition
// Map that stores the dictation results, keyed by sentence number (sn)
private HashMap<String, String> hashMapTexts = new LinkedHashMap<>();

private void listen() {
    // 1. Create the SpeechRecognizer; for local dictation the 2nd parameter takes an InitListener
    // Dictation object
    SpeechRecognizer hearer = SpeechRecognizer.createRecognizer(getActivity(), null);
    // Interaction animation: the iFlytek recognition dialog
    RecognizerDialog dialog = new RecognizerDialog(getActivity(), null);

    // 2. Set the dictation parameters; see the SpeechConstant class in the iFlytek MSC API manual (Android)
    // Clear the grammar ID and SUBJECT so that parameters left over from a previous grammar call
    // do not interfere; alternatively clear all parameters, as shown in the official demo.
    hearer.setParameter(SpeechConstant.CLOUD_GRAMMAR, null);
    hearer.setParameter(SpeechConstant.SUBJECT, null);
    // Application domain: "iat" means dictation
    hearer.setParameter(SpeechConstant.DOMAIN, "iat");
    // Input language: zh_cn is Simplified Chinese; accent "mandarin" is Mandarin
    hearer.setParameter(SpeechConstant.LANGUAGE, "zh_cn");
    hearer.setParameter(SpeechConstant.ACCENT, "mandarin");
    // Begin-of-speech endpoint: silence timeout in ms, i.e. how long the user may stay silent
    // before it counts as a timeout. Range: 1000~10000
    hearer.setParameter(SpeechConstant.VAD_BOS, "4000");
    // End-of-speech endpoint: trailing silence in ms after which the user is considered to have
    // stopped speaking and recording stops automatically. Range: 0~10000
    hearer.setParameter(SpeechConstant.VAD_EOS, "2000");
    // Punctuation in the result: 0 = hide, 1 = show
    hearer.setParameter(SpeechConstant.ASR_PTT, "1");

    // 3. Start dictation: attach the listener to the dialog
    dialog.setListener(new RecognizerDialogListener() {
        @Override
        public void onResult(RecognizerResult results, boolean isLast) {
            Log.d("Result", results.getResultString());
            // (1) Parse the JSON result
            StringBuffer strBuffer = new StringBuffer();
            try {
                JSONTokener tokener = new JSONTokener(results.getResultString());
                Log.i("TAG", "Test" + results.getResultString());
                Log.i("TAG", "Test" + results.toString());
                JSONObject joResult = new JSONObject(tokener);
                JSONArray words = joResult.getJSONArray("ws");
                for (int i = 0; i < words.length(); i++) {
                    // For each word, use the first candidate by default
                    JSONArray items = words.getJSONObject(i).getJSONArray("cw");
                    JSONObject obj = items.getJSONObject(0);
                    strBuffer.append(obj.getString("w"));
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            // (2) Read the sn (sentence number) field from the JSON result
            String sn = null;
            try {
                JSONObject resultJson = new JSONObject(results.getResultString());
                sn = resultJson.optString("sn");
            } catch (JSONException e) {
                e.printStackTrace();
            }
            // (3) Assemble the recognized text
            hashMapTexts.put(sn, strBuffer.toString());
            StringBuffer resultBuffer = new StringBuffer(); // final result
            for (String key : hashMapTexts.keySet()) {
                resultBuffer.append(hashMapTexts.get(key));
            }
            // Show the recognized text
            et_value.setText(resultBuffer.toString());
            et_value.requestFocus(); // grab focus
            et_value.setSelection(resultBuffer.toString().length()); // move the cursor to the end so the text can be edited
        }

        @Override
        public void onError(SpeechError error) {
            error.getPlainDescription(true);
        }
    });
    dialog.show(); // show the dialog
}
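The parsing above expects the dictation (iat) result to be JSON of roughly the shape {"sn":1,"ws":[{"cw":[{"w":"你好"}]}]}, where ws is the word list, cw the candidate list for each word, w the text of a candidate, and sn the sentence number used as the map key. Triggering the whole flow is then just a matter of calling listen() from the UI once the recording permission has been granted; a minimal sketch, assuming the code lives in a Fragment (it calls getActivity()) and the layout has a button with the id btn_listen:

Button btnListen = view.findViewById(R.id.btn_listen);
btnListen.setOnClickListener(new View.OnClickListener() {
    @Override
    public void onClick(View v) {
        // Opens the iFlytek dialog and fills et_value with the recognized text
        listen();
    }
});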
Speech synthesis (wrapper class)
public class SpeechCompound {
    // Log tag
    private static final String TAG = "SpeechCompound";
    // Context
    private Context mContext;
    // Speech synthesizer object
    private static SpeechSynthesizer mTts;
    // Cloud voice names
    public final static String[] COLOUD_VOICERS_VALUE = {"aisjiuxu", "xiaoyu", "catherine", "henry", "vimary", "vixy", "xiaoqi", "vixf", "xiaomei",
            "xiaolin", "xiaorong", "xiaoqian", "xiaokun", "xiaoqiang", "vixying", "xiaoxin", "nannan", "vils",};

    public SpeechCompound(Context context) {
        // Context
        mContext = context;
        // Initialize the synthesizer
        mTts = SpeechSynthesizer.createSynthesizer(mContext, new InitListener() {
            @Override
            public void onInit(int code) {
                if (code != ErrorCode.SUCCESS) {
                    Log.d(TAG, "Initialization failed, error code: " + code);
                }
            }
        });
    }
    public void speaking(String text) {
        // Ignore empty text
        if (TextUtils.isEmpty(text)) {
            return;
        }
        int code = mTts.startSpeaking(text, mTtsListener);
        Log.d(TAG, "startSpeaking returned " + code);
        if (code != ErrorCode.SUCCESS) {
            if (code == ErrorCode.ERROR_COMPONENT_NOT_INSTALLED) {
                Toast.makeText(mContext, "Speech component not installed, code = " + code, Toast.LENGTH_SHORT).show();
            } else {
                Toast.makeText(mContext, "Speech synthesis failed, error code: " + code, Toast.LENGTH_SHORT).show();
            }
        }
    }
    public static void stopSpeaking() {
        // If the synthesizer exists and is currently speaking
        if (null != mTts && mTts.isSpeaking()) {
            // Stop playback
            mTts.stopSpeaking();
        }
    }

    public static boolean isSpeaking() {
        if (null != mTts) {
            return mTts.isSpeaking();
        } else {
            return false;
        }
    }
    private SynthesizerListener mTtsListener = new SynthesizerListener() {
        @Override
        public void onSpeakBegin() {
            Log.i(TAG, "Playback started");
        }

        @Override
        public void onSpeakPaused() {
            Log.i(TAG, "Playback paused");
        }

        @Override
        public void onSpeakResumed() {
            Log.i(TAG, "Playback resumed");
        }

        @Override
        public void onBufferProgress(int percent, int beginPos, int endPos, String info) {
            // Buffering progress
            Log.i(TAG, "Buffering: " + percent);
        }

        @Override
        public void onSpeakProgress(int percent, int beginPos, int endPos) {
            // Playback progress
            Log.i(TAG, "Synthesis: " + percent);
        }

        @Override
        public void onCompleted(SpeechError error) {
            if (error == null) {
                Log.i(TAG, "Playback completed");
            } else {
                Log.i(TAG, error.getPlainDescription(true));
            }
        }

        @Override
        public void onEvent(int eventType, int arg1, int arg2, Bundle obj) {
        }
    };
    private void setParam() {
        // Clear all parameters
        mTts.setParameter(SpeechConstant.PARAMS, null);
        // Engine type: cloud
        mTts.setParameter(SpeechConstant.ENGINE_TYPE, SpeechConstant.TYPE_CLOUD);
        // Voice name
        mTts.setParameter(SpeechConstant.VOICE_NAME, COLOUD_VOICERS_VALUE[0]);
        // Speech rate
        mTts.setParameter(SpeechConstant.SPEED, "50");
        // Pitch
        mTts.setParameter(SpeechConstant.PITCH, "50");
        // Volume
        mTts.setParameter(SpeechConstant.VOLUME, "100");
        // Audio stream type used by the player
        mTts.setParameter(SpeechConstant.STREAM_TYPE, "3");
        // mTts.setParameter(SpeechConstant.TTS_AUDIO_PATH, Environment.getExternalStorageDirectory() + "/KRobot/wavaudio.pcm");
        // Background music: 1 = on, 0 = off
        // mTts.setParameter("bgs", "1");
    }
}
Speech synthesis (invocation)
public class MainActivity extends AppCompatActivity {
    private EditText edt;
    private Button btn;
    private SpeechCompound speechCompound;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        edt = findViewById(R.id.et_text);
        btn = findViewById(R.id.btn);
        speechCompound = new SpeechCompound(this);
        btn.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                speechCompound.speaking(edt.getText().toString().trim());
            }
        });
    }
}
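Because SpeechCompound keeps the synthesizer in a static field, it is worth stopping playback when the Activity goes away; a minimal sketch using the static helper defined above:

@Override
protected void onDestroy() {
    // Stop any ongoing synthesis before the Activity is destroyed
    SpeechCompound.stopSpeaking();
    super.onDestroy();
}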
Issues
iFlytek speech synthesis is acceptable for Chinese, but the results for English sentences are poor, and the English-only voices require payment. Baidu speech synthesis was considered next: it is free, but Baidu's speech recognition is paid. In the end the choice fell on Android's built-in TTS (text-to-speech) engine for offline synthesis; its drawback is the comparatively poor voice quality.
Baidu AI Platform
TextToSpeech
public class SpeechUtils {
    private Context mcontext;
    private static final String TAG = "SpeechUtils";
    private static TextToSpeech textToSpeech; // TTS object

    public SpeechUtils(Context context) {
        mcontext = context;
        textToSpeech = new TextToSpeech(mcontext, new TextToSpeech.OnInitListener() {
            @Override
            public void onInit(int i) {
                if (i == TextToSpeech.SUCCESS) {
                    //textToSpeech.setLanguage(Locale.US);
                    //textToSpeech.setPitch(1.0f); // set the pitch: higher values sound sharper (more female), lower values deeper (more male), 1.0 is normal
                    //textToSpeech.setSpeechRate(0.8f);
                    // Speak an empty string once the engine is ready
                    textToSpeech.speak("", TextToSpeech.QUEUE_FLUSH, null);
                }
            }
        });
    }

    // Speech synthesis
    public void speakText(String text) {
        if (textToSpeech != null) {
            textToSpeech.speak(text,
                    TextToSpeech.QUEUE_FLUSH, null);
        }
    }

    // Stop without shutting down
    public void stopSpeaking() {
        // If the TTS object exists and is currently speaking
        if (null != textToSpeech && textToSpeech.isSpeaking()) {
            // Stop speaking
            textToSpeech.stop();
        }
    }

    // Stop and shut down
    public void shutdownSpeaking() {
        if (null != textToSpeech) {
            // Stop speaking if necessary
            if (textToSpeech.isSpeaking()) {
                textToSpeech.stop();
            }
            // Shut down and release TTS resources
            textToSpeech.shutdown();
        }
    }
}
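The three-argument speak(String, int, HashMap) overload used above is deprecated since API 21; on newer devices the variant taking a Bundle and an utterance id can be used instead. A minimal sketch (the utterance id string is arbitrary):

public void speakText21(String text) {
    if (textToSpeech != null && Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) {
        // QUEUE_FLUSH drops anything still queued and speaks the new text immediately
        textToSpeech.speak(text, TextToSpeech.QUEUE_FLUSH, null, "tts_utterance_id");
    }
}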
Invocation is similar to the one shown above.
In addition, the language, pitch, and speech rate can be set in code (see the sketch below), or on the phone under Settings > Accessibility features > Accessibility > Text-to-speech (TTS) output settings.
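A minimal sketch of setting them in code inside onInit (the locale and the values are illustrative):

if (i == TextToSpeech.SUCCESS) {
    textToSpeech.setLanguage(Locale.CHINA); // language/locale of the voice
    textToSpeech.setPitch(1.0f);            // 1.0 is the normal pitch
    textToSpeech.setSpeechRate(1.0f);       // 1.0 is the normal speed
}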
Note: some phones (such as Honor devices) ship with iFlytek's speech synthesis engine built in; on those it appears that only the iFlytek engine can be used.
Author: Leslie_Waong