프로젝트

일반

사용자정보

통계
| 개정판:

t1 / TFDContents / Assets / KinectScripts / SpeechManager.cs @ 3

이력 | 보기 | 이력해설 | 다운로드 (14 KB)

1
using UnityEngine;
2
using System;
3
using System.Collections;
4
using System.IO;
5
using System.Collections.Generic;
6
using System.Text;
7

    
8

    
9
/// <summary>
/// Contract for components that want to be notified about recognized speech phrases.
/// All speech-recognition listeners need to implement it.
/// </summary>
public interface SpeechRecognitionInterface
{
	/// <summary>
	/// Called when a speech phrase gets recognized.
	/// </summary>
	/// <param name="phraseTag">Tag of the recognized phrase.</param>
	/// <param name="condidence">Confidence of the recognition, in the range 0 to 1.</param>
	/// <returns><c>true</c> when the recognized phrase has to be cleared afterwards, <c>false</c> otherwise.</returns>
	bool SpeechPhraseRecognized(string phraseTag, float condidence);
}
22

    
23
/// <summary>
24
/// Speech manager is the component that manages the Kinect speech recognition.
25
/// </summary>
26
public class SpeechManager : MonoBehaviour 
27
{
28
	[Tooltip("File name of the grammar file, used by the speech recognizer. The file will be copied from Resources, if it does not exist.")]
	public string grammarFileName = "SpeechGrammar.grxml";

	[Tooltip("Whether the grammar is dynamic or static. Dynamic grammars allow adding phrases at run-time.")]
	public bool dynamicGrammar = false;

	[Tooltip("Code of the language, used by the speech recognizer. Default is English (1033).")]
	public int languageCode = 1033;

	[Tooltip("Minimum confidence required, to consider a phrase as recognized. Confidence varies between 0.0 and 1.0.")]
	public float requiredConfidence = 0f;

	// Initialized here, so the list is never null when the component gets added from script
	// (values set in the inspector still replace this default).
	[Tooltip("List of the speech recognition listeners in the scene. If the list is empty, the available speech recognition listeners will be detected at the scene start up.")]
	public List<MonoBehaviour> speechRecognitionListeners = new List<MonoBehaviour>();

	[Tooltip("GUI-Text to display the speech-manager debug messages.")]
	public GUIText debugText;

	// Whether the recognizer is currently listening; updated in Update() from the speech-started / speech-ended states
	private bool isListening;

	// Data of the currently recognized phrase: recognized-flag, phrase tag and confidence
	private bool isPhraseRecognized;
	private string phraseTagRecognized;
	private float phraseConfidence;

	// Primary sensor data structure, obtained from KinectManager in Start()
	private KinectInterop.SensorData sensorData = null;

	// Keeps track of whether Kinect and SAPI have been initialized
	private bool sapiInitialized = false;

	// The single instance of SpeechManager
	private static SpeechManager instance;
62
	
63
	
64
	/// <summary>
	/// Provides access to the single SpeechManager instance.
	/// </summary>
	/// <value>The instance stored in Awake(), or null if none is currently set.</value>
	public static SpeechManager Instance
	{
		get { return instance; }
	}
75
	
76
	/// <summary>
	/// Tells whether SAPI (Speech API) has been initialized successfully.
	/// </summary>
	/// <returns><c>true</c> when the initialization in Start() has succeeded; <c>false</c> otherwise.</returns>
	public bool IsSapiInitialized()
	{
		return sapiInitialized;
	}
84

    
85
	/// <summary>
	/// Adds a phrase to the from-rule of the dynamic grammar. An empty to-rule means end of the phrase recognition.
	/// </summary>
	/// <returns><c>true</c> if the sensor interface reported success (result code 0); <c>false</c> on error, or if SAPI is not initialized.</returns>
	/// <param name="fromRule">Name of the from-rule.</param>
	/// <param name="toRule">Name of the to-rule, or empty string.</param>
	/// <param name="phrase">The dynamic phrase to add.</param>
	/// <param name="bClearRulePhrases">If <c>true</c>, the current rule phrases get cleared before this one is added.</param>
	/// <param name="bCommitGrammar">If <c>true</c>, the dynamic grammar changes get committed.</param>
	public bool AddGrammarPhrase(string fromRule, string toRule, string phrase, bool bClearRulePhrases, bool bCommitGrammar)
	{
		// nothing can be added before the recognizer is initialized
		if(!sapiInitialized)
		{
			return false;
		}

		int resultCode = sensorData.sensorInterface.AddGrammarPhrase(fromRule, toRule, phrase, bClearRulePhrases, bCommitGrammar);
		return resultCode == 0;
	}
104
	
105
	/// <summary>
	/// Tells whether the speech recognizer is currently in listening-state.
	/// </summary>
	/// <returns><c>true</c> while the speech recognizer is listening; <c>false</c> otherwise.</returns>
	public bool IsListening()
	{
		return isListening;
	}
113
	
114
	/// <summary>
	/// Tells whether there is a recognized phrase that has not been cleared yet.
	/// </summary>
	/// <returns><c>true</c> if a phrase has been recognized; <c>false</c> otherwise.</returns>
	public bool IsPhraseRecognized()
	{
		return isPhraseRecognized;
	}
122

    
123
	/// <summary>
	/// Returns the confidence of the currently recognized phrase, in the range [0, 1].
	/// </summary>
	/// <returns>The confidence of the phrase; 0 after the phrase has been cleared.</returns>
	public float GetPhraseConfidence()
	{
		return phraseConfidence;
	}
131
	
132
	/// <summary>
	/// Returns the tag of the currently recognized phrase.
	/// </summary>
	/// <returns>The tag of the recognized phrase; an empty string after the phrase has been cleared.</returns>
	public string GetPhraseTagRecognized()
	{
		return phraseTagRecognized;
	}
140
	
141
	/// <summary>
	/// Resets the recognized-phrase state: the tag becomes empty, the confidence zero and the recognized-flag false.
	/// </summary>
	public void ClearPhraseRecognized()
	{
		phraseConfidence = 0f;
		phraseTagRecognized = string.Empty;
		isPhraseRecognized = false;
	}
150

    
151

    
152
	/// <summary>
	/// Gets the speech recognition data as a csv line.
	/// The line consists of the marker "sr" and the flag 0, if no phrase is recognized,
	/// or of the marker "sr", the flag 1, the phrase tag and the phrase confidence (3 decimals) otherwise.
	/// </summary>
	/// <returns>The csv line without trailing delimiter, or an empty string if SAPI is not initialized.</returns>
	/// <param name="delimiter">The delimiter to put between the csv fields.</param>
	public string GetSpeechDataAsCsv(char delimiter)
	{
		if (!sapiInitialized)
			return string.Empty;

		// create the output string
		StringBuilder sbBuf = new StringBuilder();
		sbBuf.Append("sr").Append(delimiter);

		if(isPhraseRecognized)
		{
			sbBuf.Append('1').Append(delimiter);
			sbBuf.Append(phraseTagRecognized).Append(delimiter);

			// Always use the invariant culture here. With the current culture the decimal separator may be
			// a comma, which corrupts the line when the delimiter is a comma, too.
			sbBuf.Append(phraseConfidence.ToString("F3", System.Globalization.CultureInfo.InvariantCulture));
		}
		else
		{
			sbBuf.Append('0');
		}

		return sbBuf.ToString();
	}
183

    
184
	/// <summary>
	/// Sets the speech recognition data from a csv line, as created by GetSpeechDataAsCsv().
	/// If the line contains no recognized phrase, the current phrase data is left untouched.
	/// </summary>
	/// <returns><c>true</c> if the line is a speech-data line (starts with "sr" and has a flag field); <c>false</c> otherwise.</returns>
	/// <param name="sCsvLine">The csv line to parse. Null or empty lines are rejected.</param>
	/// <param name="delimiters">The delimiters used to split the line into fields.</param>
	public bool SetSpeechDataFromCsv(string sCsvLine, char[] delimiters)
	{
		if(string.IsNullOrEmpty(sCsvLine))
			return false;

		// split the csv line in parts
		string[] alCsvParts = sCsvLine.Split(delimiters);

		// at least the marker and the phrase-recognized flag are needed
		if(alCsvParts.Length < 2 || alCsvParts[0] != "sr")
			return false;

		System.Globalization.CultureInfo invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

		// whether there is recognized phrase or not (stays 0, if the field is not a number)
		int phraseRecognized = 0;
		int.TryParse(alCsvParts[1], System.Globalization.NumberStyles.Integer, invariantCulture, out phraseRecognized);

		// the phrase tag and confidence are expected in the 3rd and 4th field
		if (phraseRecognized != 0 && alCsvParts.Length >= 4)
		{
			string sConfidence = alCsvParts[3];
			float fConfidence = 0f;

			// Parse culture-independently first. Fall back to the current culture,
			// to stay compatible with lines formatted by culture-dependent senders.
			if(!float.TryParse(sConfidence, System.Globalization.NumberStyles.Float, invariantCulture, out fConfidence))
			{
				float.TryParse(sConfidence, out fConfidence);
			}

			// get the recognized phrase
			isPhraseRecognized = true;
			phraseTagRecognized = alCsvParts[2];
			phraseConfidence = fConfidence;
		}

		// Intentionally no else-branch: the current phrase data is kept,
		// when the line does not contain a recognized phrase.
		return true;
	}
224

    
225

    
226
	//----------------------------------- end of public functions --------------------------------------//
227

    
228

    
229
	/// <summary>
	/// Unity callback. Stores this component as the single SpeechManager instance.
	/// </summary>
	void Awake()
	{
		instance = this;
	}
233

    
234

    
235
	/// <summary>
	/// Unity callback. Gets the sensor data from KinectManager, initializes the speech recognizer,
	/// loads the grammar file and detects the speech recognition listeners in the scene, if none are set.
	/// Errors are logged with Debug.LogError() and displayed in debugText, if it is set.
	/// </summary>
	void Start() 
	{
		try 
		{
			// get sensor data
			KinectManager kinectManager = KinectManager.Instance;
			if(kinectManager && kinectManager.IsInitialized())
			{
				sensorData = kinectManager.GetSensorData();
			}
			
			if(sensorData == null || sensorData.sensorInterface == null)
			{
				throw new Exception("Speech recognition cannot be started, because KinectManager is missing or not initialized.");
			}
			
			if(debugText != null)
			{
				debugText.text = "Please, wait...";
			}
			
			// ensure the needed dlls are in place and speech recognition is available for this interface
			bool bNeedRestart = false;
			if(!sensorData.sensorInterface.IsSpeechRecognitionAvailable(ref bNeedRestart))
			{
				string sInterfaceName = sensorData.sensorInterface.GetType().Name;
				throw new Exception(sInterfaceName + ": Speech recognition is not supported!");
			}

			if(bNeedRestart)
			{
				KinectInterop.RestartLevel(gameObject, "SM");
				return;
			}
			
			// Initialize the speech recognizer
			string sCriteria = String.Format("Language={0:X};Kinect=True", languageCode);
			int rc = sensorData.sensorInterface.InitSpeechRecognition(sCriteria, true, false);
			if (rc < 0)
			{
				// The message is concatenated and not used as format string,
				// because braces in the error text would cause FormatException.
				string sErrorMessage = (new SpeechErrorHandler()).GetSapiErrorMessage(rc);
				throw new Exception("Error initializing Kinect/SAPI: " + sErrorMessage);
			}
			
			if(requiredConfidence > 0)
			{
				sensorData.sensorInterface.SetSpeechConfidence(requiredConfidence);
			}
			
			if(!string.IsNullOrEmpty(grammarFileName))
			{
				// copy the grammar file from Resources (an existing file gets overwritten)
				CopyGrammarFileFromResources();

				// load the grammar file
				rc = sensorData.sensorInterface.LoadSpeechGrammar(grammarFileName, (short)languageCode, dynamicGrammar);
				if (rc < 0)
				{
					string sErrorMessage = (new SpeechErrorHandler()).GetSapiErrorMessage(rc);
					throw new Exception("Error loading grammar file " + grammarFileName + ": " + sErrorMessage);
				}
			}
			
			sapiInitialized = true;
			
			if(debugText != null)
			{
				debugText.text = "Speech recognizer is ready.";
			}

			// try to automatically detect the available speech recognition listeners in the scene
			DetectSpeechRecognitionListeners();
		} 
		catch(DllNotFoundException ex)
		{
			Debug.LogError(ex.ToString());
			if(debugText != null)
				debugText.text = "Please check the Kinect and SAPI installations.";
		}
		catch (Exception ex) 
		{
			Debug.LogError(ex.ToString());
			if(debugText != null)
				debugText.text = ex.Message;
		}
	}

	// Writes the grammar text-resource, named as grammarFileName, to the grammar file. Throws if the resource is missing.
	private void CopyGrammarFileFromResources()
	{
		TextAsset textRes = Resources.Load(grammarFileName, typeof(TextAsset)) as TextAsset;
		if(textRes == null)
		{
			throw new Exception("Couldn't find grammar resource: " + grammarFileName + ".txt");
		}

		string sResText = textRes.text;

#if !NETFX_CORE
		File.WriteAllText(grammarFileName, sResText);
#else
		System.Threading.Tasks.Task task = CopyGrammarFileToStorageAsync(grammarFileName, sResText);
		if(task == null)
		{
			throw new Exception("Could not create task for CopyGrammarFileToStorageAsync()");
		}

		// block until the file is written (a faulted task counts as completed, too)
		while (!task.IsCompleted)
		{
			task.Wait(100);
		}

		if(task.IsFaulted)
		{
			throw task.Exception;
		}
#endif
	}

	// Fills the listeners list with the enabled scripts in the scene that implement SpeechRecognitionInterface,
	// but only if the list is empty (or missing).
	private void DetectSpeechRecognitionListeners()
	{
		if(speechRecognitionListeners == null)
		{
			// may happen, if the component was not set up by the inspector
			speechRecognitionListeners = new List<MonoBehaviour>();
		}

		if(speechRecognitionListeners.Count != 0)
			return;

		MonoBehaviour[] monoScripts = FindObjectsOfType(typeof(MonoBehaviour)) as MonoBehaviour[];
		if(monoScripts == null)
			return;

		foreach(MonoBehaviour monoScript in monoScripts)
		{
			if((monoScript is SpeechRecognitionInterface) && monoScript.enabled)
			{
				speechRecognitionListeners.Add(monoScript);
			}
		}
	}
380

    
381
#if NETFX_CORE
	/// <summary>
	/// Writes the grammar content to a file in the local folder of the application data.
	/// An existing file with the same name gets replaced.
	/// </summary>
	/// <param name="grammarFileName">Name of the file to create.</param>
	/// <param name="grammarContent">Text to write into the file.</param>
	private async System.Threading.Tasks.Task CopyGrammarFileToStorageAsync(string grammarFileName, string grammarContent)
	{
		Windows.Storage.StorageFolder localFolder = Windows.Storage.ApplicationData.Current.LocalFolder;

		Windows.Storage.StorageFile targetFile = await localFolder.CreateFileAsync(
			grammarFileName, Windows.Storage.CreationCollisionOption.ReplaceExisting);

		await Windows.Storage.FileIO.WriteTextAsync(targetFile, grammarContent);
	}
#endif
391

    
392
	/// <summary>
	/// Unity callback. Finishes the speech recognition, if it was initialized, and releases the singleton reference.
	/// </summary>
	void OnDestroy()
	{
		if(sapiInitialized && sensorData != null && sensorData.sensorInterface != null)
		{
			// finish speech recognition
			sensorData.sensorInterface.FinishSpeechRecognition();
		}
		
		sapiInitialized = false;

		// Reset the singleton reference only if it points to this component. Otherwise destroying
		// a second SpeechManager would clear the reference to the one that is still alive.
		if(object.ReferenceEquals(instance, this))
		{
			instance = null;
		}
	}
403
	
404
	/// <summary>
	/// Unity callback. Updates the speech recognizer, tracks the listening state and, when a phrase gets recognized,
	/// stores its tag and confidence, shows it in debugText (if set) and notifies the speech recognition listeners.
	/// The phrase gets cleared, if at least one listener requests it.
	/// </summary>
	void Update () 
	{
		if(!sapiInitialized)
			return;

		// update the speech recognizer; negative result codes are treated as errors
		int rc = sensorData.sensorInterface.UpdateSpeechRecognition();
		if(rc < 0)
			return;

		// estimate the listening state
		if(sensorData.sensorInterface.IsSpeechStarted())
		{
			isListening = true;
		}
		else if(sensorData.sensorInterface.IsSpeechEnded())
		{
			isListening = false;
		}

		// check if a grammar phrase has been recognized
		if(!sensorData.sensorInterface.IsPhraseRecognized())
			return;

		isPhraseRecognized = true;
		phraseConfidence = sensorData.sensorInterface.GetPhraseConfidence();
		phraseTagRecognized = sensorData.sensorInterface.GetRecognizedPhraseTag();
		sensorData.sensorInterface.ClearRecognizedPhrase();

		if(debugText != null)
		{
			debugText.text = string.Format("{0}  ({1:F1}%)", phraseTagRecognized, phraseConfidence * 100f);
		}

		// invoke SpeechPhraseRecognized() of the available speech listeners
		bool bClearPhrase = false;
		if(speechRecognitionListeners != null)
		{
			foreach(MonoBehaviour listenerScript in speechRecognitionListeners)
			{
				// skip empty (or already destroyed) entries
				if(listenerScript == null)
					continue;

				// The list is typed as MonoBehaviour, so entries that are set by hand may not implement
				// the listener interface. These are skipped, instead of causing InvalidCastException.
				SpeechRecognitionInterface listener = listenerScript as SpeechRecognitionInterface;
				if(listener == null)
					continue;

				if(listener.SpeechPhraseRecognized(phraseTagRecognized, phraseConfidence))
				{
					bClearPhrase = true;
				}
			}
		}

		if(bClearPhrase)
		{
			ClearPhraseRecognized();
		}
	}
476
	
477
//	void OnGUI()
478
//	{
479
//		if(sapiInitialized)
480
//		{
481
//			if(debugText != null)
482
//			{
483
//				if(isPhraseRecognized)
484
//				{
485
//					debugText.text = string.Format("{0}  ({1:F1}%)", phraseTagRecognized, phraseConfidence * 100f);
486
//				}
487
//				else if(isListening)
488
//				{
489
//					debugText.text = "Listening...";
490
//				}
491
//			}
492
//		}
493
//	}
494
	
495
	
496
}