Java实现Shazam声音识别算法的实例代码

Shazam算法采用傅里叶变换将时域信号转换为频域信号，从中提取音频指纹，最后通过比较指纹的匹配程度来识别音频。

1、AudioSystem获取音频

奈奎斯特-香农采样定理告诉我们，为了能捕获人类能听到的声音频率，采样率必须至少是人类听觉范围内最高频率的两倍。人类能听到的声音频率范围大约在20Hz到20000Hz之间，所以在录制音频的时候采样率大多是44100Hz。这也是大多数标准MPEG-1音频的采样率。44100这个值最初来源于索尼，因为它可以允许音频在修改过的视频设备上以25帧（PAL）或者30帧（NTSC）每秒进行录制，而且也覆盖了专业录音设备的20000Hz带宽。所以当你在选择录音的频率时，选择44100Hz就好了。

定义音频格式：

								
									 public   static   float   samplerate =   44100  ; 

									 public   static   int   samplesizeinbits =   16  ; 

									 public   static   int   channels =   2  ;   // double 

									 public   static   boolean   signed =   true  ;   // indicates whether the data is signed or unsigned 

									 public   static   boolean   bigendian =   true  ;   // indicates whether the audio data is stored in big-endian or little-endian order 

									 public   audioformat getformat() { 

									     return   new   audioformat(samplerate, samplesizeinbits, channels, signed, 

									         bigendian); 

									 }

调用麦克风获取音频，保存到out中

								
									 /** Buffer that the captured audio bytes are collected into. */
									 public static ByteArrayOutputStream out = new ByteArrayOutputStream();

									 try {
									     // Describe the capture format and ask the audio system for a matching input line.
									     AudioFormat format = smartauto.getformat();
									     DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
									     // Record the start time (epoch milliseconds); it is handed to checktime below.
									     starttime = new Date().getTime();
									     System.out.println(starttime);
									     smartauto.line = (TargetDataLine) AudioSystem.getLine(info);
									     smartauto.line.open(format);
									     try {
									         smartauto.line.start();
									         // NOTE(review): presumably starts copying captured data into `out`;
									         // fileanalysis is defined elsewhere - confirm.
									         new fileanalysis().getdatatoout("");
									         // Keep polling until smartauto.running is cleared.
									         // NOTE(review): checktime is defined elsewhere; presumably it ends the
									         // recording after a time limit - confirm.
									         while (smartauto.running) {
									             checktime(starttime);
									         }
									     } finally {
									         // Release the audio device even when starting or the wait loop throws;
									         // previously an exception after open() left the line open.
									         smartauto.line.stop();
									         smartauto.line.close();
									     }
									 } catch (Throwable e) {
									     // Best-effort capture: report the failure and carry on, as before.
									     e.printStackTrace();
									 }

获取到的out数据需要通过傅里叶变换，从时域信号转换为频域信号。

傅里叶变换

								
									 public   complex[] fft(complex[] x) { 

									       int   n = x.length; 

									       // 因为exp(-2i*n*pi)=1，n=1时递归原点 

									       if   (n ==   1  ){ 

									         return   x; 

									       } 

									       // 如果信号数为奇数，使用dft计算 

									       if   (n %   2   !=   0  ) { 

									         return   dft(x); 

									       } 

									       // 提取下标为偶数的原始信号值进行递归fft计算 

									       complex[] even =   new   complex[n /   2  ]; 

									       for   (  int   k =   0  ; k < n /   2  ; k++) { 

									         even[k] = x[  2   * k]; 

									       } 

									       complex[] evenvalue = fft(even); 

									       // 提取下标为奇数的原始信号值进行fft计算 

									       // 节约内存 

									       complex[] odd = even; 

									       for   (  int   k =   0  ; k < n /   2  ; k++) { 

									         odd[k] = x[  2   * k +   1  ]; 

									       } 

									       complex[] oddvalue = fft(odd); 

									       // 偶数+奇数 

									       complex[] result =   new   complex[n]; 

									       for   (  int   k =   0  ; k < n /   2  ; k++) { 

									         // 使用欧拉公式e^(-i*2pi*k/n) = cos(-2pi*k/n) + i*sin(-2pi*k/n) 

									         double   p = -  2   * k * math.pi / n; 

									         complex m =   new   complex(math.cos(p), math.sin(p)); 

									         result[k] = evenvalue[k].add(m.multiply(oddvalue[k])); 

									         // exp(-2*(k+n/2)*pi/n) 相当于 -exp(-2*k*pi/n)，其中exp(-n*pi)=-1(欧拉公式); 

									         result[k + n /   2  ] = evenvalue[k].subtract(m.multiply(oddvalue[k])); 

									       } 

									       return   result; 

									     }

计算out的频域值

								
									 private   void   setfftresult(){ 

									      byte   audio[] = smartauto.out.tobytearray(); 

									      final   int   totalsize = audio.length; 

									      system.out.println(  "totalsize = "   + totalsize); 

									      int   chenksize =   4  ; 

									      int   amountpossible = totalsize/chenksize; 

									      //when turning into frequency domain we'll need complex numbers:  

									      smartauto.results =   new   complex[amountpossible][]; 

									      dftoperate dfaoperate =   new   dftoperate(); 

									      //for all the chunks:  

									      for  (  int   times =   0  ;times < amountpossible; times++) { 

									        complex[] complex =   new   complex[chenksize]; 

									        for  (  int   i =   0  ;i < chenksize;i++) { 

									          //put the time domain data into a complex number with imaginary part as 0:  

									          complex[i] =   new   complex(audio[(times*chenksize)+i],   0  ); 

									        } 

									        //perform fft analysis on the chunk:  

									        smartauto.results[times] = dfaoperate.fft(complex); 

									      } 

									      system.out.println(  "results = "   + smartauto.results.tostring()); 

									    }

总结

以上所述是小编给大家介绍的java实现shazam声音识别算法的实例代码，希望对大家有所帮助，如果大家有任何疑问请给我留言，小编会及时回复大家的。在此也非常感谢大家对网站的支持！

原文链接：https://blog.csdn.net/llhhzz1989/article/details/82585957

查看更多关于Java实现Shazam声音识别算法的实例代码的详细内容...

声明：本文来自网络，不代表【好得很程序员自学网】立场，转载请注明出处：http://haodehen.cn/did251208

更新时间：2023-07-21 阅读：77次