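<?php
/**
 * Utility class for extracting the pinyin initials of Chinese characters.
 *
 * Notes:
 *   - ASCII-only strings (digits included) are returned unchanged, e.g. abc123 => abc123
 *   - Chinese strings yield the pinyin initial of each character, e.g. 测试字符串 => CSZFC
 *   - Mixed strings yield the pinyin initials plus the ASCII characters, e.g. 我i我j => WIWJ
 *
 * Example:
 *   $py = new str2py();
 *
 *   $result = $py->getInitials('周杰伦'); // ZJL
 *
 *   // Get only the first letter
 *   $result = $py->getFirstString('abc');    // A
 *   $result = $py->getFirstString("周杰伦"); // Z
 */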
class str2py
{
    /**
     * Lower bound of each pinyin-initial range inside GB2312 level-1 hanzi.
     *
     * Keys are (lead byte * 1000 + trail byte) of the first character whose
     * pinyin starts with the given letter; they MUST stay in ascending order
     * because _search() relies on it. There are no entries for I, U and V
     * because no pinyin syllable starts with those letters.
     *
     * NOTE: only GB2312 level-1 characters (0xB0A1-0xD7F9) are ordered by
     * pinyin. Level-2 characters (codes above 215249) fall into the last
     * range and are therefore reported as 'Z'.
     *
     * @var array
     */
    private $_pinyins = array(
        176161 => 'A',
        176197 => 'B',
        178193 => 'C',
        180238 => 'D',
        182234 => 'E',
        183162 => 'F',
        184193 => 'G',
        185254 => 'H',
        187247 => 'J',
        191166 => 'K',
        192172 => 'L',
        194232 => 'M',
        196195 => 'N',
        197182 => 'O',
        197190 => 'P',
        198218 => 'Q',
        200187 => 'R',
        200246 => 'S',
        203250 => 'T',
        205218 => 'W',
        206244 => 'X',
        209185 => 'Y',
        212209 => 'Z',
    );

    /**
     * Lower-cased, trimmed name of the input encoding.
     *
     * @var string|null
     */
    private $_charset = null;

    /**
     * Constructor: selects the encoding of the strings passed in later.
     *
     * 'utf-8' / 'utf8' (any letter case) means the input is converted to
     * GB2312 before lookup; any other value means the input is assumed to be
     * GB2312 already.
     *
     * @param string $charset input encoding, default 'utf-8'
     */
    public function __construct($charset = 'utf-8')
    {
        // Normalise once so 'UTF-8', 'Utf8', ' utf-8 ' all take the UTF-8 path.
        $this->_charset = strtolower(trim((string) $charset));
    }

    /**
     * Tells whether the configured input encoding is UTF-8.
     *
     * @return bool
     */
    private function _isUtf8()
    {
        return $this->_charset === 'utf-8' || $this->_charset === 'utf8';
    }

    /**
     * Converts a UTF-8 string to GB2312, dropping what cannot be represented.
     *
     * A single character outside GB2312 (traditional hanzi, emoji, ...) makes
     * iconv() fail for the whole string, so on failure the string is converted
     * one character at a time and only the offending characters are lost.
     *
     * @param string $str UTF-8 input
     * @return string GB2312 bytes (possibly shorter than the input)
     */
    private function _toGb2312($str)
    {
        // '@' is deliberate: a failed conversion is an expected, handled case
        // here and iconv() offers no way to probe without raising a notice.
        $converted = @iconv('UTF-8', 'GB2312', $str);
        if ($converted !== false)
        {
            return $converted;
        }

        $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false)
        {
            // Input is not valid UTF-8 at all: keep only the 7-bit part.
            return (string) preg_replace('/[\x80-\xFF]/', '', $str);
        }

        $out = '';
        foreach ($chars as $char)
        {
            if (strlen($char) === 1)
            {
                $out .= $char; /* plain ASCII, identical in GB2312 */
                continue;
            }
            $gb = @iconv('UTF-8', 'GB2312', $char);
            if ($gb !== false)
            {
                $out .= $gb;
            }
        }
        return $out;
    }

    /**
     * Splits a GB2312 string into units: one byte for a single-byte
     * character, two bytes for a double-byte (Chinese) character.
     *
     * @param string $str GB2312 bytes
     * @return array list of 1- or 2-byte strings
     */
    private function _cutWord($str)
    {
        $words = array();
        $length = strlen($str);
        $i = 0;
        while ($i < $length)
        {
            // A lead byte starts a two-byte unit; the final unit may be a
            // lone byte if the input was truncated.
            $width = $this->_isAscii($str[$i]) ? 1 : 2;
            $words[] = substr($str, $i, $width);
            $i += $width;
        }
        return $words;
    }

    /**
     * Tells whether the first byte of $char is a single-byte character,
     * i.e. below 0xA0 and therefore not a GB2312 lead byte.
     *
     * @param string $char
     * @return bool
     */
    private function _isAscii($char)
    {
        return ord(substr($char, 0, 1)) < 160;
    }

    /**
     * Tells whether every byte of the string is a single-byte character.
     *
     * @param string $str
     * @return bool
     */
    private function _isAsciis($str)
    {
        $length = strlen($str);
        for ($i = 0; $i < $length; $i++)
        {
            if (!$this->_isAscii($str[$i]))
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the pinyin initials of a string.
     *
     * - A string with no Chinese characters is returned unchanged.
     * - Otherwise each Chinese character is replaced by the upper-case
     *   initial of its pinyin, single-byte characters are kept, and the whole
     *   result is upper-cased. Characters that cannot be mapped (punctuation,
     *   characters outside GB2312) are dropped.
     *
     * @param string $str text in the encoding given to the constructor
     * @return string
     */
    public function getInitials($str)
    {
        if (!is_scalar($str))
        {
            return '';
        }
        $str = (string) $str;
        // Explicit comparison: empty() would also reject the valid input "0".
        if ($str === '')
        {
            return '';
        }
        if ($this->_isAsciis($str))
        {
            return $str;
        }
        if ($this->_isUtf8())
        {
            $str = $this->_toGb2312($str);
        }

        $result = '';
        foreach ($this->_cutWord($str) as $word)
        {
            if ($this->_isAscii($word))
            { /* not Chinese */
                $result .= $word;
                continue;
            }
            if (strlen($word) < 2)
            { /* truncated double-byte character */
                continue;
            }
            $code = ord($word[0]) * 1000 + ord($word[1]);
            $start = $this->_search($code);
            if ($start !== -1)
            {
                $result .= $this->_pinyins[$start];
            }
        }
        return strtoupper($result);
    }

    /**
     * Returns the upper-case first letter of getInitials($str).
     *
     * @param string $str
     * @return string one character, or '' when nothing could be derived
     */
    public function getFirstString($str)
    {
        $initials = $this->getInitials($str);
        // Explicit comparison: empty() would also reject the valid result "0".
        if ($initials === '')
        {
            return '';
        }
        return strtoupper(substr($initials, 0, 1));
    }

    /**
     * Finds the pinyin range a GB2312 character code belongs to.
     *
     * @param int $code lead byte * 1000 + trail byte
     * @return int key of $_pinyins for the matching range (the largest key
     *             that is <= $code), or -1 when $code precedes the first range
     */
    private function _search($code)
    {
        $match = -1;
        // The table has 23 ascending entries, so an ordered scan is enough.
        foreach ($this->_pinyins as $start => $letter)
        {
            if ($code < $start)
            {
                break;
            }
            $match = $start;
        }
        return $match;
    }
}
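// Disclaimer: this article was collected from the web and does not represent the position of 【好得很程序员自学网】; when reposting, please cite the source: http://haodehen.cn/did29493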