加入收藏 | 设为首页 | 会员中心 | 我要投稿 唐山站长网 (https://www.0315zz.com.cn/)- 数据安全、数据开发、文字识别、图像处理、媒体智能!
当前位置: 首页 > 站长学院 > PHP教程 > 正文

PHP切割汉字的常用方法实例总结

发布时间:2022-07-14 11:09:13 所属栏目:PHP教程 来源:互联网
导读:这篇文章主要介绍了PHP切割汉字的常用方法,结合实例形式总结分析了php针对汉字的编码转换、遍历、截取等相关操作技巧,需要的朋友可以参考下。 本文实例讲述了PHP切割汉字的常用方法,分享给大家供大家参考,具体如下: ?php /* @UTF-8编码的字符可能由1~3个
  这篇文章主要介绍了PHP切割汉字的常用方法,结合实例形式总结分析了php针对汉字的编码转换、遍历、截取等相关操作技巧,需要的朋友可以参考下。
 
  本文实例讲述了PHP切割汉字的常用方法,分享给大家供大家参考,具体如下:
 
  <?php
  /*
  @UTF-8编码的字符可能由1~4个字节组成(常用汉字占3个字节)。
  */
  /*--------------------------方法一截取中文字符串方法------------------------------*/
  /**
   * Cut a byte range out of a double-byte encoded string (GBK/GB2312 style,
   * where a byte above 0xA0 starts a two-byte character) without splitting a
   * two-byte character in half.
   *
   * The string is always scanned from byte 0 so that character boundaries are
   * known; characters whose first byte lies in [$start, $start + $len) are
   * returned whole. A two-byte character that begins before $start is skipped
   * entirely, and one that begins on the last byte of the window is kept whole,
   * so the result can be one byte longer than $len.
   *
   * @param string $str   Double-byte encoded input (not UTF-8).
   * @param int    $start Byte offset at which the slice begins.
   * @param int    $len   Length of the window in bytes.
   * @return string The extracted slice.
   */
  function msubstr($str, $start, $len)
  {
    $tmpstr = "";
    // Never scan past the real end of the string.
    $end = min($start + $len, strlen($str));
    for ($i = 0; $i < $end; $i++) {
      // A lead byte above 0xA0 means the character occupies two bytes.
      $width = ord($str[$i]) > 0xa0 ? 2 : 1;
      // Fix: honor $start instead of always returning from byte 0.
      if ($i >= $start) {
        $tmpstr .= substr($str, $i, $width);
      }
      // Step over the trail byte; the loop's own $i++ handles the first byte.
      $i += $width - 1;
    }
    return $tmpstr;
  }
  /*----------------------------第二种方法-----------------------------------*/
  //截取的是UTF-8字符串
  /**
   * Take the leading part of a UTF-8 string up to a display-width budget.
   *
   * An ASCII byte costs 1 unit of the budget and a multi-byte character costs
   * 2 units. A multi-byte character that no longer fits in the remaining
   * budget is dropped and scanning stops.
   *
   * @param string $str UTF-8 encoded input.
   * @param int    $len Width budget (ASCII = 1, multi-byte character = 2).
   * @return string The truncated string, never ending in a partial character
   *                for well-formed input.
   */
  function utf_substr($str, $len)
  {
    $new_str = [];
    $total = strlen($str);
    $pos = 0; // byte offset of the next unread character
    for ($i = 0; $i < $len && $pos < $total; $i++) {
      // Fix: the original wrote ord($tem_str > 127), which took ord() of a
      // boolean comparison instead of comparing the byte value.
      $lead = ord($str[$pos]);
      if ($lead > 127) {
        $i++; // a multi-byte character consumes a second unit of the budget
        if ($i >= $len) {
          break; // not enough room left for a wide character
        }
        // Sequence length is derived from the lead byte rather than assuming
        // every non-ASCII character is three bytes long.
        if ($lead >= 0xF0) {
          $bytes = 4;
        } elseif ($lead >= 0xE0) {
          $bytes = 3;
        } elseif ($lead >= 0xC0) {
          $bytes = 2;
        } else {
          $bytes = 1; // stray continuation byte: copy it through as-is
        }
      } else {
        $bytes = 1;
      }
      $new_str[] = substr($str, $pos, $bytes);
      $pos += $bytes;
    }
    return implode('', $new_str);
  }
  /*-------------------------------------第三种方法(UTF-8)--------------------------------*/
  /**
   * Truncate a UTF-8 string to a display-width budget and append "...".
   *
   * The string is split into characters with a byte-level regex. An ASCII
   * character costs 1 unit and a multi-byte character costs 2. As soon as the
   * running width exceeds $length - 3 (three units are reserved for the
   * ellipsis), the characters collected so far are returned followed by "...".
   * If the budget is never exceeded, all matched characters are returned
   * without an ellipsis.
   *
   * @param string $string UTF-8 encoded input.
   * @param int    $length Width budget including the ellipsis.
   * @return string The possibly truncated string.
   */
  function cutstr($string, $length)
  {
    // Fix: the \x escapes had been stripped and the two-byte alternative was
    // split by a stray "|", so multi-byte characters were never matched.
    // Single quotes leave the \xNN escapes for PCRE to interpret.
    preg_match_all('/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/', $string, $info);
    $wordscut = "";
    $j = 0; // accumulated display width
    foreach ($info[0] as $char) {
      $wordscut .= $char;
      $j += ord($char) > 127 ? 2 : 1;
      if ($j > $length - 3) {
        return $wordscut . "...";
      }
    }
    return join('', $info[0]);
  }
  // Demo: run cutstr() on a mixed digit/Chinese sample with a width budget
  // of 10 and print the result.
  $string = "312哈哈,这个组合很难切割哦";
  echo cutstr($string, 10);
  /*---------------------------------下面是曾经用过的截取第三个的字符串的------------------------------*/
  // $name1 = mysql_result($my_rst,0,"name");
  // $name = preg_match("/([1-9][0-9]+)/",$name1,$r);
  // $name = $r[0];
  // if($name == ""){
  // $name=preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,2}'.
  // '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,1}).*#s',
  // '$1',$name1);
  // }
  /*--------------------------------------------第四种方法(UTF-8)---------------------------------------------*/
  /**
   * Truncate a UTF-8 string to a visual-width budget, appending "..." when
   * something was cut off.
   *
   * Width accounting: a multi-byte character and an uppercase ASCII letter
   * each count as 1; any other single byte (lowercase letters, digits,
   * half-width punctuation) counts as 0.5.
   *
   * @param string    $sourcestr UTF-8 encoded input.
   * @param int|float $cutlength Maximum width in the units described above.
   * @return string The leading part of the string, with "..." appended only
   *                if part of the input was left out.
   */
  function cut_str($sourcestr, $cutlength)
  {
    $returnstr = '';
    $i = 0; // byte offset into $sourcestr
    $n = 0; // accumulated visual width
    $str_length = strlen($sourcestr);
    // Fix: "<" instead of "<=" so no iteration runs past the last byte.
    while ($n < $cutlength && $i < $str_length) {
      $ascnum = ord($sourcestr[$i]);
      if ($ascnum >= 240) {
        // Fix: four-byte sequences (e.g. emoji) were cut after three bytes.
        $bytes = 4;
        $width = 1;
      } elseif ($ascnum >= 224) {
        $bytes = 3;
        $width = 1;
      } elseif ($ascnum >= 192) {
        $bytes = 2;
        $width = 1;
      } elseif ($ascnum >= 65 && $ascnum <= 90) {
        // Uppercase letters are counted as a full-width character.
        $bytes = 1;
        $width = 1;
      } else {
        $bytes = 1;
        $width = 0.5;
      }
      $returnstr .= substr($sourcestr, $i, $bytes);
      $i += $bytes;
      $n += $width;
    }
    // Fix: the original compared the byte length with the width budget, so a
    // string that was returned in full could still get an ellipsis.
    if ($i < $str_length) {
      $returnstr .= "...";
    }
    return $returnstr;
  }
  /*--------------------第五种方法(UTF-8)---------------------------------------------*/
  /**
   * Extract a substring of a UTF-8 string that may contain HTML entities,
   * without splitting a multi-byte character or an entity.
   *
   * With $magic enabled, $len is measured in full-width units: multi-byte
   * characters, numeric entities (&#NNN;) and uppercase ASCII letters count
   * as 1; other ASCII characters and the named entities &lt; &gt; &amp;
   * &quot; count as 0.5. If a multi-byte character or numeric entity would
   * overshoot the budget by half a unit it is dropped. With $magic disabled,
   * $len is simply the number of characters/entities.
   *
   * @param string     $title Input text (UTF-8, optionally with entities).
   * @param int        $start Byte offset to start from; if it points into the
   *                          middle of a multi-byte character it is moved back
   *                          to that character's lead byte.
   * @param int|string $len   Length budget; "" means "the whole string".
   * @param bool       $magic Whether to use the width-weighted counting.
   * @return string The extracted substring.
   */
  function FSubstr($title, $start, $len = "", $magic = true)
  {
    $total = strlen($title);
    if ($len == "") $len = $total;

    // If $start lands on a continuation byte (0x80-0xBF), walk backwards to
    // the lead byte of that character.
    if ($start != 0) {
      $startv = ord(substr($title, $start, 1));
      if ($startv >= 128 && $startv < 192) {
        for ($i = $start - 1; $i > 0; $i--) {
          if (ord(substr($title, $i, 1)) >= 192) break;
        }
        $start = $i;
      }
    }
    if ($total <= $len) return substr($title, $start, $len);

    // Fix: these literals had been HTML-decoded in the source ("<", ">", "&"
    // and a bare quote, which was a syntax error).
    $entities = ['&lt;', '&gt;', '&amp;', '&quot;'];
    $alen = 0;    // number of half-width items seen
    $blen = 0;    // number of full-width items seen
    $realnum = 0; // number of characters/entities seen
    $length = 0;  // number of bytes to return
    for ($i = $start; $i < $total; $i++) {
      $cur = substr($title, $i, 1);
      $code = ord($cur);
      $cstep = 1;      // bytes occupied by the current item
      $ctype = 0;      // 1 = wide item that may be dropped on overshoot
      $counted = true; // false for stray continuation bytes
      $wide = false;   // whether the item counts as full-width

      if ($cur == "&") {
        foreach ($entities as $entity) {
          if (substr($title, $i, strlen($entity)) === $entity) {
            $cstep = strlen($entity);
            break;
          }
        }
        // Fix: "\d" had lost its backslash, and the pattern is now anchored
        // so that only an entity starting at this position is consumed.
        if ($cstep == 1 && preg_match('/^&#(\d+);?/', substr($title, $i), $match)) {
          $cstep = strlen($match[0]);
          $wide = true;
          $ctype = 1;
        }
        // Fix: a bare "&" that starts no entity falls through as an ordinary
        // half-width character instead of contributing zero bytes.
      } elseif ($code >= 192) {
        // Lead byte of a multi-byte sequence; length follows from its value.
        if ($code >= 252) {
          $cstep = 6;
        } elseif ($code >= 248) {
          $cstep = 5;
        } elseif ($code >= 240) {
          $cstep = 4;
        } elseif ($code >= 224) {
          $cstep = 3;
        } else {
          // Fix: the original assigned $ctype = 2 here, leaving $cstep at 0.
          $cstep = 2;
        }
        $wide = true;
        $ctype = 1;
      } elseif ($code >= 128) {
        // Stray continuation byte: keep the byte but do not count it.
        $counted = false;
      } else {
        // Plain ASCII; uppercase letters are treated as full-width.
        $wide = ($code >= 65 && $code <= 90);
      }

      $length += $cstep;
      $i += $cstep - 1; // the loop's own $i++ accounts for the first byte
      if ($counted) {
        $realnum++;
        if ($magic) {
          if ($wide) {
            $blen++;
          } else {
            $alen++;
          }
        }
      }

      if ($magic) {
        $width = $blen * 2 + $alen; // width in half-units
        if ($width > $len * 2) {
          // Overshoot: drop the item if it was a multi-byte character or a
          // numeric entity.
          if ($ctype == 1) {
            $length -= $cstep;
          }
          break;
        }
        if ($width == $len * 2) break;
      } elseif ($realnum == $len) {
        break;
      }
    }
    return substr($title, $start, $length);
  }
  /**
   * Character-based substring for UTF-8 text using a byte-level regex.
   *
   * Skips $from characters from the beginning of $str and returns up to $len
   * of the characters that follow. A "character" is either one ASCII byte or
   * a lead byte (0xC0-0xFF) followed by its continuation bytes.
   *
   * @param string $str  UTF-8 encoded input.
   * @param int    $from Number of characters to skip.
   * @param int    $len  Maximum number of characters to return.
   * @return string|null The substring, or null if preg_replace() fails.
   */
  function utf8Substr($str, $from, $len)
  {
    // Force both bounds to integers so arbitrary text cannot be spliced into
    // the pattern; PCRE limits {m,n} repeat counts to 65535.
    $from = min(65535, max(0, (int) $from));
    $len = min(65535, max(0, (int) $len));
    // Fix: the \x escapes had been stripped, so multi-byte characters were
    // never matched and the call returned an empty string for Chinese text.
    $char = '(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)';
    return preg_replace(
      '#^' . $char . '{0,' . $from . '}' .
      '(' . $char . '{0,' . $len . '}).*#s',
      '$1', $str);
  }
  // Demo: call utf8Substr() on a mixed Chinese/ASCII sample with
  // $from = 0 and $len = 15, then print the result.
  $title = "你哈珀niad1纳斯达wop asdni你爱谁都没阿斯顿撒旦12ccs- sd";
  $title = utf8Substr($title, 0, 15);
  echo $title;
  ?>

(编辑:唐山站长网)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!

    热点阅读