国外免费vps
国内免费vps云服务器

利用百度自然语言处理功能制作插件,智能对文章进行归纳概括,提高文章伪原创

之前在文章《如何在WordPress文章中调用摘要》中,提到过这种方法,就是调用百度AI新闻摘要API接口,智能化对wordpress文章内容进行归纳概括,这里我就给大家分享一下这种方法。

<?php
 
// Plugin entry point: adjusts the label array for the current page type and
// prints the (possibly modified) array back in serialized form.
// NOTE(review): $LabelArray is never assigned in this file; presumably the host
// collector populates it before this point - confirm against the plugin loader.
set_time_limit(270);
error_reporting(E_ERROR | E_WARNING | E_PARSE);
 
// Key of the label that holds the article body.
$content_tag_name = '内容';
 
// NOTE(review): $headdd and $taill are not read anywhere in the visible code.
$headdd = '';
$taill = '';
 
switch($LabelArray['PageType'])
{
    case 'List':// List pages: only raw HTML can be processed here (nothing is done)
        break;
    case 'Pages':// Paginated content: only raw HTML can be processed here (nothing is done)
        break;
    case 'Content':// Default content page: only raw HTML can be processed here (nothing is done)
        break;
    case 'Save':// Label values can only be modified during the Save phase
        // Keep the original text
 
    try {
    /**********************************************************************/
    // This step fetches the generated summary for the article
    /**********************************************************************/
    // NOTE(review): $title is assigned but not used afterwards.
    $title = $LabelArray['标题'];
    $content = $LabelArray[$content_tag_name];
 
    // Store a summary of the article body under the summary label.
    $LabelArray['摘要'] = get_summary(200, $content);
 
    }
    catch (Exception $e) {
        // On failure the exception message is appended to both the title and
        // the body labels, so the error is visible in the saved record.
        $LabelArray['标题'] .= $e->getMessage();
        $LabelArray[$content_tag_name] .= $e->getMessage();
    }
        break;
    default:
        //$LabelArray[$content_tag_name]=curl_request($url, array('wenzhang'=>$LabelArray[$content_tag_name] ));
}
 
// Emit the label array in PHP serialized form on standard output.
echo serialize($LabelArray);
 
 
/**
 * Request a summary of an article from the remote summary endpoint.
 *
 * The text is stripped of HTML tags and its non-breaking spaces and tabs are
 * turned into plain spaces before it is posted.
 *
 * @param int    $len      Value sent as the `len` POST field.
 * @param string $contents Article body, may contain HTML.
 * @return mixed Whatever curl_request() returns: the response body, or the
 *               cURL error message when the transfer fails.
 */
function get_summary($len, $contents) {
    $APPID = '12345678';      // Placeholder: apply for your own at Baidu
    $APPKEY = '123456789';    // Placeholder: apply for your own at Baidu
    $SECRETKEY = '123456789'; // Placeholder: apply for your own at Baidu

    $contents = strip_tags($contents);
    // strip_tags() leaves entities untouched, so normalise the non-breaking
    // space entity, the raw U+00A0 character and tabs to a plain space.
    // (The previous first replacement swapped a space for a space: a no-op.)
    $contents = str_replace(array('&nbsp;', "\xC2\xA0", "\t"), ' ', $contents);

    // NOTE(review): the credentials travel in the query string over plain HTTP
    // to a third-party host rather than to Baidu directly - confirm this is
    // acceptable before using real keys.
    $query = http_build_query(array(
        'appid'     => $APPID,
        'appkey'    => $APPKEY,
        'secretkey' => $SECRETKEY,
    ), '', '&');
    $url_kw = 'http://api-2.78tp.com/nlp/summary.php?' . $query;

    return curl_request($url_kw, array(
        'len'  => $len,
        'text' => $contents,
    ));
}
 
 
/**
 * Join a title and a body into one string, with the separator produced by
 * compose_separator() placed between them.
 *
 * @param string $title
 * @param string $content
 * @return string
 */
function compose_article($title, $content) {
    return $title . compose_separator() . $content;
}
 
/**
 * Build the title/body separator: the TITLE_SEPAR2 constant wrapped in
 * parentheses, with a line break before and after.
 *
 * @return string
 */
function compose_separator() {
    return sprintf('%s(%s)%s', PHP_EOL, TITLE_SEPAR2, PHP_EOL);
}
 
 
/**
 * Hook for repairing the separator inside an article.
 * Currently a pass-through: the input is handed back untouched.
 *
 * @param mixed $article
 * @return mixed The same value that was passed in.
 */
function fix_separator($article) {
    $unchanged = $article;

    return $unchanged;
}
 
 
/**
 * Send text to the rewriting service and split the answer at the article
 * separator.
 *
 * @param string $str Text posted as the `wenzhang` field.
 * @return array Pieces of the response split on the separator; element 0 has
 *               the title label prefixes removed.
 */
function get_wyc_article($str) {
    // NOTE(review): $url is read from the global scope but is not assigned in
    // the visible part of this file - confirm where it is defined.
    global $url;

    // The separator is matched without its surrounding line breaks.
    $separator = str_replace(PHP_EOL, '', compose_separator());

    $wyc = curl_request($url, array('wenzhang' => $str));
    $wyc = fix_separator($wyc);
    $wyc = explode($separator, $wyc);

    if (isset($wyc[0])) {
        // Strip the label prefixes in both the ASCII-colon and the full-width
        // colon spelling. The previous code repeated the identical ASCII
        // needle, so the full-width form was never removed.
        $labels = array('标题:', '标题:', '目:', '目:');
        $wyc[0] = str_replace($labels, '', $wyc[0]);
    }

    return $wyc;
}
 
 
/**
 * Rewrite a title through the rewriting service.
 *
 * The title is sent three times, separated by blank lines, and the first
 * non-empty line of the rewritten text is returned.
 *
 * @param string $str Original title.
 * @return string First line of the rewritten text ('' when nothing came back).
 */
function get_wyc_title($str) {
    $gap = PHP_EOL . PHP_EOL . PHP_EOL;
    $parts = get_wyc_article($str . $gap . $str . $gap . $str);

    // get_wyc_article() returns the exploded pieces as an array; the rewritten
    // text is its first element. Passing the whole array on to the string
    // helpers (as before) fails with a TypeError on PHP 8.
    $text = (is_array($parts) && isset($parts[0])) ? (string) $parts[0] : '';

    // Collapse line breaks and drop leading/trailing blank space so the first
    // line is not empty.
    $text = trim(fix_newline($text));

    $lines = explode(PHP_EOL, $text);
    return $lines[0];
}
 
 
 
/**
 * Remove every double-quoted alt="..." attribute from a piece of HTML.
 *
 * @param string $contents HTML fragment.
 * @return string The fragment with the alt attributes removed.
 */
function remove_alt($contents) {
    // Match only up to the attribute's own closing quote. The previous greedy
    // `(.*)` ran to the last quote on the line and deleted the attributes
    // (and tags) that followed.
    $contents = preg_replace('/alt="[^"]*"/', '', $contents);
    return $contents;
}
 
 
/**
 * Remove sentence punctuation from a title.
 *
 * Both the CJK/full-width marks and their ASCII counterparts are removed.
 *
 * @param string $contents Title text.
 * @return string The title without punctuation.
 */
function fix_title($contents) {
    // The first row previously held the same ASCII marks as the second row,
    // so full-width marks such as ',' and '!' were left in place.
    $punctuation_symbol = array(
        '。', '?', ',', ':', ';', '、', '!',
        '.', '?', ',', ':', ';', '!',
    );

    return str_replace($punctuation_symbol, '', $contents);
}
 
/**
 * Replace HTML line-break tags with the platform line ending.
 *
 * Handles any letter case and optional whitespace before the optional slash
 * (`<br>`, `<BR/>`, `<br />`, `<Br  />`, ...), not just six fixed spellings.
 *
 * @param string $contents HTML fragment.
 * @return string The fragment with each <br> tag replaced by PHP_EOL.
 */
function br2newline($contents) {
    return preg_replace('/<br\s*\/?>/i', PHP_EOL, $contents);
}
 
/**
 * Turn platform line endings into <br> tags, then drop a <br> that ends up
 * directly after an opening <p>.
 *
 * @param string $contnets Text or HTML fragment.
 * @return string
 */
function newline2br($contnets) {
    // The two replacements are applied one after the other, in this order.
    $from = array(PHP_EOL, '<p><br>');
    $to   = array("<br>", '<p>');

    return str_replace($from, $to, $contnets);
}
 
 
/**
 * Normalise the line breaks of a text by delegating to fix_newline().
 *
 * @param string $contents
 * @return string
 */
function delete_newline($contents) {
    return fix_newline($contents);
}
 
/**
 * Convert every line ending (CRLF, lone CR, lone LF) to the platform line
 * ending, keeping blank lines intact.
 *
 * @param string $contents
 * @return string
 */
function reset_newline_win($contents) {
    // Applied in sequence: CRLF -> LF, CR -> LF, and finally LF -> PHP_EOL.
    $search  = array("\r\n", "\r", "\n");
    $replace = array("\n", "\n", PHP_EOL);

    return str_replace($search, $replace, $contents);
}
 
/**
 * Collapse every run of line breaks (CR and/or LF) into a single platform
 * line ending.
 *
 * @param string $data
 * @return string
 */
function fix_newline($data) {
    // Treat carriage returns as line feeds first.
    $unified = strtr($data, "\r", "\n");
    // Squeeze consecutive line feeds down to one.
    $squeezed = preg_replace("/\n{2,}/", "\n", $unified);

    return str_replace("\n", PHP_EOL, $squeezed);
}
 
/**
 * Strip HTML attributes from a fragment using cleanHtml.
 *
 * `src` is kept on every element; `img` additionally keeps `alt`, and
 * `iframe` keeps `frameborder`.
 *
 * @param string $contents HTML fragment.
 * @return string The fragment as returned by cleanHtml::strip().
 */
function clean_contents($contents) {
    $cleaner = new cleanHtml;
    $cleaner->allow = array('src');
    $cleaner->exceptions = array(
        'img'    => array('src', 'alt'),
        'iframe' => array('src', 'frameborder'),
    );

    return $cleaner->strip($contents);
}
 
 
/**
 * Replace only the first occurrence of $search in $subject.
 *
 * @param string $search  Text to look for.
 * @param string $replace Text to put in its place.
 * @param string $subject Text to search in.
 * @return string $subject with the first match replaced, or unchanged when
 *                $search does not occur.
 */
function xfm_strong_str_replace_once($search, $replace, $subject) {
    $offset = strpos($subject, $search);
    if ($offset === false) {
        return $subject;
    }

    // Splice the replacement over the matched byte range.
    return substr_replace($subject, $replace, $offset, strlen($search));
}
 
/**
 * Perform an HTTP request with cURL.
 *
 * @param string       $url          URL to request.
 * @param array|string $post         POST data; when empty a GET is sent. Arrays
 *                                   are URL-encoded, strings are sent as given.
 * @param string       $cookie       Cookie header value to send, if any.
 * @param int|bool     $returnCookie When truthy, response headers are captured
 *                                   and the first cookie is returned too.
 * @return string|array The response body; or, when $returnCookie is truthy,
 *                      array('cookie' => ..., 'content' => ...). On a transfer
 *                      error the cURL error message is returned as a string.
 */
function curl_request($url,$post='',$cookie='', $returnCookie=0){
    if (! extension_loaded('curl')) {
        file_exists('./ext/php_curl.dll') && dl('php_curl.dll'); // Try to load the extension
    }

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)');
    // Redirect following cannot be enabled under open_basedir / safe_mode.
    if (ini_get('open_basedir') == '' && strtolower(ini_get('safe_mode')) != 'on'){
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    }
    curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
    curl_setopt($curl, CURLOPT_REFERER, "http://XXX");
    if($post) {
        curl_setopt($curl, CURLOPT_POST, 1);
        // http_build_query() only accepts arrays/objects; a ready-made string
        // body is passed through unchanged instead of failing.
        $payload = (is_array($post) || is_object($post)) ? http_build_query($post) : (string) $post;
        curl_setopt($curl, CURLOPT_POSTFIELDS, $payload);
    }
    if($cookie) {
        curl_setopt($curl, CURLOPT_COOKIE, $cookie);
    }
    curl_setopt($curl, CURLOPT_HEADER, $returnCookie);
    curl_setopt($curl, CURLOPT_TIMEOUT, 150);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $data = curl_exec($curl);
    if (curl_errno($curl)) {
        // Release the handle on the failure path as well.
        $error = curl_error($curl);
        curl_close($curl);
        return $error;
    }
    // Must be read before the handle is closed.
    $headerSize = (int) curl_getinfo($curl, CURLINFO_HEADER_SIZE);
    curl_close($curl);

    if (!$returnCookie) {
        return $data;
    }

    // Split by the header size reported by cURL, so redirects (several header
    // blocks) and responses without a blank-line separator are handled.
    $raw    = (string) $data;
    $header = (string) substr($raw, 0, $headerSize);
    $body   = (string) substr($raw, $headerSize);

    // First cookie's name=value pair; header names are case-insensitive and
    // the pair need not be followed by a ';'.
    $cookieValue = '';
    if (preg_match('/^Set-Cookie:\s*([^;\r\n]*)/mi', $header, $matches)) {
        $cookieValue = trim($matches[1]);
    }

    $info = array();
    $info['cookie']  = $cookieValue;
    $info['content'] = $body;
    return $info;
}
 
//echo $tag;
/**
 * Count the characters (not bytes) of a UTF-8 string.
 *
 * @param string|null $string
 * @return int Number of matches of a single UTF-8 character in $string.
 */
function utf8_strlen($string = null) {
    // Split the string into single characters; `u` makes the pattern
    // UTF-8 aware and `s` lets the dot match line breaks too.
    $chars = array();
    preg_match_all("/./us", $string, $chars);

    // The full-pattern matches are the individual characters.
    return count($chars[0]);
}
 
 
/**
 * Escape a string so it can be embedded literally in a `/`-delimited PCRE
 * pattern.
 *
 * @param string $str Literal text.
 * @return string The text with every regex metacharacter (and `/`) escaped.
 */
function reg_escape( $str ) 
{ 
    // The hand-written table used "\$" inside double quotes, which PHP reads
    // as a bare `$`, so dollar signs were never escaped. preg_quote() covers
    // the full metacharacter set plus the delimiter.
    return preg_quote( $str, '/' ); 
} 
   
/**
* Strip attribute Class
* Remove attributes from XML elements
*
* Usage: set $allow (attribute names kept on every element), $exceptions
* (element name => attribute names kept on that element only) and optionally
* $ignore (element names left untouched), then call strip().
*
* @author David (semlabs.co.uk)
* @version 0.2.1
*/ 
   
class cleanHtml{ 
       
    /** Markup being processed; holds the result after strip(). */
    public $str         = ''; 
    /** Attribute names that are kept on every element. */
    public $allow       = array(); 
    /** Map of element name => attribute names kept on that element. */
    public $exceptions  = array(); 
    /** Element names whose attributes are left as they are. */
    public $ignore      = array(); 
       
    /**
     * Remove all attributes that are neither allowed nor listed as exceptions.
     *
     * @param string $str Markup to clean.
     * @return mixed The cleaned markup; non-string or empty input is returned
     *               unchanged.
     */
    public function strip( $str ) 
    { 
        $this->str = $str; 
           
        if( is_string( $str ) && strlen( $str ) > 0 ) 
        { 
            $res = $this->findElements(); 
            // A string means no element carried attributes: nothing to do.
            if( is_string( $res ) ) 
                return $res; 
            $nodes = $this->findAttributes( $res ); 
            $this->removeAttributes( $nodes ); 
        } 
           
        return $this->str; 
    } 
       
    /**
     * Collect every opening tag that has something after its element name.
     *
     * @return array|string List of nodes (literal, name, attributes), or the
     *                      unchanged markup when there is nothing to process.
     */
    private function findElements() 
    { 
        # Create an array of elements with attributes 
        $nodes = array(); 
        preg_match_all( "/<([^ !\/\>\n]+)([^>]*)>/i", $this->str, $elements ); 
        foreach( $elements[1] as $el_key => $element_name ) 
        { 
            $attributes = $elements[2][$el_key]; 
            if( $attributes === '' ) 
                continue; 
               
            if( is_array( $this->ignore ) && !in_array( $element_name, $this->ignore, true ) ) 
                $nodes[] = array( 'literal' => $elements[0][$el_key], 'name' => $element_name, 'attributes' => $attributes ); 
        } 
           
        # Return the XML if there were no attributes to remove 
        # (empty() avoids reading index 0 of an empty list, which raised a warning) 
        if( empty( $nodes ) ) 
            return $this->str; 
           
        return $nodes; 
    } 
       
    /**
     * Parse the raw attribute text of every node into name/value pairs.
     *
     * @param array $nodes Nodes from findElements().
     * @return array The nodes with 'attributes' replaced by a list of
     *               (literal, name, value) entries, or null when none parsed.
     */
    private function findAttributes( $nodes ) 
    { 
        # name = "double quoted" | 'single quoted' | unquoted 
        # Each form is matched on its own, so a value may contain the other 
        # quote character and an unquoted value stops at the next whitespace. 
        $pattern = '/([^\s=\/"\'<>]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/'; 
           
        # Extract attributes 
        foreach( $nodes as $key => $node ) 
        { 
            $atts = array(); 
            preg_match_all( $pattern, $node['attributes'], $sets, PREG_SET_ORDER ); 
            foreach( $sets as $set ) 
            { 
                # Exactly one of the three value groups is filled in 
                $value = ''; 
                foreach( array( 2, 3, 4 ) as $group ) 
                { 
                    if( isset( $set[$group] ) && $set[$group] !== '' ) 
                    { 
                        $value = $set[$group]; 
                        break; 
                    } 
                } 
                $atts[] = array( 'literal' => $set[0], 'name' => $set[1], 'value' => $value ); 
            } 
               
            # Index assignment instead of a by-reference loop: no dangling 
            # reference, and no read of an undefined $atts when nothing matched 
            $nodes[$key]['attributes'] = $atts ? $atts : null; 
        } 
           
        return $nodes; 
    } 
       
    /**
     * Rewrite each collected tag in $this->str, keeping only permitted attributes.
     *
     * @param array $nodes Nodes from findAttributes().
     * @return void
     */
    private function removeAttributes( $nodes ) 
    { 
           
        # Remove unwanted attributes 
        foreach( $nodes as $node ) 
        { 
               
            # Check if node has any attributes to be kept 
            $node_name = $node['name']; 
            $new_attributes = ''; 
            if( is_array( $node['attributes'] ) ) 
            { 
                foreach( $node['attributes'] as $attribute ) 
                { 
                    if( ( is_array( $this->allow ) && in_array( $attribute['name'], $this->allow, true ) ) || $this->isException( $node_name, $attribute['name'], $this->exceptions ) ) 
                        $new_attributes = $this->createAttributes( $new_attributes, $attribute['name'], $attribute['value'] ); 
                } 
            } 
            $replacement = ( $new_attributes ) ? "<$node_name $new_attributes>" : "<$node_name>"; 
            # Plain string replacement: the tag is a literal, and a regex 
            # replacement would interpret "$1" or "\1" inside attribute values 
            $this->str = str_replace( $node['literal'], $replacement, $this->str ); 
        } 
           
    } 
       
    /**
     * Tell whether an attribute is explicitly permitted for an element.
     *
     * @param string $element_name
     * @param string $attribute_name
     * @param array  $exceptions Unused; the instance's $exceptions is consulted.
     * @return bool
     */
    private function isException( $element_name, $attribute_name, $exceptions ) 
    { 
        if( is_array( $this->exceptions ) && array_key_exists( $element_name, $this->exceptions ) ) 
        { 
            if( in_array( $attribute_name, (array) $this->exceptions[$element_name], true ) ) 
                return true; 
        } 
           
        return false; 
    } 
       
    /**
     * Append one name="value" pair to an attribute string.
     *
     * @param string $new_attributes Attributes built so far.
     * @param string $name
     * @param string $value
     * @return string
     */
    private function createAttributes( $new_attributes, $name, $value ) 
    { 
        if( $new_attributes ) 
            $new_attributes .= " "; 
        # The value is always emitted in double quotes, so a double quote that 
        # came from a single-quoted source value has to be encoded 
        $new_attributes .= $name . '="' . str_replace( '"', '&quot;', $value ) . '"'; 
           
        return $new_attributes; 
    } 
   
} 
 
?>

以上就是完整的 PHP 代码。get_summary() 中的 APPID、APPKEY、SECRETKEY 只是占位值,需要自行到百度申请后填入,申请地址:https://ai.baidu.com/tech/nlp_apply/news_summary

将以上代码拷贝到php文件中,放到火车头的插件目录,最后在火车头选择此插件。采集文章后会自动生成摘要。结合 《如何在WordPress文章中调用摘要》 中的方法,可实现一定程度的伪原创。

火车头如何设置请自行百度。

赞(0)
【声明】:利用百度自然语言处理功能制作插件,智能对文章进行归纳概括,提高文章伪原创来源于网络。本站不参与任何交易,也非中介,仅记录个人感兴趣的主机测评结果和优惠活动,内容均不作直接、间接、法定、约定的保证。

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址