PHP 爬虫抓取歌词

<?php
// Label the response as UTF-8 HTML; the script echoes a non-ASCII (Chinese)
// status message from Small_crawler::perform() when no links are found.
header("Content-type:text/html;charset=utf-8");
/**
 * Small recursive crawler that follows "/song/<id>" links starting from the
 * configured base URL, downloads the lyric file referenced on each song page
 * and appends its text (with bracketed tags removed) to a local file.
 *
 * The crawl stops when no unvisited song links remain, or when the configured
 * number of lyrics has been written (in which case write() ends the script).
 */
class Small_crawler
{
    protected $regular;       // Regex locating song links (<a href="/song/<id>") in a page
    protected $url;           // Base URL of the site; also the crawl entry page
    protected $i;             // Number of lyrics written so far
    protected $count;         // Maximum number of lyrics to write
    protected $lyrics_ze;     // Regex locating the <div id="lyricCont"> container on a song page
    protected $lyrics_lrc;    // Regex locating a quoted http:// URL inside that container
    protected $txt;           // Path of the file lyrics are appended to
    protected $visited;       // Song paths already fetched, stored as path => true


    public function __construct()
    {
        // Stop at the closing quote of the href. The previous pattern continued
        // with a greedy ".*" up to the last "</a>" on the line, so only the first
        // song link of each HTML line was ever seen, and "([0-9])*" also accepted
        // an empty id.
        $this->regular = "/<a href=\"\/song\/[0-9]+\"/";

        $this->lyrics_ze = "/<div id=\"lyricCont\".*<\/div>/";

        $this->lyrics_lrc = "/\"http:\/\/.*\"/";

        $this->url = 'http://music.baidu.com/';

        $this->txt = "playlist.txt";

        $this->i = 0;

        $this->count = 110;

        $this->visited = [];
    }


    /**
     * Start the crawl from the base URL.
     *
     * Prints a message and terminates the script when the entry page cannot
     * be downloaded or contains no song links.
     */
    public function perform()
    {
        $data = file_get_contents($this->url);

        if ($data !== false && preg_match_all($this->regular, $data, $mar)) {
            $this->lyrics($mar);
            return;
        }

        echo '已没有可用信息';
        exit;
    }


    /**
     * Process one batch of song links: write the lyrics of every link in the
     * batch first, then recurse into the links found on those song pages.
     *
     * May terminate the script through write() once the quota is reached.
     *
     * @param array $data preg_match_all() result; $data[0] holds the matched
     *                    anchor fragments, each containing a quoted href
     */
    public function lyrics($data)
    {
        if (!is_array($data) || empty($data[0]) || !is_array($data[0])) {
            return;
        }

        // Link batches discovered on this level's song pages. Collected during
        // the single download of each page so no page is fetched twice.
        $next = [];

        foreach ($data[0] as $anchor) {
            $path = $this->song_path($anchor);

            // Skip malformed anchors and pages already handled; without this
            // check, pages linking to each other are crawled over and over.
            if ($path === null || isset($this->visited[$path])) {
                continue;
            }
            $this->visited[$path] = true;

            $page = file_get_contents($this->page_url($path));
            if ($page === false) {
                continue;
            }

            $lyrics_url = $this->lyrics_url($page);
            if ($lyrics_url !== null) {
                $lyrics_file = file_get_contents($lyrics_url);

                // A failed download must not consume one of the quota slots.
                if ($lyrics_file !== false) {
                    $this->write($lyrics_file);
                }
            }

            if (preg_match_all($this->regular, $page, $links)) {
                $next[] = $links;
            }
        }

        foreach ($next as $links) {
            $this->lyrics($links);
        }
    }


    /**
     * Append one lyric text to the output file, followed by a numbered
     * separator line, and advance the counter.
     *
     * Terminates the script when the configured quota has already been reached.
     *
     * @param string $lyrics_file raw lyric file content
     * @throws RuntimeException when the output file cannot be written
     */
    public function write($lyrics_file)
    {
        if ($this->i >= $this->count) {
            exit; // quota reached: stop the whole crawl
        }

        // Remove each bracketed tag on its own. A greedy "\[.*\]" would also
        // delete any lyric text sitting between two tags on the same line.
        $acs = preg_replace('/\[[^\]\r\n]*\]/', '', $lyrics_file);
        if ($acs === null) {
            // The regex engine failed; keep the unmodified text rather than losing it.
            $acs = $lyrics_file;
        }

        $entry = $acs . "\r\n" . $this->i . ".-----------------------------------------------------------\r\n";

        // file_put_contents opens, appends and closes in one call, so no file
        // handle is left open between writes.
        if (file_put_contents($this->txt, $entry, FILE_APPEND) === false) {
            throw new RuntimeException('Unable to write lyrics to ' . $this->txt);
        }

        $this->i++;
    }


    /**
     * Extract the href value (the first double-quoted segment) from an anchor fragment.
     *
     * @param string $anchor
     * @return string|null the path, or null when the fragment has no quoted value
     */
    protected function song_path($anchor)
    {
        $parts = explode('"', $anchor);

        return !empty($parts[1]) ? $parts[1] : null;
    }


    /**
     * Join the base URL and a site path with exactly one slash between them.
     *
     * @param string $path
     * @return string
     */
    protected function page_url($path)
    {
        return rtrim($this->url, '/') . '/' . ltrim($path, '/');
    }


    /**
     * Find the lyric file URL inside the lyric container of a song page.
     *
     * @param string $page HTML of a song page
     * @return string|null the URL, or null when the page has no container or no quoted URL
     */
    protected function lyrics_url($page)
    {
        if (!preg_match($this->lyrics_ze, $page, $container)) {
            return null;
        }

        if (!preg_match($this->lyrics_lrc, $container[0], $quoted)) {
            return null;
        }

        // The match starts with a double quote, so segment 1 is the URL itself.
        $parts = explode('"', $quoted[0]);

        return !empty($parts[1]) ? $parts[1] : null;
    }

}

// Run: build the crawler and start crawling from its configured entry page.
$ts = new Small_crawler;
$ts->perform();

?>

猜你喜欢

转载自blog.csdn.net/qq_24909089/article/details/79852823