用 Go 实现的一个简单爬虫(豆瓣)

最近在学习用 Go 语言写爬虫,写了个小 demo。

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"regexp"
	"strconv"
)

// Movie holds the raw text values extracted from one Douban movie page.
// All fields are unexported strings and are stored exactly as captured.
type Movie struct {
	// name is the title text, mark the average rating, person the director.
	name, mark, person string
	// time is the runtime attribute value; url is the href of an external
	// (target="_blank", rel="nofollow") link on the page.
	time, url string
}

// main scrapes a fixed list of Douban movie IDs concurrently, starting one
// goroutine per ID, and returns once every goroutine has signaled completion.
func main() {
	ids := []int{1291841, 26761416, 1309220, 1300741, 1293172}

	// One channel per ID. Each channel has to be created explicitly:
	// make([]chan int, n) only yields nil channels, and a send or receive on
	// a nil channel blocks forever.
	chs := make([]chan int, len(ids))
	for i, id := range ids {
		// Buffer of one so a finished worker never has to wait for main to
		// reach its channel in the receive loop below.
		chs[i] = make(chan int, 1)
		go child(id, chs[i])
	}

	// Wait for the completion signal of every worker.
	for _, ch := range chs {
		<-ch
	}
}

// child downloads the Douban page for the movie with the given id, prints the
// extracted Movie, and then sends 1 on ch to signal that it has finished.
//
// A failed request, a non-200 response, or an unreadable body is printed
// instead of panicking, and the completion signal is still sent afterwards so
// the receiver is not left waiting because of one bad page.
func child(id int, ch chan int) {
	url := "https://movie.douban.com/subject/" + strconv.Itoa(id) + "/"

	movie, err := fetchMovie(url)
	if err != nil {
		fmt.Println("fetch", url, "failed:", err)
	} else {
		fmt.Println(movie)
	}

	ch <- 1
}

// fetchMovie requests url and extracts the Movie fields from the response
// body. It returns an error if the request fails, the status is not 200 OK,
// or the body cannot be read.
func fetchMovie(url string) (*Movie, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// An error page would not contain the expected markup, so reject it
	// before trying to parse it.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %s", resp.Status)
	}

	sHtml, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading body: %v", err)
	}

	movie := new(Movie)
	movie.name = GetValue(`<span\s*property="v:itemreviewed">(.*)</span>`, &sHtml)
	movie.mark = GetValue(`<strong\s*class="ll\s*rating_num"\s*property="v:average">(.*)</strong>`, &sHtml)
	movie.person = GetValue(`<a href="/celebrity/[0-9]+/" rel="v:directedBy">(.*)</a>`, &sHtml)
	movie.time = GetValue(`<span property="v:runtime" content="(.*)">.*</span>`, &sHtml)
	movie.url = GetValue(`<a href="(.*)" target="_blank" rel="nofollow">.*</a>`, &sHtml)
	return movie, nil
}

// GetValue returns the text captured by the first parenthesized group of rule
// in the first match found in *sHtml.
//
// It returns "" when sHtml is nil, when rule does not match, or when rule has
// no capture group. rule is compiled with regexp.MustCompile, so an invalid
// pattern panics.
func GetValue(rule string, sHtml *[]byte) string {
	reg := regexp.MustCompile(rule)
	if sHtml == nil {
		return ""
	}

	// Match on the bytes directly; this avoids copying the whole page into a
	// string on every call.
	match := reg.FindSubmatch(*sHtml)
	if len(match) < 2 {
		// No match at all, or the pattern has no capture group.
		return ""
	}
	return string(match[1])
}

  

猜你喜欢

转载自www.cnblogs.com/piaobodewu/p/11086811.html