1、正则表达式应用
package main
import "fmt"
import "regexp"
/*
正则表达式 能够匹配的字符串
abc abc
a.c .表示通配,除了\n以外所有的字符 axc aac abc
a\.c 只能匹配a.c
a\dc 能匹配(0-9)的任意数字 a1c a2c a3c
abc* 能匹配前一个字符的0次或无限次 ab abc abcc abccccccc
abc+ 能匹配前一个字符的1次或无限次 abc abcc abccccccc
*/
// content1 is the sample text that main scans for decimal numbers.
var content1 = "3.14 123224 .68 hello 1.0 abc 6666 888"
// main finds every "digits.digits" token in content1 and prints the matches,
// first as the raw result slice and then one full match per line.
func main() {
	// MustCompile panics on an invalid pattern; this one is a literal.
	decimalRe := regexp.MustCompile(`\d+\.\d+`)

	// A limit of -1 asks for every match; a positive n stops after n matches.
	matches := decimalRe.FindAllStringSubmatch(content1, -1)
	fmt.Printf("%+v\n", matches)
	fmt.Println("----------------")

	// Element 0 of each result is the full text matched by the pattern.
	for i := 0; i < len(matches); i++ {
		fmt.Printf("%+v\n", matches[i][0])
	}
}
package main
import "fmt"
import "regexp"
// str is the HTML-like sample that main scans for <div> elements; its second
// <div> element spans several lines.
var str = `
<tile>标题</tile>
<div>hahah</div>
<div>xixi
xixixi
hahahah
wawawa</div>
`
//<div>(.*?)</div>
/*
go语言中
反引号中:\n表示一个\字符和一个n字符(不转义)
`
dabc
\n
\r
`
双引号中:\n表示换行,\表示转义
"
dbad
\n
\r
"
*/
// main extracts every <div>...</div> element from str, including elements
// that span several lines, and prints each match with and without its tags.
func main() {
	// A plain `<div>(.*?)</div>` misses the multi-line element because "."
	// does not match "\n" by default. x*? is the lazy form of x*: zero or more
	// repetitions, preferring as few as possible.
	//
	// (?flags:re) sets flags for re only and is a non-capturing group:
	//   i  case-insensitive (default false)
	//   m  multi-line mode: ^ and $ also match at line boundaries (default false)
	//   s  let . match \n (default false)
	//   U  ungreedy: swap the meaning of x* and x*?, x+ and x+?, ... (default false)
	divRe := regexp.MustCompile(`<div>(?s:(.*?))</div>`)

	// A limit of -1 asks for every match; a positive n stops after n matches.
	found := divRe.FindAllStringSubmatch(str, -1)
	// The captured text can contain newlines, so this dump may span lines.
	fmt.Printf("%+v\n", found)

	for i := range found {
		whole, inner := found[i][0], found[i][1]
		fmt.Println("=================")
		fmt.Println(whole) // full match, including the <div> tags
		fmt.Println(inner) // capture group only, without the tags
	}
}
通过正则 爬取网页数据
package main
import (
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"strings"

	"github.com/axgle/mahonia"
)
// Spider holds the state of the crawler: the listing page it is working on.
type Spider struct {
	// page is the number of the listing page that SpiderOnePage fetches.
	page int
}
// ConvertToString runs src through a mahonia decoder for the srcCode charset,
// then passes the resulting bytes through a decoder for the tagCode charset
// and returns what that second step produced as a string.
//
// The count and error returned by Translate are discarded.
// NOTE(review): mahonia.NewDecoder presumably returns nil for an unknown
// charset name, which would make these calls panic — confirm against the
// library before passing caller-supplied charset names.
func ConvertToString(src string, srcCode string, tagCode string) string {
	decoded := mahonia.NewDecoder(srcCode).ConvertString(src)
	_, translated, _ := mahonia.NewDecoder(tagCode).Translate([]byte(decoded), true)
	return string(translated)
}
// httpGet fetches url with an HTTP GET and returns the response body, passed
// through ConvertToString(body, "gbk", "utf-8"), together with the HTTP status
// code of the response.
//
// When the request cannot be performed, or the body cannot be read to the end,
// the error is printed and ("", -100) is returned. Reporting the HTTP status
// after a failed read could hand the caller a 200 with an empty page, which
// callers that only compare statusCode with 200 would accept as a success.
func httpGet(url string) (conter string, statusCode int) {
	// failed is the sentinel status for transport-level failures.
	const failed = -100

	resp, err := http.Get(url)
	if err != nil { // the request itself failed
		fmt.Println(err)
		return "", failed
	}
	defer resp.Body.Close()

	// Read the whole response body.
	data, err := ioutil.ReadAll(resp.Body)
	if err != nil { // truncated or broken response
		fmt.Println(err)
		return "", failed
	}

	return ConvertToString(string(data), "gbk", "utf-8"), resp.StatusCode
}
//获取每个段子的数据
// SpiderOneDz downloads the page at url and extracts one entry from it.
//
// dz_title is the text captured by the first <h1>...</h1> match; dz_content is
// the text captured by the last <td><p>...</p></td> match, which may span
// lines. Either result stays empty when its pattern does not match. If httpGet
// reports a status other than 200, a message is printed and both results are
// empty.
func (s *Spider) SpiderOneDz(url string) (dz_title string, dz_content string) {
	body, status := httpGet(url)
	if status != 200 {
		fmt.Println("SpiderOneDz is httpGet err")
		return
	}

	// Title: only the first heading on the page is used.
	titleRe := regexp.MustCompile(`<h1>(.*?)</h1>`)
	if first := titleRe.FindStringSubmatch(body); first != nil {
		dz_title = first[1]
	}

	// Content: when several cells match, the last one wins.
	contentRe := regexp.MustCompile(`<td><p>(?s:(.*?))</p></td>`)
	if cells := contentRe.FindAllStringSubmatch(body, -1); len(cells) > 0 {
		dz_content = cells[len(cells)-1][1]
	}
	return dz_title, dz_content
}
// StroeDzToFile appends the given entries to mydata.txt in the current
// directory, creating the file if needed. Each entry is written as a
// "=============" separator line, the title, a "==============" separator line
// and the content.
//
// title_slice drives the loop; a title without a matching element in
// conter_slice is written with empty content instead of indexing past the end
// of conter_slice. Open and write failures are printed and end the call.
func (s *Spider) StroeDzToFile(title_slice []string, conter_slice []string) {
	filename := "mydata.txt"
	f, err := os.OpenFile(filename, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0644)
	if err != nil {
		fmt.Println("open file err", err)
		return
	}
	defer f.Close()

	for i, title := range title_slice {
		var content string
		if i < len(conter_slice) {
			content = conter_slice[i]
		}
		// One formatted write per entry so a failure is detected immediately.
		if _, err := fmt.Fprintf(f, "\n=============\n%s\n==============\n%s", title, content); err != nil {
			fmt.Println("write file err", err)
			return
		}
	}
}
// SpiderOnePage crawls the listing page numbered s.page: it downloads the
// listing, follows every entry link found on it, prints each entry's title and
// content, and finally hands all of them to StroeDzToFile.
//
// If the listing cannot be fetched with status 200, a message is printed and
// nothing is stored.
func (s *Spider) SpiderOnePage() {
	fmt.Println("正在爬取", s.page, "页")

	// Page 1 is index.html; every later page is list_<n>.html.
	url := "https://www.xxxx.com/dz/index.html"
	if s.page != 1 {
		url = "https://www.xxxx.com/dz/list_" + strconv.Itoa(s.page) + ".html"
	}
	fmt.Println(url)

	// Download the listing page.
	listing, status := httpGet(url)
	if status != 200 {
		fmt.Println("httpGet err")
		return
	}

	// Each entry link is the href that follows an <h4> tag.
	linkRe := regexp.MustCompile(`<h4> <a href="(.*?)"`)
	links := linkRe.FindAllStringSubmatch(listing, -1)

	// Collected titles and contents, index-aligned.
	titles := make([]string, 0, len(links))
	contents := make([]string, 0, len(links))

	for _, link := range links {
		// NOTE(review): this host ("www.xxxxx.com") differs from the listing
		// host above ("www.xxxx.com") — confirm which one is intended.
		title, content := s.SpiderOneDz("https://www.xxxxx.com" + link[1])

		// Normalize line endings and drop stray closing paragraph tags.
		content = strings.Replace(content, "\r\n", "\n", -1)
		content = strings.Replace(content, "</p>", "", -1)

		fmt.Println("title===", title)
		fmt.Println("content===", content)

		titles = append(titles, title)
		contents = append(contents, content)
	}

	s.StroeDzToFile(titles, contents)
	fmt.Println("代码结束")
}
// doWork runs the interactive crawl loop. Starting at page 1 it prints a
// prompt, reads one word from standard input, and crawls the next page unless
// that word is "exit".
//
// The loop also stops when standard input reaches end of input. Without that
// check a closed or exhausted stdin makes every Scanf fail immediately, and
// the crawler would keep requesting pages forever.
func (s *Spider) doWork() {
	fmt.Println("开始爬取")
	s.page = 1
	for {
		fmt.Println("请输入任意键,开始爬取下一页,输入exit 退出")

		// cmd is fresh on every pass so a failed scan cannot reuse old input.
		var cmd string
		_, err := fmt.Scanf("%s", &cmd)
		if err == io.EOF || err == io.ErrUnexpectedEOF || cmd == "exit" {
			fmt.Println("exit")
			break
		}
		// Any other scan error (for example a bare Enter) still means
		// "continue with the next page".

		// Crawl the current page, then move on to the next one.
		s.SpiderOnePage()
		s.page++
	}
}
// main creates a zero-valued Spider and starts its interactive crawl loop.
func main() {
	var sp Spider
	sp.doWork()
}
map 数据结构
package main
import "fmt"
// main walks through basic map usage: the nil zero value, initialization,
// iteration, and updating and deleting entries. Map iteration order is not
// fixed, so the lines printed by each loop may appear in any order.
func main() {
	const separator = "-------------"

	// A declared but uninitialized map is nil.
	var mymap map[string]string
	if mymap == nil {
		fmt.Println("mymap is nil")
	}

	// Giving the map storage makes it non-nil and writable.
	mymap = map[string]string{
		"aaa": "1111",
		"bbb": "2222",
		"ccc": "3333",
	}
	if mymap != nil {
		fmt.Println("mymap is not nil")
	}

	// Print every key followed by its value.
	for k, v := range mymap {
		fmt.Println(k)
		fmt.Println(v)
	}
	fmt.Println(separator)

	// Print only the values.
	for _, v := range mymap {
		fmt.Println(v)
	}
	fmt.Println(separator)

	// interface{} values let one map hold both strings and ints.
	student := map[string]interface{}{
		"Name": "xxx",
		"Age":  19,
	}
	for k, v := range student {
		fmt.Println(k)
		fmt.Println(v)
	}
	fmt.Println(separator)

	// Overwrite an existing entry.
	student["Age"] = 29
	for _, v := range student {
		fmt.Println(v)
	}

	// Remove an entry, then list the keys that are left.
	delete(student, "Age")
	for k := range student {
		fmt.Println(k)
	}
}
go语言中的json解析
package main
import "fmt"
import "encoding/json"
// Movie is the record that encoding serializes and decoding parses. The fields
// carry no struct tags, so encoding/json uses the field names as JSON keys.
type Movie struct {
	Title       string
	Year, Price int
}
var (
	// my_Year is the target decoding uses to pick just the Year and Title
	// fields out of the JSON document.
	my_Year struct {
		Year  int
		Title string
	}

	// jsons carries the JSON bytes from encoding to decoding.
	jsons []byte
)
// encoding marshals a sample Movie to JSON, prints the result, and stores the
// bytes in jsons. If marshalling fails it prints a message and leaves jsons
// untouched.
func encoding() {
	film := Movie{Title: "西红柿首富", Year: 2018, Price: 50}

	encoded, err := json.Marshal(film)
	if err != nil {
		fmt.Println("json.Marshal err")
		return
	}

	fmt.Printf("json_str %s\n", encoded)
	fmt.Println("---------")

	// Keep the bytes for decoding.
	jsons = encoded
}
// decoding parses the JSON held in jsons three ways and prints each result:
// into my_Year (a subset of the fields), into a full Movie, and into a generic
// map. A result is printed only when its Unmarshal call succeeds. The Price
// and Title entries of the map are printed at the end in either case.
func decoding() {
	fmt.Printf("json_str %s\n", jsons)

	// 1. Decode into a struct that declares only some of the fields.
	err := json.Unmarshal(jsons, &my_Year)
	if err == nil {
		fmt.Printf("%+v\n", my_Year)
	}
	fmt.Println("---------")

	// Decode every field at once into the full struct.
	var film Movie
	err = json.Unmarshal(jsons, &film)
	if err == nil {
		fmt.Printf("%+v\n", film)
	}
	fmt.Println("---------")

	// 2. Decode into a map keyed by JSON field name.
	fields := make(map[string]interface{})
	err = json.Unmarshal(jsons, &fields)
	if err == nil {
		fmt.Printf("%+v\n", fields)
	}
	fmt.Println("---------")

	fmt.Println(fields["Price"])
	fmt.Println(fields["Title"])
}
// main runs the JSON round trip: encoding fills jsons, then decoding reads it.
func main() {
	encoding() // must run first; decoding reads what this stores in jsons
	decoding()
}