编程语言有很多种,想要全部学会简直是天方夜谭,而且每个细分领域都有很多需要学习的知识,所以新手一定要先专注于一个领域,把它学透、学扎实。下面是用几种语言编写的一些简单爬虫示例,大家可以参考一下。
Python 简单爬虫
import requests, re
if __name__ == "__main__":
    # Fetch the page. The timeout keeps the script from hanging forever if
    # the server never answers.
    r = requests.get(
        'http://docs.python-requests.org/zh_CN/latest/user/quickstart.html',
        timeout=10,
    )
    r.encoding = "UTF-8"
    print(r.text)  # print the page content
    # Regex search: "." matches any character, "*" repeats it any number of
    # times, and group(1) is the text captured by the first pair of parentheses.
    search = re.search(r'href="#">(.*)</a><ul>', r.text)
    # re.search returns None when nothing matches; guard against that so the
    # script does not die with AttributeError if the page markup changes.
    if search is not None:
        print(search.group(1))
    else:
        print("no match found")
golang简单爬虫
package main
import (
"fmt"
"io/ioutil"
"net/http"
"regexp"
)
// main downloads the page at the hard-coded URL and prints the value of the
// content attribute of its "private:description" meta tag.
func main() {
	resp, err := http.Get("https://studygolang.com/static/pkgdoc/pkg/net_http.htm")
	if err != nil {
		// resp is nil when Get fails, so it must not be used (not even to
		// close the body).
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("reading response body failed:", err)
		return
	}

	re := regexp.MustCompile(`<meta name="private:description" content="(.*)">`)
	// FindSubmatch returns nil when the pattern does not match; indexing [1]
	// unconditionally would panic in that case.
	match := re.FindSubmatch(body)
	if match == nil {
		fmt.Println("description meta tag not found")
		return
	}
	fmt.Println(string(match[1]))
}
C语言简单爬虫(注:下面的示例代码实际实现的是对 10 个整数的冒泡排序,并不是爬虫)
#include<stdio.h>
/*
 * Reads 10 integers from standard input, sorts them in ascending order with
 * bubble sort, and prints them to standard output separated by tabs.
 *
 * Returns 0 on success, 1 if fewer than 10 integers could be read.
 */
int main(void)
{
    int a[10], i, j, t;

    for (i = 0; i <= 9; i++) {
        /* scanf reports how many items it converted; anything other than 1
         * means a[i] was not assigned, so stop instead of sorting garbage. */
        if (scanf("%d", &a[i]) != 1) {
            fprintf(stderr, "expected 10 integers on input\n");
            return 1;
        }
    }

    /* Bubble sort: after pass i the largest i+1 values sit at the end, so
     * the inner loop only needs to scan the first 9-i adjacent pairs. */
    for (i = 0; i < 9; i++) {
        for (j = 0; j < 9 - i; j++) {
            if (a[j] > a[j + 1]) {
                t = a[j];
                a[j] = a[j + 1];
                a[j + 1] = t;
            }
        }
    }

    for (i = 0; i <= 9; i++)
        printf("%d\t", a[i]);

    return 0;
}
java语言爬虫(Maven 项目的 pom.xml 依赖配置)
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor: declares the jsoup and fastjson dependencies and
     compiles the project with Java 1.8 source/target levels. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>BeCarefulInPrison</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <!-- NOTE(review): 1.2.51 is an old fastjson release; check the
             project's security advisories and upgrade if possible. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.51</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.2</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>