Java 网络编程 Java基本爬虫 URL类获取网络资源.

一、URL类可获取网址的相关信息和网络的资源.

### 二、URL类获取网址的信息

```java
package Java学习.网络编程.TCP_UDP.URL类;

import java.net.MalformedURLException;
import java.net.URL;

/**
 * Demonstrates how {@link URL} breaks an address down into its components.
 *
 * <p>A sample address is parsed with the {@code URL(String)} constructor and one line is
 * printed per component getter: protocol, host, port, path, file (path plus query),
 * query, authority and user info.
 *
 * <p>For the sample address the getters yield, in order: {@code http}, {@code localhost},
 * {@code 8080}, {@code /bmft/index.html},
 * {@code /bmft/index.html?username=bmft&password=123},
 * {@code username=bmft&password=123}, {@code localhost:8080} and {@code null}
 * (the address has no user-info part).
 */
public class URL类基本用法 {
    public static void main(String[] args) {
        final URL parsed;
        try {
            parsed = new URL("http://localhost:8080/bmft/index.html?username=bmft&password=123");
        } catch (MalformedURLException badAddress) {
            // Without a parsed URL there is nothing to show; report and stop.
            badAddress.printStackTrace();
            return;
        }

        // One entry per component, in the order they are printed.
        String[] report = {
                "获取协议Protocol:" + parsed.getProtocol(),
                "获取主机Host:" + parsed.getHost(),
                "端口port: " + parsed.getPort(),
                "文件路径Path: " + parsed.getPath(),
                "文件File: " + parsed.getFile(),
                "问题Query:" + parsed.getQuery(),
                "权限Authority: " + parsed.getAuthority(),
                "用户信息:" + parsed.getUserInfo()
        };
        for (String line : report) {
            System.out.println(line);
        }
    }
}
```
Run:
获取协议Protocol:http
获取主机Host:localhost
端口port: 8080
文件路径Path: /bmft/index.html
文件File: /bmft/index.html?username=bmft&password=123
问题Query:username=bmft&password=123
权限Authority: localhost:8080
用户信息:null

Process finished with exit code 0
### 三、获取网络资源(爬虫)
#### 1. 思路分析
1. 使用网址新建一个 URL 对象
2. 通过 (HttpURLConnection) url.openConnection() 建立连接
3. 使用 httpURLConnection.getInputStream() 方法获取该网址的输入流
4. 输入流配合本地的输出流, 将爬取到的文件写入本地


#### 2.代码:
```java
package Java学习.网络编程.TCP_UDP.URL类;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Downloads resources over HTTP(S) and stores them in a fixed local directory.
 *
 * <p>Steps:
 * <ol>
 *   <li>Build a {@link URL} from the address.</li>
 *   <li>Open an {@link HttpURLConnection} to it.</li>
 *   <li>Copy the response stream into a local file named after the last path segment.</li>
 * </ol>
 */
public class URL下载网络资源 {
    /**
     * Fetches the resource at {@code addressUrl} and writes it into the download directory.
     *
     * <p>I/O failures are printed to standard error and are not propagated to the caller.
     * Both streams are closed and the connection is disconnected whether or not the
     * transfer succeeds.
     *
     * @param addressUrl absolute address of the resource; the opened connection is cast to
     *                   {@link HttpURLConnection}, so it must use an HTTP-based protocol
     */
    private void downLoad(String addressUrl) {
        String localPath = "D:\\Program Files\\JetBrains\\test1\\Lab\\src\\Java学习\\网络" +
                "编程\\TCP_UDP\\URL类\\download\\";
        HttpURLConnection httpURLConnection = null;
        try {
            //1. Build the URL
            URL url = new URL(addressUrl);
            //2. Connect to the resource over HTTP
            httpURLConnection = (HttpURLConnection) url.openConnection();
            //3. Copy the response body to a local file; try-with-resources closes both
            //   streams even when the copy fails half-way.
            String target = localPath + fileNameOf(url);
            try (InputStream inputStream = httpURLConnection.getInputStream();
                 FileOutputStream fileOutputStream = new FileOutputStream(target)) {
                byte[] bytes = new byte[1024 * 10];
                int length;
                while ((length = inputStream.read(bytes)) != -1) {
                    fileOutputStream.write(bytes, 0, length);
                }
            }
            System.out.println("写入完成");
        } catch (IOException e) {
            // Report the failure instead of silently swallowing it.
            e.printStackTrace();
        } finally {
            if (httpURLConnection != null) {
                httpURLConnection.disconnect();
            }
        }
    }

    /**
     * Derives the local file name from the URL path so that a query string or fragment
     * never ends up in the file name; falls back to the host when the path has no
     * final segment.
     */
    private static String fileNameOf(URL url) {
        String path = url.getPath();
        String name = path.substring(path.lastIndexOf('/') + 1);
        return name.isEmpty() ? url.getHost() : name;
    }

    public static void main(String[] args) throws IOException {

        String urlString = "http://localhost:8080/bmft/index.html";
        String urlString2 = "https://csdnimg.cn/feed/20200509/d7ade7b43ca6cae0536a4d36780540d2.png";
        URL下载网络资源 urlDownload = new URL下载网络资源();
        urlDownload.downLoad(urlString);
        urlDownload.downLoad(urlString2);

    }
}
```
Run:
![在这里插入图片描述](https://img-blog.csdnimg.cn/20200510221853672.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2phcnZhbjU=,size_16,color_FFFFFF,t_70)
![在这里插入图片描述](https://img-blog.csdnimg.cn/20200510221934493.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2phcnZhbjU=,size_16,color_FFFFFF,t_70)
原创文章 132 获赞 11 访问量 4702

猜你喜欢

转载自blog.csdn.net/jarvan5/article/details/106043400