如何获取URL网页内容

代码

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;

/**
 * Demonstrates fetching a web page through {@link URLConnection}: printing the
 * well-known response headers, enumerating the header fields, and reading the
 * response body as text.
 */
public class ReadUrlTwo {

    /** Charset used to decode response bodies. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Size of the buffer used when draining the response body. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Prints the commonly used response header values of a connection.
     *
     * <p>Compared with {@link URL}, a {@link URLConnection}:
     * <ul>
     *   <li>gives access to the HTTP headers;</li>
     *   <li>lets the caller configure the request parameters sent to the server;</li>
     *   <li>can write data to the server in addition to reading from it.</li>
     * </ul>
     *
     * @param connection the connection whose header values are printed
     */
    public static void printConnection(URLConnection connection) {
        System.out.println("Content-Type: " + connection.getContentType());
        System.out.println("Content-Length: " + connection.getContentLength());
        System.out.println("Content-LengthLong: " + connection.getContentLengthLong());
        System.out.println("Content-encoding: " + connection.getContentEncoding());
        System.out.println("Date: " + connection.getDate());
        System.out.println("Expires: " + connection.getExpiration());
        System.out.println("Last-modified: " + connection.getLastModified());
    }

    /**
     * Prints every header field as {@code key: value}, one per line, stopping at
     * the first index for which the connection reports no field.
     *
     * @param connection the connection whose header fields are printed
     */
    public static void printHeader(URLConnection connection) {
        // Enumeration starts at index 1: some implementations (HTTP in particular)
        // treat field 0 as the status line, which has no key.
        for (int i = 1; ; i++) {
            String header = connection.getHeaderField(i);
            if (header == null) {
                break;
            }
            System.out.println(connection.getHeaderFieldKey(i) + ": " + header);
        }
    }

    /**
     * Prints the header fields and then streams the response body to standard
     * output character by character, decoding it as UTF-8.
     *
     * <p>Failures are reported with a stack trace instead of being propagated.
     *
     * @param connection the connection to read from
     */
    public static void get(URLConnection connection) {
        try {
            ReadUrlTwo.printHeader(connection);

            // An explicit charset keeps the output independent of the platform default;
            // try-with-resources closes the underlying stream even on failure.
            try (Reader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), UTF_8))) {
                int c;
                while ((c = reader.read()) != -1) {
                    System.out.print((char) c);
                }
            }
        } catch (Exception e) {
            // Best-effort demo method: report the problem and return.
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole response body as UTF-8 text and prints it followed by a
     * line separator.
     *
     * <p>Failures are reported with a stack trace instead of being propagated.
     *
     * @param connection the connection to read from
     */
    public static void getString(URLConnection connection) {
        try (Reader reader = new InputStreamReader(connection.getInputStream(), UTF_8)) {
            StringBuilder sb = new StringBuilder();
            char[] buffer = new char[BUFFER_SIZE];
            int length;
            while ((length = reader.read(buffer)) != -1) {
                // Append only the characters actually read in this pass. Decoding in
                // the reader (rather than per byte chunk) also keeps multi-byte
                // characters intact when they straddle a buffer boundary.
                sb.append(buffer, 0, length);
            }
            String message = sb.toString();
            System.out.println(message);
        } catch (Exception e) {
            // Best-effort demo method: report the problem and return.
            e.printStackTrace();
        }
    }

    /**
     * Fetches the Baidu home page, prints its headers, then prints its body.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        try {
            URL url = new URL("http://www.baidu.com/");
            URLConnection connection = url.openConnection();

            System.out.println("print HTTP Header =====>>  ");
            ReadUrlTwo.printConnection(connection);
            System.out.println("print HTTP Header Field ======>> ");
            ReadUrlTwo.printHeader(connection);

            // ReadUrlTwo.get(connection) is the character-streaming alternative.
            ReadUrlTwo.getString(connection);

        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

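这个程序中的方法都用 static 修饰,因此可以直接通过类名调用(例如 ReadUrlTwo.getString(connection)),不需要先创建对象。
为什么要用static关键字?
答:static 成员属于类本身,而不属于某一个对象:它在类加载时初始化,被该类的所有实例共享,生命周期一直持续到类被卸载(通常是应用程序退出)。对于像本例这样不依赖任何对象状态的工具方法,声明为 static 后无需每次都 new 一个对象就能调用,省去了不必要的对象创建开销。需要注意的是,static 并不是专门的缓存机制,滥用 static 变量会让数据长期占用内存。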

相关方法解释

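  • printConnection:通过 URLConnection 读取并打印常用的 HTTP 响应首部(Content-Type、Content-Length、Content-Encoding、Date、Expires、Last-Modified);第一次读取首部时会隐式地与服务器建立连接
  • printHeader:从下标 1 开始依次遍历首部字段,逐行打印"字段名: 字段值",直到 getHeaderField 返回 null 为止(HTTP 连接的第 0 个字段通常是状态行,没有字段名)
  • get:先调用 printHeader 打印首部,再通过 getInputStream() 取得响应体的输入流 in,用 InputStreamReader 把字节流包装成字符流,逐个字符读取并打印,直到 read() 返回 -1
  • getString:通过 getInputStream() 取得响应体的输入流,用 1024 字节的数组作为缓冲区循环读取,read 返回 -1 表示读完;读到的数据按 UTF-8 解码后追加到 sb 中,最后用 sb.toString() 得到完整字符串并打印。注意:必须只解码本次实际读到的 length 个字节,否则缓冲区中上一轮残留的数据会被重复拼接进结果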

运行结果

print HTTP Header =====>>
Content-Type: text/html
Content-Length: 2381
Content-LengthLong: 2381
Content-encoding: null
Date: 1585235629000
Expires: 0
Last-modified: 0
print HTTP Header Field ======>>
Content-Length: 2381
Content-Type: text/html
Server: bfe
Date: Thu, 26 Mar 2020 15:13:49 GMT
<!DOCTYPE html>
<!--STATUS OK--><html> <head><meta http-equiv=content-type content=text/html;charset=utf-8><meta http-equiv=X-UA-Compatible content=IE=Edge><meta content=always name=referrer><link rel=stylesheet type=text/css href=http://s1.bdstatic.com/r/www/cache/bdorz/baidu.min.css><title>百度一下,你就知道</title></head> <body link=#0000cc> <div id=wrapper> <div id=head> <div class=head_wrapper> <div class=s_form> <div class=s_form_wrapper> <div id=lg> <img hidefocus=true src=//www.baidu.com/img/bd_logo1.png width=270 height=129> </div> <form id=form name=f action=//www.baidu.com/s class=fm> <input type=hidden name=bdorz_come value=1> <input type=hidden name=ie value=utf-8> <input type=hidden name=f value=8> <input type=hidden name=rsv_bp value=1> <input type=hidden name=rsv_idx value=1> <input type=hidden name=tn value=baidu><span class="bg s_ipt_wr"><input id=kw name=wd class=s_ipt value maxlength=255 autocomplete=off autofocus></span><span class="bg s_btn_wr"><input type=submit id=su value=??度一下 class="bg s_btn"></span> </form> </div> </div> <div id=u1> <a href=http://news.baidu.com name=tj_trnews class=mnav>新闻</a> <a href=http://www.hao123.com name=tj_trhao123 class=mnav>hao123</a> <a href=http://map.baidu.com name=tj_trmap class=mnav>地图</a> <a href=http://v.baidu.com name=tj_trvideo class=mnav>视频</a> <a href=http://tieba.baidu.com name=tj_trtieba class=mnav>贴吧</a> <noscript> <a href=http://www.baidu.com/bdorz/login.gif?login&amp;tpl=mn&amp;u=http%3A%2F%2Fwww.baidu.com%2f%3fbdorz_come%3d1 name=tj_login class=lb>登录</a> </noscript> <script>document.write('<a href="http://www.baidu.com/bdorz/login.gif?login&tpl=mn&u='+ encodeURIComponent(window.location.href+ (window.location.search === "" ? "?" 
: "&")+ "bdorz_come=1")+ '" name="tj_login" class="lb">登录</a>');</script> <a href=//www.baidu.com/more/ name=tj_briicon class=bri style="display: block;">更多产品</a> </div> </div> </div> <div id=ftCon> <div id=ftConw> <p id=lh> <a href=http://home.baidu.com>关于百度</a> <a href=http://ir.baidu.com>About Baidu</a> </p> <p id=cp>&copy;2017&nbsp;Baidu&nbsp;<a href=http://www.baidu.com/duty/>使用百度前必读</a>&nbsp; <a href=http://jianyi.baidu.com/ class=cp-feedback>意 见反馈</a>&nbsp;京ICP证030173号&nbsp; <img src=//www.baidu.com/img/gs.gif> </p> </div> </div> </div> </body> </html>
a> <a href=http://tieba.baidu.com name=tj_trtieba class=mnav>贴吧</a> <noscript> <a href=http://www.baidu.com/bdorz/login.gif?login&amp;tpl=mn&amp;u=http%3A%2F%2Fwww.baidu.com%2f%3fbdorz_come%3d1 name=tj_login class=lb> 登录</a> </noscript> <script>document.write('<a href="http://www.baidu.com/bdorz/login.gif?login&tpl=mn&u='+ encodeURIComponent(window.location.href+ (window.location.search === "" ? "?" : "&")+ "bdorz_come=1")+ '" name="tj_login" class="lb">登录</a>');</script> <a href=//www.baidu.com/more/ name=tj_briicon class=bri style="display: block;">更多产品</a> </div> </div> </div> <div id=ftCon> <div id=ftConw> <p id=lh> <a href=http://home.baidu.com>关于百度</
发布了36 篇原创文章 · 获赞 0 · 访问量 851

猜你喜欢

转载自blog.csdn.net/Z1998hx0919/article/details/105131756