- package com.test;
- import java.io.BufferedReader;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import java.io.OutputStreamWriter;
- import java.net.URL;
- import java.net.URLConnection;
/**
 * Minimal example of sending an HTTP POST through {@link URLConnection} and
 * printing the server's response body.
 */
public class TestPost {

    /** Separator appended after every response line when rebuilding the body. */
    private static final String CRLF = "\r\n";

    /**
     * Posts a fixed form body to a hard-coded URL and prints the response to
     * standard output.
     *
     * @throws IOException if the connection cannot be opened, or if writing the
     *                     request or reading the response fails
     */
    public static void testPost() throws IOException {
        // Start from a URL and obtain its URLConnection.
        URL url = new URL("http://www.faircanton.com/message/check.asp");
        URLConnection connection = url.openConnection();

        // A URLConnection is normally used for input only (e.g. downloading a
        // page). Enabling output allows a request body to be sent to the server.
        connection.setDoOutput(true);

        // Wrap the output stream in a Writer and send the form data. The
        // request this produces looks roughly like:
        //   POST /jobsearch/jobsearch.cgi HTTP 1.0
        //   ACCEPT: text/plain
        //   Content-type: application/x-www-form-urlencoded
        //   Content-length: 99
        //   username=bob password=someword
        OutputStreamWriter out = new OutputStreamWriter(connection.getOutputStream(), "8859_1");
        try {
            out.write("username=kevin&password=*********"); // the POST payload itself
            out.flush();
        } finally {
            // Close even when the write fails so the stream is not leaked.
            out.close();
        }

        // Once the request has been sent, the server's reply can be read back.
        System.out.println(readAll(connection.getInputStream()));
    }

    /**
     * Reads the whole stream line by line, terminating every line with CRLF,
     * and closes the stream afterwards.
     *
     * @param in stream to drain
     * @return the stream content with each line followed by {@code "\r\n"}
     * @throws IOException if reading fails
     */
    private static String readAll(InputStream in) throws IOException {
        // NOTE(review): the response is decoded with the platform default
        // charset, as before; the server's actual encoding is not visible here.
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        try {
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append(CRLF);
            }
            return body.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Runs the POST example.
     *
     * @param args ignored
     * @throws IOException if the request fails
     */
    public static void main(String[] args) throws IOException {
        testPost();
    }
}
下面让我先去下载一个解析HTML的jar包,推荐下载地址:点击打开链接
htmlparser:解析
- package com.test;
- import org.htmlparser.Node;
- import org.htmlparser.NodeFilter;
- import org.htmlparser.Parser;
- import org.htmlparser.filters.TagNameFilter;
- import org.htmlparser.tags.TableTag;
- import org.htmlparser.util.NodeList;
- /**
- * 标题:利用htmlparser提取网页纯文本的例子
- */
- public class TestHTMLParser {
- public static void testHtml() {
- try {
- String sCurrentLine;
- String sTotalString;
- sCurrentLine = "";
- sTotalString = "";
- java.io.InputStream l_urlStream;
- java.net.URL l_url = new java.net.URL("http://www.ideagrace.com/html/doc/2006/07/04/00929.html");
- java.net.HttpURLConnection l_connection = (java.net.HttpURLConnection) l_url.openConnection();
- l_connection.connect();
- l_urlStream = l_connection.getInputStream();
- java.io.BufferedReader l_reader = new java.io.BufferedReader(new java.io.InputStreamReader(l_urlStream));
- while ((sCurrentLine = l_reader.readLine()) != null) {
- sTotalString += sCurrentLine+"/r/n";
- // System.out.println(sTotalString);
- }
- String testText = extractText(sTotalString);
- System.out.println( testText );
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- public static String extractText(String inputHtml) throws Exception {
- StringBuffer text = new StringBuffer();
- Parser parser = Parser.createParser(new String(inputHtml.getBytes(),"GBK"), "GBK");
- // 遍历所有的节点
- NodeList nodes = parser.extractAllNodesThatMatch(new NodeFilter() {
- public boolean accept(Node node) {
- return true;
- }
- });
- System.out.println(nodes.size()); //打印节点的数量
- for (int i=0;i<nodes.size();i++){
- Node nodet = nodes.elementAt(i);
- //System.out.println(nodet.getText());
- text.append(new String(nodet.toPlainTextString().getBytes("GBK"))+"/r/n");
- }
- return text.toString();
- }
- public static void test5(String resource) throws Exception {
- Parser myParser = new Parser(resource);
- myParser.setEncoding("GBK");
- String filterStr = "table";
- NodeFilter filter = new TagNameFilter(filterStr);
- NodeList nodeList = myParser.extractAllNodesThatMatch(filter);
- TableTag tabletag = (TableTag) nodeList.elementAt(11);
- }
- public static void main(String[] args) throws Exception {
- // test5("http://www.ggdig.com");
- testHtml();
- }
- }