去除String中的html标签,并对<td><tr>进行替换

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Matches an opening {@code <td>} tag, with or without attributes, in any letter case. */
private static final Pattern TD_OPEN_TAG =
        Pattern.compile("<td\\b[^>]*>", Pattern.CASE_INSENSITIVE);

/** Matches an opening {@code <tr>} tag, with or without attributes, in any letter case. */
private static final Pattern TR_OPEN_TAG =
        Pattern.compile("<tr\\b[^>]*>", Pattern.CASE_INSENSITIVE);

/** Matches any tag-like span: {@code <}, one or more non-{@code >} characters, then {@code >}. */
private static final Pattern ANY_TAG = Pattern.compile("<[^>]+>");

/**
 * Strips HTML tags from a string while keeping a minimal table layout.
 *
 * <p>Processing order:
 * <ol>
 *   <li>every {@code &nbsp;} entity becomes a single space;</li>
 *   <li>every opening {@code <td ...>} tag becomes a single space;</li>
 *   <li>every opening {@code <tr ...>} tag becomes a line feed;</li>
 *   <li>every remaining tag (closing tags included) is removed.</li>
 * </ol>
 *
 * <p>No entity other than {@code &nbsp;} is decoded, and the result is not trimmed.
 *
 * @param htmlText the text to clean; may be {@code null}
 * @return the text without tags, or an empty string when {@code htmlText} is
 *         {@code null} or empty
 */
private String cleanHtmlTags(String htmlText)
{
    if (htmlText == null || htmlText.isEmpty())
    {
        return "";
    }

    // Literal replacement: the entity contains no regex syntax, so no pattern is needed.
    String text = htmlText.replace("&nbsp;", " ");

    // The cell and row tags must be rewritten before the generic strip below,
    // otherwise they would be removed without leaving a separator behind.
    // The \b after the tag name keeps tags such as <tt>, <dd> or <track> from matching.
    text = TD_OPEN_TAG.matcher(text).replaceAll(" ");
    text = TR_OPEN_TAG.matcher(text).replaceAll("\n");

    // Whatever is left, closing </td> and </tr> included, is dropped.
    return ANY_TAG.matcher(text).replaceAll("");
}

猜你喜欢

转载自blog.csdn.net/VeastLee/article/details/82665987
今日推荐