Java 正则相关工具类

去掉小数点后多余的.和0

/**
 * Removes redundant trailing zeros (and the then-dangling decimal point) from a decimal value.
 *
 * <p>The work is done numerically via {@link BigDecimal#stripTrailingZeros()} rather than by
 * editing {@code toString()} output, because {@code toString()} may use scientific notation
 * (for example {@code 1.0E-10}), where a text-based trim cannot see the trailing zero.
 *
 * @param num the value to normalise; may be {@code null}
 * @return {@code null} if {@code num} is {@code null}; {@code num} itself when it has no
 *         fractional digits (scale &lt;= 0); otherwise a numerically equal value without
 *         trailing fractional zeros, e.g. {@code 1.500 -> 1.5}, {@code 100.00 -> 100}
 */
public static BigDecimal subZeroAndDot(BigDecimal num) {
    if (num == null) {
        return null;
    }
    // No fractional digits, so there is nothing to trim; hand the value back unchanged.
    if (num.scale() <= 0) {
        return num;
    }
    BigDecimal stripped = num.stripTrailingZeros();
    // stripTrailingZeros() turns 100.00 into 1E+2 (negative scale); bring it back to a plain
    // integer so that toString() yields "100" instead of "1E+2".
    if (stripped.scale() < 0) {
        stripped = stripped.setScale(0);
    }
    return stripped;
}

移除富文本内容中的 HTML 标签

/**
 * Extracts the plain text from a rich-text (HTML) string.
 *
 * <p>The input is passed through a fixed sequence of regular-expression deletions, in this
 * order: {@code <script>} blocks with their content, {@code <style>} blocks with their content,
 * any remaining tag, named character entities such as {@code &nbsp;}, and finally all
 * whitespace. Note that the last step removes every whitespace character, including spaces
 * between words.
 *
 * @param inputString the rich-text content; may be {@code null}
 * @return {@code null} if {@code inputString} is {@code null}; the stripped text otherwise;
 *         an empty string if an exception is raised while filtering (the exception is logged)
 */
public static String getNoImgText(String inputString) {
    if (inputString == null) {
        return null;
    }

    // <script ...> ... </script>, tolerating whitespace inside the angle brackets.
    final String scriptBlock = "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>";
    // <style ...> ... </style>, tolerating whitespace inside the angle brackets.
    final String styleBlock = "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>";
    // Any remaining tag.
    final String anyTag = "<[^>]+>";
    // Named entities: '&', 1 to 10 ASCII letters, ';'.
    final String namedEntity = "\\&[a-zA-Z]{1,10};";
    // Line breaks, indentation and other whitespace.
    final String whitespace = "\\s*|\t|\r|\n|";

    try {
        // Script and style blocks go first so their content disappears together with the tags.
        return getString(inputString, scriptBlock, styleBlock, anyTag, namedEntity, whitespace);
    } catch (Exception e) {
        log.info("removeHtmlTag error:{}",e.getMessage(),e);
        return "";
    }
}
/**
 * Deletes every match of each given regular expression from the text.
 *
 * <p>The expressions are applied one after another, each to the result of the previous one,
 * and every expression is compiled case-insensitively.
 *
 * @param htmlStr the text to filter
 * @param args    the regular expressions to apply, in order
 * @return the text with all matches removed
 */
private static String getString(String htmlStr, String... args) {
    String remaining = htmlStr;
    for (int i = 0; i < args.length; i++) {
        remaining = Pattern.compile(args[i], Pattern.CASE_INSENSITIVE)
                .matcher(remaining)
                .replaceAll("");
    }
    return remaining;
}

提取富文本内容中的图片地址

/** Matches one complete {@code <img ...>} tag; the tag may span several lines. */
private static final Pattern IMG_TAG_PATTERN =
        Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

/**
 * Matches a {@code src} attribute inside a tag. The value is captured in group 1
 * (double-quoted), group 2 (single-quoted) or group 3 (unquoted). The look-behind rejects
 * attributes that merely end in "src", such as {@code data-src}.
 */
private static final Pattern IMG_SRC_PATTERN = Pattern.compile(
        "(?<![\\w-])src\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
        Pattern.CASE_INSENSITIVE);

/**
 * Extracts the image addresses from rich-text (HTML) content.
 *
 * <p>Each {@code <img>} tag is located on its own, and the value of its {@code src} attribute
 * is read without the surrounding quotes. Tag and attribute names are matched
 * case-insensitively. {@code src} attributes of other tags (for example {@code <script>}) are
 * not reported.
 *
 * @param htmlStr the rich-text content; may be {@code null}
 * @return the {@code src} values in document order; an empty list if there are none or if
 *         {@code htmlStr} is {@code null}
 */
public static List<String> getImgStr(String htmlStr) {
    List<String> sources = new ArrayList<>();
    if (htmlStr == null) {
        return sources;
    }
    Matcher tagMatcher = IMG_TAG_PATTERN.matcher(htmlStr);
    while (tagMatcher.find()) {
        // Search only inside this one tag so attributes of neighbouring tags cannot leak in.
        Matcher srcMatcher = IMG_SRC_PATTERN.matcher(tagMatcher.group());
        if (srcMatcher.find()) {
            // Exactly one of the three alternatives captured the value.
            for (int g = 1; g <= 3; g++) {
                String value = srcMatcher.group(g);
                if (value != null) {
                    sources.add(value);
                    break;
                }
            }
        }
    }
    return sources;
}

java正则相关工具类

去掉小数点后多余的.和0

移除富文本内容中的html标签

提取富文本内容中的图片地址

猜你喜欢