java实现单词字典数据(前缀树)的插入与查询

树是计算机中使用频率很高的一种数据结构,使用场景很多,例如数据库使用B树/B+树索引来实现数据的快速查询

下面的代码用Java实现了一个简单的英文单词字典树(前缀树)的插入和查询:

package cn.org.idto.client;

import java.util.Arrays;
import java.util.Map;

/**
 * A minimal dictionary backed by a trie (prefix tree): words are inserted together with a
 * description and can later be looked up by exact match.
 *
 * <p>Keys are restricted to the lowercase letters {@code 'a'..'z'}; any other key is rejected
 * with a message on {@code System.err}. The class performs no synchronization.
 *
 * @author idto
 * @date 2022/1/12 9:12
 */
public class TrieTest {

    /**
     * Alphabet size: keys may contain only the 26 lowercase English letters.
     */
    private static final int CHAR_SIZE = 26;
    /**
     * First letter of the alphabet; a character's child slot is its distance from this letter.
     */
    private static final char FIRST_CHAR = 'a';


    /**
     * Root node (depth 0, the empty prefix). It is created eagerly so that insert and search
     * work on a freshly constructed instance without the caller assigning it first.
     */
    private TrieNode root = new TrieNode();

    /**
     * A single trie node.
     */
    private static class TrieNode {
        /**
         * Child nodes, one slot per letter; a null slot means no stored word continues that way.
         */
        private final TrieNode[] children = new TrieNode[CHAR_SIZE];

        /**
         * True when the path from the root to this node spells a complete stored word.
         */
        private boolean isWordEnd;
        /**
         * The stored word; null unless this node ends a word.
         */
        private String key;
        /**
         * The description of the stored word; null unless this node ends a word.
         */
        private String desc;

        @Override
        public String toString() {
            return "TrieNode{" +
                    "children=..." +
                    ", isWordEnd=" + isWordEnd +
                    ", key='" + key + '\'' +
                    ", desc='" + desc + '\'' +
                    '}';
        }
    }

    /**
     * Inserts a word and its description into the trie. Inserting a word that is already
     * present replaces its description.
     *
     * <p>A null or empty key is ignored. A key containing any character outside
     * {@code 'a'..'z'} is reported on {@code System.err} and rejected before the trie is
     * touched, so a rejected key never leaves partial nodes behind.
     *
     * @param key  the word to store
     * @param desc the description associated with the word
     */
    public void insert(String key, String desc) {
        if (key == null || key.isEmpty()) {
            return;
        }
        // Validate the whole key up front instead of failing halfway through the walk.
        if (!isLowercaseWord(key)) {
            System.err.println("key[" + key + "]含其他字符,不能插入字典");
            return;
        }

        TrieNode node = root;
        for (int pos = 0; pos < key.length(); pos++) {
            int slot = key.charAt(pos) - FIRST_CHAR;
            if (node.children[slot] == null) {
                node.children[slot] = new TrieNode();
            }
            // Descend one level for the next character.
            node = node.children[slot];
        }
        // The node reached by the last character carries the word and its description.
        node.isWordEnd = true;
        node.key = key;
        node.desc = desc;
    }

    /**
     * Inserts every entry of the given map via {@link #insert(String, String)}.
     *
     * @param dictionary word-to-description entries; null or empty is a no-op
     */
    public void insertBatch(Map<String, String> dictionary) {
        if (dictionary == null || dictionary.isEmpty()) {
            return;
        }
        for (Map.Entry<String, String> entry : dictionary.entrySet()) {
            this.insert(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Looks up a word by exact match.
     *
     * <p>If the walk reaches a character outside {@code 'a'..'z'}, a message is printed on
     * {@code System.err} and null is returned.
     *
     * @param key the word to look up
     * @return the node holding the word and its description, or null when the key is null,
     *         empty, not stored, or only a prefix of stored words
     */
    public TrieNode search(String key) {
        if (key == null || key.isEmpty()) {
            return null;
        }

        TrieNode node = root;
        for (int pos = 0; pos < key.length(); pos++) {
            int slot = key.charAt(pos) - FIRST_CHAR;
            if (slot < 0 || slot >= CHAR_SIZE) {
                System.err.println("key[" + key + "]含其他字符,不在字典中");
                return null;
            }
            node = node.children[slot];
            if (node == null) {
                // No stored word continues with this character.
                return null;
            }
        }
        // A node that exists only as a prefix of longer words is not a match.
        return node.isWordEnd ? node : null;
    }

    /**
     * Returns true when every character of the word is within 'a'..'z'.
     */
    private static boolean isLowercaseWord(String word) {
        for (int pos = 0; pos < word.length(); pos++) {
            int slot = word.charAt(pos) - FIRST_CHAR;
            if (slot < 0 || slot >= CHAR_SIZE) {
                return false;
            }
        }
        return true;
    }

}

使用示例(该main方法需要放在TrieTest类内部,因为它访问了私有成员root和TrieNode):

    /**
     * Demo entry point: builds a small dictionary and looks up a few words.
     *
     * <p>Must live inside {@code TrieTest}, because it assigns the private {@code root} field
     * and instantiates the private {@code TrieNode} class.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] words = {"idto", "idtolerate", "preserve", "evidence", "idto315"};

        TrieTest dictionary = new TrieTest();
        dictionary.root = new TrieNode();

        // Each word is stored with "<word>_desc" as its description.
        for (String word : words) {
            dictionary.insert(word, word + "_desc");
        }

        // Stored words: the lookup returns the node, which is printed via toString().
        TrieNode idtoNode = dictionary.search("idto");
        System.out.println("idto result : " + idtoNode.toString());
        TrieNode evidenceNode = dictionary.search("evidence");
        System.out.println("evidence result : " + evidenceNode.toString());

        // A key with digits and a key that is only a prefix: both lookups yield null.
        TrieNode digitsNode = dictionary.search("idto315");
        System.out.println("idto315 result : " + digitsNode);
        TrieNode prefixNode = dictionary.search("id");
        System.out.println("id result : " + prefixNode);

        /*
         * Output written to System.err:
         *   key[idto315]含其他字符,不能插入字典
         *   key[idto315]含其他字符,不在字典中
         *
         * Output written to System.out:
         *   idto result : TrieNode{children=..., isWordEnd=true, key='idto', desc='idto_desc'}
         *   evidence result : TrieNode{children=..., isWordEnd=true, key='evidence', desc='evidence_desc'}
         *   idto315 result : null
         *   id result : null
         */

    }

猜你喜欢

转载自blog.csdn.net/idto315/article/details/122447277