java集合类之ConcurrentHashMap

本文从线程安全的角度结合源代码介绍了ConcurrentHashMap，不介绍与HashMap雷同的部分，如果对HashMap的实现有兴趣，可以参考java 集合类之HashMap。
本文介绍的ConcurrentHashMap基于Java1.8源代码，ConcurrentHashMap的实现在1.8有重大调整，使用CAS加synchronized（只锁住单个桶的头节点）的方式取代了之前的分段锁。

sizeCtl

sizeCtl 是一个标志量，下面的判断很多都是基于这个值来判断的，需要首先了解一下不同状态下的sizeCtl
只调用了构造函数，还没初始化表的时候，sizeCtl=0（使用默认容量）或者等于构造时确定的初始容量（initTable()会把这个值作为数组长度）
一个线程成功竞争到了initTable的资格，sizeCtl = -1
resize过程中，sizeCtl = (rs<<16) + 正在参与transfer的线程的数目+1，其中rs = resizeStamp(n)是一个第16位为1的正数，左移16位后符号位为1，因此resize过程中sizeCtl是负数
已完成初始化且不在resize状态，sizeCtl 是数组容量的3/4，起到类似HashMap的threshold的作用

initTable()

初始化表格

    /**
     * Initializes table, using the size recorded in sizeCtl.
     */
    private final Node<K,V>[] initTable() {
        Node<K,V>[] tab; int sc;
        // Loop until some thread (this one or another) has published a non-empty table.
        while ((tab = table) == null || tab.length == 0) {
            // A negative sizeCtl means another thread already won the right to
            // initialize (it CASed sizeCtl to -1 below); yield and re-check.
            if ((sc = sizeCtl) < 0)
                Thread.yield(); // lost initialization race; just spin
            // Try to claim initialization by CASing sizeCtl from the value just read
            // to -1. The field is updated through U/SIZECTL rather than a plain
            // assignment so that only one thread can succeed.
            // NOTE(review): SIZECTL is presumably the field offset of sizeCtl obtained
            // in a static initializer that is not shown here -- confirm.
            else if (U.compareAndSwapInt(this, SIZECTL, sc, -1)) {
                try {
                    // Re-check under the claim: the table may have been created between
                    // the loop test and the successful CAS.
                    if ((tab = table) == null || tab.length == 0) {
                        // A positive sizeCtl carries the requested capacity;
                        // otherwise fall back to DEFAULT_CAPACITY.
                        int n = (sc > 0) ? sc : DEFAULT_CAPACITY;
                        @SuppressWarnings("unchecked")
                        Node<K,V>[] nt = (Node<K,V>[])new Node<?,?>[n];
                        table = tab = nt;
                        // n - n/4, i.e. three quarters of the array length.
                        sc = n - (n >>> 2);
                    }
                } finally {
                    // Publish the new sizeCtl: three quarters of the array length when
                    // this thread created the table, otherwise the value read before
                    // the CAS. It then plays the role of HashMap's threshold when
                    // deciding whether to resize.
                    sizeCtl = sc;
                }
                break;
            }
        }
        return tab;
    }

put()

put()方法用于向ConcurrentHashMap中添加数据，需要被设计成原子的操作。

    /**
     * Associates {@code value} with {@code key} in this table, replacing
     * any value previously stored under an equal key.
     * Neither the key nor the value can be null.
     *
     * <p>This is a thin wrapper: it delegates to {@code putVal} with
     * {@code onlyIfAbsent} set to {@code false}, so an existing mapping
     * is overwritten. The stored value can later be read back through
     * {@code get} using a key equal to the original one.
     *
     * @param key key with which the specified value is to be associated
     * @param value value to be associated with the specified key
     * @return the previous value associated with {@code key}, or
     *         {@code null} if there was no mapping for {@code key}
     * @throws NullPointerException if the specified key or value is null
     */
    public V put(K key, V value) {
        // Plain put always overwrites an existing mapping.
        final boolean onlyIfAbsent = false;
        return putVal(key, value, onlyIfAbsent);
    }

    /** Implementation for put and putIfAbsent */
    final V putVal(K key, V value, boolean onlyIfAbsent) {
        // Null keys and null values are both rejected.
        if (key == null || value == null) throw new NullPointerException();
        int hash = spread(key.hashCode());
        // Stays 0 when the node goes into an empty bin via CAS; otherwise set inside
        // the locked section (list nodes visited, or 2 for a tree bin).
        int binCount = 0;
        // Retry loop: every branch either finishes the insert (break/return) or
        // refreshes tab and tries again.
        for (Node<K,V>[] tab = table;;) {
            Node<K,V> f; int n, i, fh;
            // Table not created yet: initialize it, then retry.
            if (tab == null || (n = tab.length) == 0)
                tab = initTable();
            // Target bin is empty: try to install the new node with a single CAS.
            // On success the insert is done. On failure (another thread filled the
            // bin first) the loop runs again and the node is added to that bin instead.
            else if ((f = tabAt(tab, i = (n - 1) & hash)) == null) {
                if (casTabAt(tab, i, null,
                             new Node<K,V>(hash, key, value, null)))
                    break;                   // no lock when adding to empty bin
            }
            // The bin head is marked MOVED, i.e. a resize is migrating data:
            // help with the transfer and continue with the table it returns.
            else if ((fh = f.hash) == MOVED)
                tab = helpTransfer(tab, f);
            else {
                // The bin already has a head node: lock that head and insert under the
                // lock. The list handling below is very similar to HashMap.
                V oldVal = null;
                synchronized (f) {
                    // Re-check after acquiring the lock: the head may have been replaced
                    // between reading it and locking it. If so, nothing is done here,
                    // binCount stays 0 and the outer loop retries.
                    if (tabAt(tab, i) == f) {
                        // Non-negative head hash: ordinary linked-list bin.
                        if (fh >= 0) {
                            binCount = 1;
                            for (Node<K,V> e = f;; ++binCount) {
                                K ek;
                                // Key already present: remember the old value and
                                // overwrite it unless onlyIfAbsent is set.
                                if (e.hash == hash &&
                                    ((ek = e.key) == key ||
                                     (ek != null && key.equals(ek)))) {
                                    oldVal = e.val;
                                    if (!onlyIfAbsent)
                                        e.val = value;
                                    break;
                                }
                                Node<K,V> pred = e;
                                // Reached the tail without finding the key:
                                // append a new node after the last one.
                                if ((e = e.next) == null) {
                                    pred.next = new Node<K,V>(hash, key,
                                                              value, null);
                                    break;
                                }
                            }
                        }
                        // The bin has already been converted to a tree: insert through
                        // the TreeBin. A non-null result is the existing node for this key.
                        else if (f instanceof TreeBin) {
                            Node<K,V> p;
                            binCount = 2;
                            if ((p = ((TreeBin<K,V>)f).putTreeVal(hash, key,
                                                           value)) != null) {
                                oldVal = p.val;
                                if (!onlyIfAbsent)
                                    p.val = value;
                            }
                        }
                    }
                }
                // binCount != 0 means the locked section actually handled the bin.
                // Convert the bin to a tree if the list grew to TREEIFY_THRESHOLD, and
                // return early when an existing mapping was found (no count change).
                if (binCount != 0) {
                    if (binCount >= TREEIFY_THRESHOLD)
                        treeifyBin(tab, i);
                    if (oldVal != null)
                        return oldVal;
                    break;
                }
            }
        }
        // A new mapping was added: update the element count (which may trigger a resize).
        addCount(1L, binCount);
        return null;
    }

addCount()

addCount()函数在put操作之后更新元素计数，并决定是否要开始扩容（或者协助正在进行的扩容）

    /**
     * Adds to count, and if table is too small and not already
     * resizing, initiates transfer. If already resizing, helps
     * perform transfer if work is available.  Rechecks occupancy
     * after a transfer to see if another resize is already needed
     * because resizings are lagging additions.
     *
     * @param x the count to add
     * @param check if <0, don't check resize, if <= 1 only check if uncontended
     */
    private final void addCount(long x, int check) {
        CounterCell[] as; long b, s;
        // Fast path: add x to baseCount with one CAS. The slow path below is taken
        // when counter cells already exist or when that CAS fails.
        if ((as = counterCells) != null ||
            !U.compareAndSwapLong(this, BASECOUNT, b = baseCount, s = b + x)) {
            CounterCell a; long v; int m;
            boolean uncontended = true;
            // Hand over to fullAddCount when there are no cells, the cell array is
            // empty, this thread's cell is missing, or the CAS on that cell fails.
            if (as == null || (m = as.length - 1) < 0 ||
                (a = as[ThreadLocalRandom.getProbe() & m]) == null ||
                !(uncontended =
                  U.compareAndSwapLong(a, CELLVALUE, v = a.value, v + x))) {
                fullAddCount(x, uncontended);
                return;
            }
            if (check <= 1)
                return;
            // Recompute the current total for the resize check below.
            s = sumCount();
        }
        // A negative check means the caller does not want a resize check.
        if (check >= 0) {
            Node<K,V>[] tab, nt; int n, sc;
            // Resize while the count has reached sizeCtl and the table can still grow.
            while (s >= (long)(sc = sizeCtl) && (tab = table) != null &&
                   (n = tab.length) < MAXIMUM_CAPACITY) {
                // Stamp identifying a resize of a table of length n.
                int rs = resizeStamp(n);
                // During a resize sizeCtl is (rs << RESIZE_STAMP_SHIFT) + k: the thread
                // that starts the resize stores base + 2 (see the CAS below), every
                // helper adds 1 before calling transfer, and transfer subtracts 1 when
                // a thread is done. So base + 1 means every resizer has left.
                int base = rs << RESIZE_STAMP_SHIFT;
                // sc < 0 means a resize is already in progress.
                if (sc < 0) {
                    // Do not join the resize when:
                    //  - the stamp in sc's high bits is not the one for this length;
                    //  - sc == base + 1: all resizers have already left;
                    //  - sc == base + MAX_RESIZERS: the helper limit is reached;
                    //  - nextTable is null or transferIndex <= 0: nothing left to claim.
                    // Bug fix (JDK-8214427): this used to compare sc with rs + 1 and
                    // rs + MAX_RESIZERS. sc is negative here and the unshifted stamp is a
                    // small positive number, so those tests could never be true. They
                    // must be made against the shifted stamp.
                    if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == base + 1 ||
                        sc == base + MAX_RESIZERS || (nt = nextTable) == null ||
                        transferIndex <= 0)
                        break;
                    // Register as one more resizer; on success help move the data.
                    if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1))
                        transfer(tab, nt);
                }
                // No resize in progress: try to become the thread that starts one.
                else if (U.compareAndSwapInt(this, SIZECTL, sc, base + 2))
                    transfer(tab, null);
                // Re-read the count: additions may have outpaced the resize.
                s = sumCount();
            }
        }
    }

transfer()

transfer实现扩容后数据的迁移

    /**
     * Moves and/or copies the nodes in each bin to new table. See
     * above for explanation.
     * List nodes ahead of the reused trailing run, and all tree nodes, are
     * copied into freshly allocated nodes instead of being relinked.
     */
    private final void transfer(Node<K,V>[] tab, Node<K,V>[] nextTab) {
        int n = tab.length, stride;
        // Stride = number of bins claimed per step, derived from the CPU count
        // and never smaller than MIN_TRANSFER_STRIDE.
        if ((stride = (NCPU > 1) ? (n >>> 3) / NCPU : n) < MIN_TRANSFER_STRIDE)
            stride = MIN_TRANSFER_STRIDE; // subdivide range
        // No next table passed in: this thread starts the resize and allocates a
        // table of twice the length.
        if (nextTab == null) {            // initiating
            try {
                @SuppressWarnings("unchecked")
                Node<K,V>[] nt = (Node<K,V>[])new Node<?,?>[n << 1];
                nextTab = nt;
            } catch (Throwable ex) {      // try to cope with OOME
                sizeCtl = Integer.MAX_VALUE;
                return;
            }
            nextTable = nextTab;
            // Bins are handed out from the top of the old table downwards.
            transferIndex = n;
        }
        int nextn = nextTab.length;
        // Marker installed in every old bin once it has been moved.
        ForwardingNode<K,V> fwd = new ForwardingNode<K,V>(nextTab);
        boolean advance = true;
        boolean finishing = false; // to ensure sweep before committing nextTab
        for (int i = 0, bound = 0;;) {
            Node<K,V> f; int fh;
            // Pick the next bin index i: either step down inside the range already
            // claimed, or claim a new range by CASing transferIndex.
            while (advance) {
                int nextIndex, nextBound;
                // Still inside the claimed range [bound, i], or in the final sweep.
                if (--i >= bound || finishing)
                    advance = false;
                // No ranges left to claim: i = -1 routes into the exit branch below.
                else if ((nextIndex = transferIndex) <= 0) {
                    i = -1;
                    advance = false;
                }
                // Claim the next range by lowering transferIndex by one stride.
                // i is the index of the bin to process and counts downwards;
                // nextBound is the lowest index of the claimed range.
                else if (U.compareAndSwapInt
                         (this, TRANSFERINDEX, nextIndex,
                          nextBound = (nextIndex > stride ?
                                       nextIndex - stride : 0))) {
                    bound = nextBound;
                    i = nextIndex - 1;
                    advance = false;
                }
            }
            // Index outside the table: this thread has no more bins to process.
            if (i < 0 || i >= n || i + n >= nextn) {
                int sc;
                // Final sweep finished: publish the new table and set sizeCtl to
                // 2n - n/2, i.e. three quarters of the new length.
                if (finishing) {
                    nextTable = null;
                    table = nextTab;
                    sizeCtl = (n << 1) - (n >>> 1);
                    return;
                }
                // Normal exit path (not an error): deregister this thread by
                // decrementing sizeCtl.
                if (U.compareAndSwapInt(this, SIZECTL, sc = sizeCtl, sc - 1)) {
                    // If the value before the decrement was not stamp + 2, other
                    // resizers are still registered, so this thread simply leaves.
                    if ((sc - 2) != resizeStamp(n) << RESIZE_STAMP_SHIFT)
                        return;
                    // Otherwise this was the last resizer: sweep the whole table once
                    // more before committing.
                    finishing = advance = true;
                    i = n; // recheck before commit
                }
            }
            // Empty bin: try to install the forwarding marker; advance only if the
            // CAS succeeded.
            else if ((f = tabAt(tab, i)) == null)
                advance = casTabAt(tab, i, null, fwd);
            // Bin already carries the MOVED marker: nothing to do, move on.
            else if ((fh = f.hash) == MOVED)
                advance = true; // already processed
            else {
                // Move one bin while holding the lock on its head node.
                synchronized (f) {
                    // Re-check that the head did not change before the lock was taken.
                    if (tabAt(tab, i) == f) {
                        Node<K,V> ln, hn;
                        // Non-negative hash: ordinary linked-list bin.
                        // NOTE(review): per the original author's note TreeBin heads use
                        // hash -2 and ForwardingNode uses -1; those constants are not
                        // shown here -- confirm.
                        if (fh >= 0) {
                            // The bit of the hash selected by n decides the destination:
                            // 0 -> stays at index i ("low"), n -> moves to i + n ("high").
                            int runBit = fh & n;
                            Node<K,V> lastRun = f;
                            // Find lastRun: the first node of the longest trailing run
                            // whose nodes all go to the same side. That tail can be
                            // reused as is, which saves allocating copies for it.
                            for (Node<K,V> p = f.next; p != null; p = p.next) {
                                int b = p.hash & n;
                                if (b != runBit) {
                                    runBit = b;
                                    lastRun = p;
                                }
                            }
                            // The reused tail becomes the initial low or high list.
                            if (runBit == 0) {
                                ln = lastRun;
                                hn = null;
                            }
                            else {
                                hn = lastRun;
                                ln = null;
                            }
                            // Copy every node before lastRun, prepending each copy to
                            // the head of the low or high list.
                            for (Node<K,V> p = f; p != lastRun; p = p.next) {
                                int ph = p.hash; K pk = p.key; V pv = p.val;
                                if ((ph & n) == 0)
                                    ln = new Node<K,V>(ph, pk, pv, ln);
                                else
                                    hn = new Node<K,V>(ph, pk, pv, hn);
                            }
                            // Publish both lists in the new table, then mark the old
                            // bin as moved.
                            setTabAt(nextTab, i, ln);
                            setTabAt(nextTab, i + n, hn);
                            setTabAt(tab, i, fwd);
                            advance = true;
                        }
                        else if (f instanceof TreeBin) {
                            TreeBin<K,V> t = (TreeBin<K,V>)f;
                            TreeNode<K,V> lo = null, loTail = null;
                            TreeNode<K,V> hi = null, hiTail = null;
                            int lc = 0, hc = 0;
                            // Split the tree bin into low/high node lists using the same
                            // hash-bit rule as for linked lists, counting each side.
                            for (Node<K,V> e = t.first; e != null; e = e.next) {
                                int h = e.hash;
                                TreeNode<K,V> p = new TreeNode<K,V>
                                    (h, e.key, e.val, null, null);
                                if ((h & n) == 0) {
                                    if ((p.prev = loTail) == null)
                                        lo = p;
                                    else
                                        loTail.next = p;
                                    loTail = p;
                                    ++lc;
                                }
                                else {
                                    if ((p.prev = hiTail) == null)
                                        hi = p;
                                    else
                                        hiTail.next = p;
                                    hiTail = p;
                                    ++hc;
                                }
                            }
                            // For each side: at most UNTREEIFY_THRESHOLD nodes -> convert
                            // back to a plain list; otherwise build a new TreeBin, unless
                            // the other side is empty, in which case the original bin t
                            // is reused unchanged.
                            ln = (lc <= UNTREEIFY_THRESHOLD) ? untreeify(lo) :
                                (hc != 0) ? new TreeBin<K,V>(lo) : t;
                            hn = (hc <= UNTREEIFY_THRESHOLD) ? untreeify(hi) :
                                (lc != 0) ? new TreeBin<K,V>(hi) : t;
                            setTabAt(nextTab, i, ln);
                            setTabAt(nextTab, i + n, hn);
                            setTabAt(tab, i, fwd);
                            advance = true;
                        }
                    }
                }
            }
        }
    }

helpTransfer()

helpTransfer（）使线程在put操作时如果检测到正在扩容那么就帮助扩容

/**
     * Helps transfer if a resize is in progress.
     */
    final Node<K,V>[] helpTransfer(Node<K,V>[] tab, Node<K,V> f) {
        Node<K,V>[] nextTab; int sc;
        // Only help when there is a table, the bin head really is a ForwardingNode
        // (callers get here after seeing a MOVED hash) and it points at a next table.
        if (tab != null && (f instanceof ForwardingNode) &&
            (nextTab = ((ForwardingNode<K,V>)f).nextTable) != null) {
            // Stamp identifying a resize of a table of this length, and the value
            // sizeCtl is built on while that resize runs.
            int rs = resizeStamp(tab.length);
            int base = rs << RESIZE_STAMP_SHIFT;
            // Keep trying while the same resize is still in progress: same next
            // table, same current table, sizeCtl still negative.
            while (nextTab == nextTable && table == tab &&
                   (sc = sizeCtl) < 0) {
                // Give up when the stamp no longer matches, all resizers have
                // already left (base + 1), the helper limit is reached
                // (base + MAX_RESIZERS), or there are no ranges left to claim.
                // Bug fix (JDK-8214427): this used to compare sc with rs + 1 and
                // rs + MAX_RESIZERS. sc is negative here and the unshifted stamp is a
                // small positive number, so those tests could never be true.
                if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == base + 1 ||
                    sc == base + MAX_RESIZERS || transferIndex <= 0)
                    break;
                // Register as one more resizer; on success help move the data.
                if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1)) {
                    transfer(tab, nextTab);
                    break;
                }
            }
            return nextTab;
        }
        return table;
    }

V get(K)

按照key 查找value

/**
     * Returns the value to which the specified key is mapped,
     * or {@code null} if this map contains no mapping for the key.
     *
     * <p>More formally, if this map contains a mapping from a key
     * {@code k} to a value {@code v} such that {@code key.equals(k)},
     * then this method returns {@code v}; otherwise it returns
     * {@code null}.  (There can be at most one such mapping.)
     *
     * @throws NullPointerException if the specified key is null
     */
    public V get(Object key) {
        final int hash = spread(key.hashCode());

        // Take one snapshot of the table; bail out if it is missing or empty.
        final Node<K,V>[] snapshot = table;
        if (snapshot == null)
            return null;
        final int len = snapshot.length;
        if (len <= 0)
            return null;

        // Head of the bin this hash maps to.
        Node<K,V> node = tabAt(snapshot, (len - 1) & hash);
        if (node == null)
            return null;

        final int headHash = node.hash;
        if (headHash == hash) {
            // Hash matches on the head: it is a hit if the key matches as well.
            K headKey = node.key;
            if (headKey == key || (headKey != null && key.equals(headKey)))
                return node.val;
        }
        else if (headHash < 0) {
            // Negative hash marks a special head node (e.g. a tree bin);
            // let the node's own find() perform the lookup.
            Node<K,V> found = node.find(hash, key);
            return (found != null) ? found.val : null;
        }

        // Ordinary list bin: walk the nodes after the head.
        for (node = node.next; node != null; node = node.next) {
            if (node.hash == hash) {
                K k = node.key;
                if (k == key || (k != null && key.equals(k)))
                    return node.val;
            }
        }
        return null;
    }