CLR GC分析

CLR 的GC分为两个部分,一个是GC的内存分配,另外一个是GC的垃圾回收。这里我们先讲一下垃圾回收。(QQ群:676817308技术讨论群。欢迎您的加入)

一.垃圾回收的触发条件
1.在C#代码中调用 GC.Collect();
2.物理内存不足的情况下
3.分配量超过分配阈值
4.找不到可分配的内存空间
在满足了以上四个条件中的任何一个,CLR都会触发GC的垃圾回收,清理托管堆空间释放内存,以便下次使用。

二.垃圾回收的流程
1.停止其它线程并且切换到抢占模式(抢占模式下不可访问托管堆,只可访问非托管代码)
2.重新定位回收的目标代,并且判断是否执行后台GC回收
3-1.如果执行后台GC回收分为两步(第一:后台标记(标记没被使用的托管堆空间),第二:后台清扫(清扫没被使用的托管堆空间))
3-2.如果不执行后台GC,则执行普通GC,普通GC一共分为五个阶段(1,标记。2计划。3.重定位。4.压缩。5清扫。)
4.恢复其它线程到合作模式
以上为GC垃圾回收的全部过程。

三.具体的实施细则
前面提到了垃圾回收的触发条件,第一个条件是直接在C#代码中调用GC.Collect(),这个不用多说,人人都会。第二个是物理内存不足:获取物理内存信息调用了GetProcessMemoryLoad(其内部封装了Windows API GlobalMemoryStatusEx,并非VC++标准函数),当获取到的内存占用超过一定值的时候会触发GC回收。代码如下:

// Queries current memory status on behalf of the GC.
// memory_load         - out (optional): approximate percentage of memory in use (0..100)
// available_physical  - out (optional): bytes of physical memory currently available
// available_page_file - out (optional): bytes of page file currently available
// When the process runs under a restricted physical-memory limit, the numbers are
// computed against that limit instead of the machine-wide totals.
void GCToOSInterface::GetMemoryStatus(uint32_t* memory_load, uint64_t* available_physical, uint64_t* available_page_file)
{
    // Non-zero only when the process is constrained by an explicit physical-memory limit.
    uint64_t restricted_limit = GetRestrictedPhysicalMemoryLimit();
    if (restricted_limit != 0)
    {
        size_t workingSetSize;
        BOOL status = FALSE;
        if (!g_UseRestrictedVirtualMemory)
        {
            // Measure our own working set and report load relative to the restricted limit.
            PROCESS_MEMORY_COUNTERS pmc;
            status = GCGetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
            workingSetSize = pmc.WorkingSetSize;
        }

        if(status)
        {
            if (memory_load)
                *memory_load = (uint32_t)((float)workingSetSize * 100.0 / (float)restricted_limit);
            if (available_physical)
            {
                // Clamp to zero when the working set already exceeds the limit.
                if(workingSetSize > restricted_limit)
                    *available_physical = 0;
                else
                    *available_physical = restricted_limit - workingSetSize;
            }
            // Available page file doesn't mean much when physical memory is restricted since
            // we don't know how much of it is available to this process so we are not going to 
            // bother to make another OS call for it.
            if (available_page_file)
                *available_page_file = 0;

            return;
        }
    }

    // Fall back to machine-wide numbers.
    // NOTE(review): GetProcessMemoryLoad presumably wraps GlobalMemoryStatusEx — confirm in callee.
    MEMORYSTATUSEX ms;
    ::GetProcessMemoryLoad(&ms);
    
    if (g_UseRestrictedVirtualMemory)
    {
        // Virtual-memory-restricted case: derive load from virtual address-space usage.
        _ASSERTE (ms.ullTotalVirtual == restricted_limit);
        if (memory_load != NULL)
            *memory_load = (uint32_t)((float)(ms.ullTotalVirtual - ms.ullAvailVirtual) * 100.0 / (float)ms.ullTotalVirtual);
        if (available_physical != NULL)
            *available_physical = ms.ullTotalVirtual;

        // Available page file isn't helpful when we are restricted by virtual memory
        // since the amount of memory we can reserve is less than the amount of
        // memory we can commit.
        if (available_page_file != NULL)
            *available_page_file = 0;
    }
    else
    {
        // Unrestricted: report the OS-wide numbers directly.
        if (memory_load != NULL)
            *memory_load = ms.dwMemoryLoad;
        if (available_physical != NULL)
            *available_physical = ms.ullAvailPhys;
        if (available_page_file != NULL)
            *available_page_file = ms.ullAvailPageFile;
    }
}

分配量超过分配阈值的问题:这个是在GC初始化的时候会设置适当的分配阈值的上下限,当分配的内存超过阈值的时候也会触发GC的回收。
第0代的阈值下限为0,上限为6MB左右
第1代的阈值下限为160KB,上限无限制
第2代的阈值下限为256KB,上限无限制
第2代大对象阈值下限为3MB,上限无限制
至于系数,由于第0代区分服务器和工作站模式所以分别为(20,10以及9,20),其余代
上下限以及第1、2代和大小对象的分别为(2,7 —1.2f,1.8f— 1.25f,4.5f)

1.停止其它线程切换到抢占模式,大致就是停止其它的线程不让其访问托管空间。

  // Switches the current thread from cooperative to preemptive GC mode.
  // In preemptive mode the thread must not touch the managed heap, which lets
  // the GC proceed without suspending this thread.
  FORCEINLINE_NONDEBUG void EnablePreemptiveGC()
    {
    
    
        LIMITED_METHOD_CONTRACT;

#ifndef DACCESS_COMPILE
        // Only the thread itself may flip its own GC mode, and it must currently
        // be in cooperative mode (preemptive GC disabled).
        _ASSERTE(this == GetThread());
        _ASSERTE(m_fPreemptiveGCDisabled);
        // holding a spin lock in coop mode and transit to preemp mode will cause deadlock on GC
        _ASSERTE ((m_StateNC & Thread::TSNC_OwnsSpinLock) == 0);

#ifdef ENABLE_CONTRACTS_IMPL
        _ASSERTE(!GCForbidden());
        TriggersGC(this);
#endif

        // ------------------------------------------------------------------------
        //   ** WARNING ** WARNING ** WARNING ** WARNING ** WARNING ** WARNING **  |
        // ------------------------------------------------------------------------
        //
        //   DO NOT CHANGE THIS METHOD WITHOUT VISITING ALL THE STUB GENERATORS
        //   THAT EFFECTIVELY INLINE IT INTO THEIR STUBS
        //
        // ------------------------------------------------------------------------
        //   ** WARNING ** WARNING ** WARNING ** WARNING ** WARNING ** WARNING **  |
        // ------------------------------------------------------------------------

        // This store is the mode switch itself.
        m_fPreemptiveGCDisabled.StoreWithoutBarrier(0);
#ifdef ENABLE_CONTRACTS
        m_ulEnablePreemptiveGCCount ++;
#endif  // _DEBUG

        // If a suspension (e.g. for GC) is pending, take the slow path so this
        // thread cooperates with it.
        if (CatchAtSafePoint())
            RareEnablePreemptiveGC();
#endif
    }

2.判断是否执行后台GC的几个条件:没有禁用后台GC;当前目标代是第二代(后台GC只针对第二代);触发GC的时候没有请求启用压缩;物理内存占用率不高,不需要启用压缩;第二代碎片空间率不高,不需要启用压缩。
3.关于标记,计划重定位压缩和清扫

备注:
1.GCInfo主要包括了函数位置、变量列表等信息。查找当前函数递归调用链的时候,需要用到它。
GCInfo具体操作如下:
初始化GCInfo的时候,变量m_Reader被实例化为BitStreamReader的实例。BitStreamReader的结构如下。

// Reads a GC-info blob bit-by-bit. The buffer is consumed in native-word
// (size_t) units; m_RelPos tracks the bit offset inside the current word.
// FIX: the original excerpt had the `return result; }` of Read() fused into
// the trailing comment on the masking line, and the class was never closed.
class BitStreamReader
{
public:
    // pBuffer points at the encoded stream; it need not be word-aligned.
    BitStreamReader( PTR_CBYTE pBuffer )
    {
        SUPPORTS_DAC;

        _ASSERTE( pBuffer != NULL );

        // Round the pointer down to a size_t boundary so whole words can be read...
        m_pCurrent = m_pBuffer = dac_cast<PTR_size_t>((size_t)dac_cast<TADDR>(pBuffer) & ~((size_t)sizeof(size_t)-1));
        // ...and remember the bit offset of the original byte inside that word.
        m_RelPos = m_InitialRelPos = (int)((size_t)dac_cast<TADDR>(pBuffer) % sizeof(size_t)) * 8/*BITS_PER_BYTE*/;
    }

    // Reads numBits (1..BITS_PER_SIZE_T) from the stream and returns them
    // right-aligned in a size_t.
    // NOTE: This routine is perf-critical
    __forceinline size_t Read( int numBits )
    {
        SUPPORTS_DAC;

        _ASSERTE(numBits > 0 && numBits <= BITS_PER_SIZE_T);

        size_t result = (*m_pCurrent) >> m_RelPos; // bits remaining in the current word
        int newRelPos = m_RelPos + numBits;        // bit position after this read
        if(newRelPos >= BITS_PER_SIZE_T)           // the read spills into the next word
        {
            m_pCurrent++;
            newRelPos -= BITS_PER_SIZE_T;
            if(newRelPos > 0)
            {
                // Splice the low bits of the next word into the high end of result.
                size_t extraBits = (*m_pCurrent) << (numBits - newRelPos);
                result ^= extraBits;
            }
        }
        m_RelPos = newRelPos;
        // Keep only the low numBits bits. SAFE_SHIFT_LEFT is used because a
        // shift by BITS_PER_SIZE_T would be undefined behavior:
        //   SAFE_SHIFT_LEFT(x, count) == (x << 1) << (count - 1)
        result &= SAFE_SHIFT_LEFT(1, numBits) - 1;
        return result;
    }

    // This version reads one bit, returning zero/non-zero (not 0/1)
    // NOTE: This routine is perf-critical
    __forceinline size_t ReadOneFast()
    {
        SUPPORTS_DAC;

        size_t result = (*m_pCurrent) & (((size_t)1) << m_RelPos);
        if(++m_RelPos == BITS_PER_SIZE_T)
        {
            m_pCurrent++;
            m_RelPos = 0;
        }
        return result;
    }
};
                

主要有两个函数Read和ReadOneFast以及一个构造函数。构造函数主要是初始化m_pCurrent,m_pBuffer这两个变量。dac_cast&lt;PTR_size_t&gt;((size_t)dac_cast&lt;TADDR&gt;(pBuffer) &amp; ~((size_t)sizeof(size_t)-1));这段代码是把指针按sizeof(size_t)向下对齐,也就是使地址能被字长整除。Read函数的主要作用见上面注释。ReadOneFast读取一位,然后返回。

2.标记阶段
标记阶段就是标记需要回收垃圾的托管堆的存活的对象,主要有,从当前线程递归调用链获取到GCinfo 信息,然后通过GCInfo获取到栈对象和寄存器对象,然后遍历栈或者寄存器对象,来标记托管堆中的存活对象
执行函数顺序 GCScan::GcScanRoots(gcscan.cpp) -> GCToEEInterface::GcScanRoots(gcenv.ee.cpp) -> ScanStackRoots(gcenv.ee.cpp) -> StackWalkFrames(stackwalk.cpp) -> StackWalkFramesEx(stackwalk.cpp)
StackWalkFramesEx函数如下:

// Walks this thread's stack, invoking pCallback once per frame. Used by GC
// root scanning, exception handling, profilers and debuggers.
// pRD         - virtual register set at crawl start
// pCallback   - per-frame callback; may return SWA_ABORT to stop the walk
// pData       - opaque cookie passed through to the callback
// flags       - stack-walk flags (e.g. POPFRAMES)
// pStartFrame - optional explicit frame to start from
// Returns the last callback/iterator status (SWA_FAILED if the walk never started).
StackWalkAction Thread::StackWalkFramesEx(
                    PREGDISPLAY pRD,        // virtual register set at crawl start
                    PSTACKWALKFRAMESCALLBACK pCallback,
                    VOID *pData,
                    unsigned flags,
                    PTR_Frame pStartFrame
                )
{
    // Note: there are cases (i.e., exception handling) where we may never return from this function. This means
    // that any C++ destructors pushed in this function will never execute, and it means that this function can
    // never have a dynamic contract.
    STATIC_CONTRACT_WRAPPER;
    STATIC_CONTRACT_SO_INTOLERANT;
    SCAN_IGNORE_THROW;            // see contract above
    SCAN_IGNORE_TRIGGER;          // see contract above

    _ASSERTE(pRD);
    _ASSERTE(pCallback);

    // when POPFRAMES we don't want to allow GC trigger.
    // The only method that guarantees this now is COMPlusUnwindCallback
#ifdef STACKWALKER_MAY_POP_FRAMES
    ASSERT(!(flags & POPFRAMES) || pCallback == (PSTACKWALKFRAMESCALLBACK) COMPlusUnwindCallback);
    ASSERT(!(flags & POPFRAMES) || pRD->pContextForUnwind != NULL);
    ASSERT(!(flags & POPFRAMES) || (this == GetThread() && PreemptiveGCDisabled()));
#else // STACKWALKER_MAY_POP_FRAMES
    ASSERT(!(flags & POPFRAMES));
#endif // STACKWALKER_MAY_POP_FRAMES

    // We haven't set the stackwalker thread type flag yet, so it shouldn't be set. Only
    // exception to this is if the current call is made by a hijacking profiler which
    // redirected this thread while it was previously in the middle of another stack walk
#ifdef PROFILING_SUPPORTED
    _ASSERTE(CORProfilerStackSnapshotEnabled() || !IsStackWalkerThread());
#else
    _ASSERTE(!IsStackWalkerThread());
#endif

    StackWalkAction retVal = SWA_FAILED;

    {
        // SCOPE: Remember that we're walking the stack.
        // 
        // Normally, we'd use a holder (ClrFlsThreadTypeSwitch) to temporarily set this
        // flag in the thread state, but we can't in this function, since C++ destructors
        // are forbidden when this is called for exception handling (which causes
        // MakeStackwalkerCallback() not to return). Note that in exception handling
        // cases, we will have already cleared the stack walker thread state indicator inside
        // MakeStackwalkerCallback(), so we will be properly cleaned up.
#if !defined(DACCESS_COMPILE)
        PVOID pStackWalkThreadOrig = ClrFlsGetValue(TlsIdx_StackWalkerWalkingThread);
#endif
        SET_THREAD_TYPE_STACKWALKER(this);

        StackFrameIterator iter;
        if (iter.Init(this, pStartFrame, pRD, flags) == TRUE)
        {
            while (iter.IsValid())
            {
                // Report the current frame to the caller's callback.
                retVal = MakeStackwalkerCallback(&iter.m_crawl, pCallback, pData DEBUG_ARG(iter.m_uFramesProcessed));
                if (retVal == SWA_ABORT)
                {
                    break;
                }

                // Next() unwinds one frame and updates pRD (the register display)
                // so later consumers (e.g. GC root scanning) can read RSP/SP for
                // this frame and locate the objects that need to be marked.
                retVal = iter.Next();
                if (retVal == SWA_FAILED)
                {
                    break;
                }
            }
        }

        // Restore whatever stack-walker indicator was set before we started.
        SET_THREAD_TYPE_STACKWALKER(pStackWalkThreadOrig);
    }

    return retVal;
} // StackWalkAction Thread::StackWalkFramesEx()

这个函数只有两个看点,其一是 retVal = iter.Next(); //这里会用update 来更新 prd 以便后面获取rsp 寄存器的值,找出需要标记的对象。其二就是从函数GcScanRoots开始就会传入回调GCHeap::Promote,这个函数负责标记找出来对象的存活。在ScanStackRoots函数里面这个回调函数被包装成了GC上下文,也就是GCCONTEXT gcctx; gcctx.f=回调函数fn。然后当ScanStackRoots调用StackWalkFrames的时候又传入了回调函数GcStackCrawlCallBack。这个函数主要是解析传入的回调标记函数GCHeap::Promote,然后执行他。执行过程非常复杂。基本如下:
GcStackCrawlCallBack -> EnumGcRefs -> EnumerateLiveSlots(这个函数里面有个GcSlotDecoder,这是个关键类,通过JIT生成的GCInfo实例化它,然后获取slot和对象的偏移地址,也就是上面的RSP+这里的偏移地址,然后传到回调的标记处理函数GCHeap::Promote,以便标记)
获取对象的代码如下

OBJECTREF* pObjRef = GetStackSlot(spOffset, spBase, pRD);
// Translates a GC-info stack slot (offset relative to a base) into the address
// of an object reference in the stopped thread's stack frame.
// spOffset - byte offset of the slot relative to the base
// spBase   - which base register the offset is relative to (this excerpt only
//            handles the SP-relative case)
// pRD      - register display; pRD->SP was refreshed by the stack walker's
//            iter.Next()/update, so it holds this frame's stack pointer
// FIX: the original excerpt assigned to an undeclared 'pObjRef' and fell off
// the end of a value-returning function.
// NOTE(review): the full CoreCLR version also switches on spBase for
// caller-SP-relative and frame-register-relative slots — confirm against
// gcinfodecoder.cpp if those cases are needed.
OBJECTREF* GcInfoDecoder::GetStackSlot(
                        INT32           spOffset,
                        GcStackSlotBase spBase,
                        PREGDISPLAY     pRD
                        )
{
    OBJECTREF* pObjRef = (OBJECTREF*) ((SIZE_T)pRD->SP + spOffset);
    return pObjRef;
}

注意这里的pRD是经过上面retVal = iter.Next() 的Next()函数里的update 更新过的,包含了寄存器Rsp的值。而spOffset是对象的偏移地址。具体获取方式就是:

slotDecoder.DecodeSlotTable(m_Reader); // 初始化slotDecoder
const GcSlotDesc* pSlot = slotDecoder.GetSlotDesc(slotIndex);// 获取pslot
INT32 spOffset = pSlot->Slot.Stack.SpOffset; //获取对象相对于寄存器rsp 的偏移值

3.每次GC结束之后,都会重新计算分配量的阈值,以便下次分配对象使用,代码如下:

// Computes the allocation budget ("desired new allocation") for a generation
// after a GC completes; exceeding this budget is what triggers the next GC.
// dd         - dynamic data for the generation being budgeted
// out        - bytes that survived this GC in this generation
// gen_number - generation that was collected
// pass       - budget-computation pass (gen0 fragmentation tuning runs on pass 0)
// Returns the new allocation budget, aligned for the generation's heap.
size_t gc_heap::desired_new_allocation (dynamic_data* dd,
                                        size_t out,
                                         int gen_number,
                                        int pass)
{
    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();

    if (dd_begin_data_size (dd) == 0) // nothing was live in this generation before the GC
    {
        // Fall back to the budget's lower bound and record it.
        size_t new_allocation = dd_min_size (dd);
        current_gc_data_per_heap->gen_data[gen_number].new_allocation = new_allocation;  //把新分配量存入动态数据 -> store the new budget in the per-heap history
        return new_allocation;
    }
    else
    {
        float     cst;                                                      // survival rate
        size_t    previous_desired_allocation = dd_desired_allocation (dd); // previous budget
        size_t    current_size = dd_current_size (dd);                      // live size after this GC
        float     max_limit = dd_max_limit (dd);                            // upper bound of the growth factor
        float     limit = dd_limit (dd);                                    // lower bound of the growth factor
        size_t    min_gc_size = dd_min_size (dd);                           // lower bound of the budget
        float     f = 0;                                                    // growth factor derived from survival
        size_t    max_size = dd_max_size (dd);                              // upper bound of the budget
        size_t    new_allocation = 0;
        // Fraction of the previous budget that was actually consumed before this GC.
        float allocation_fraction = (float) (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)) / (float) (dd_desired_allocation (dd));
        if (gen_number >= max_generation) // gen2 or the large object heap
        {
            size_t    new_size = 0;

            // Survival rate = live-after-GC / live-before-GC, clamped to 1.
            cst = min (1.0f, float (out) / float (dd_begin_data_size (dd)));

            // Map the survival rate to a growth factor within [limit, max_limit].
            f = surv_to_growth (cst, limit, max_limit);
            size_t max_growth_size = (size_t)(max_size / f);
            if (current_size >= max_growth_size) // growing by f would exceed the cap
            {
                new_size = max_size;
            }
            else
            {
                // Grow the live size by f, clamped to [min_gc_size, max_size].
                new_size = (size_t) min (max ( (f * current_size), min_gc_size), max_size);
            }

            // New size never shrinks below the live size, unless capped at max_size.
            assert ((new_size >= current_size) || (new_size == max_size));

            if (gen_number == max_generation) // gen2 proper
            {
                new_allocation  =  max((new_size - current_size), min_gc_size);

                // Smooth the budget against recent allocation history.
                new_allocation = linear_allocation_model (allocation_fraction, new_allocation, 
                                                          dd_desired_allocation (dd), dd_collection_count (dd));

                if ((dd_fragmentation (dd) > ((size_t)((f-1)*current_size))))
                {
                    //reducing allocation in case of fragmentation
                    size_t new_allocation1 = max (min_gc_size,
                                                  // CAN OVERFLOW
                                                  (size_t)((float)new_allocation * current_size /
                                                           ((float)current_size + 2*dd_fragmentation (dd))));
                    dprintf (2, ("Reducing max_gen allocation due to fragmentation from %Id to %Id",
                                 new_allocation, new_allocation1));
                    new_allocation = new_allocation1;
                }
            }
            else //large object heap
            {
                uint32_t memory_load = 0;
                uint64_t available_physical = 0;
                get_memory_info (&memory_load, &available_physical);
                if (heap_number == 0)
                    settings.exit_memory_load = memory_load;
                if (available_physical > 1024*1024)
                    available_physical -= 1024*1024; // leave a 1MB cushion

                // Budget may also draw on the generation's free list.
                uint64_t available_free = available_physical + (uint64_t)generation_free_list_space (generation_of (gen_number));
                if (available_free > (uint64_t)MAX_PTR)
                {
                    available_free = (uint64_t)MAX_PTR;
                }

                //try to avoid OOM during large object allocation
                new_allocation = max (min(max((new_size - current_size), dd_desired_allocation (dynamic_data_of (max_generation))), 
                                          (size_t)available_free), 
                                      max ((current_size/4), min_gc_size));

                new_allocation = linear_allocation_model (allocation_fraction, new_allocation,
                                                          dd_desired_allocation (dd), dd_collection_count (dd));

            }
        }
        else // ephemeral generations (gen0 / gen1)
        {
            size_t survivors = out;
            cst = float (survivors) / float (dd_begin_data_size (dd));
            f = surv_to_growth (cst, limit, max_limit);
            // Budget scales with surviving bytes, clamped to [min_gc_size, max_size].
            new_allocation = (size_t) min (max ((f * (survivors)), min_gc_size), max_size);

            new_allocation = linear_allocation_model (allocation_fraction, new_allocation, 
                                                      dd_desired_allocation (dd), dd_collection_count (dd));

            if (gen_number == 0)
            {
                if (pass == 0)
                {

                    //printf ("%f, %Id\n", cst, new_allocation);
                    // Track gen0 free-list fragmentation to decide whether to
                    // throttle the gen0 budget for the next couple of GCs.
                    size_t free_space = generation_free_list_space (generation_of (gen_number));
                    // DTREVIEW - is min_gc_size really a good choice? 
                    // on 64-bit this will almost always be true.
                    dprintf (GTC_LOG, ("frag: %Id, min: %Id", free_space, min_gc_size));
                    if (free_space > min_gc_size)
                    {
                        settings.gen0_reduction_count = 2;
                    }
                    else
                    {
                        if (settings.gen0_reduction_count > 0)
                            settings.gen0_reduction_count--;
                    }
                }
                if (settings.gen0_reduction_count > 0)
                {
                    dprintf (2, ("Reducing new allocation based on fragmentation"));
                    new_allocation = min (new_allocation,
                                          max (min_gc_size, (max_size/3)));
                }
            }
        }

        // Align the budget (the large object heap uses a different alignment constant).
        size_t new_allocation_ret = 
            Align (new_allocation, get_alignment_constant (!(gen_number == (max_generation+1))));
        int gen_data_index = gen_number;
        gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_data_index]);
        gen_data->new_allocation = new_allocation_ret;

        // Record the survival rate for later heuristics.
        dd_surv (dd) = cst;

#ifdef SIMPLE_DPRINTF
        dprintf (1, ("h%d g%d surv: %Id current: %Id alloc: %Id (%d%%) f: %d%% new-size: %Id new-alloc: %Id",
                     heap_number, gen_number, out, current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)),
                     (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));
#else
        dprintf (1,("gen: %d in: %Id out: %Id ", gen_number, generation_allocation_size (generation_of (gen_number)), out));
        dprintf (1,("current: %Id alloc: %Id ", current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd))));
        dprintf (1,(" surv: %d%% f: %d%% new-size: %Id new-alloc: %Id",
                    (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));
#endif //SIMPLE_DPRINTF

        return new_allocation_ret;
    }
}

猜你喜欢

转载自blog.csdn.net/tangyanzhi1111/article/details/106976887
CLR
今日推荐