CockroachDB 矢量化（向量化）计算

1. 分批次读取

1.1 BatchSize如何设置

源码分析:
sql/colfetcher/colbatch_scan.go

// NewColBatchScan creates a new ColBatchScan operator.
//
// It derives a limit hint from spec.LimitHint and post via
// execinfra.LimitHint, takes a ColBatchScan from colBatchScanPool, and
// overwrites it with the state for this scan. Parts of the function are
// elided ("...") in this excerpt; fetcher and typs are defined in the elided
// code.
func NewColBatchScan(
    ctx context.Context,
    allocator *colmem.Allocator,
    flowCtx *execinfra.FlowCtx,
    evalCtx *tree.EvalContext,
    spec *execinfrapb.TableReaderSpec,
    post *execinfrapb.PostProcessSpec,
) (*ColBatchScan, error) {
    ...
    limitHint := execinfra.LimitHint(spec.LimitHint, post)
    ...
    s := colBatchScanPool.Get().(*ColBatchScan)
    // Truncate the pooled object's spans slice to length zero so its backing
    // array is reused for the spans of this scan.
    spans := s.spans[:0]
    for i := range spec.Spans {
        spans = append(spans, spec.Spans[i].Span)
    }
    *s = ColBatchScan{
        ctx:       ctx,
        spans:     spans,
        flowCtx:   flowCtx,
        rf:        fetcher,
        limitHint: limitHint,   // Store the limit hint; Init later passes it to StartScan.
        // Parallelize shouldn't be set when there's a limit hint, but double-check
        // just in case.
        parallelize: spec.Parallelize && limitHint == 0,
        ResultTypes: typs,
    }
    return s, nil
}

Init 在开始 Scan 时，把上面保存的 limitHint 传给 StartScan：

// Init initializes a ColBatchScan: it marks the operator as initialized and
// starts the fetcher's scan over s.spans, forwarding the stored limit hint.
// An error from StartScan is handed to colexecerror.InternalError.
func (s *ColBatchScan) Init() {
    s.init = true
    // Batch limits are requested exactly when the scan is not parallelized.
    useBatchLimit := !s.parallelize
    txn, traceKV := s.flowCtx.Txn, s.flowCtx.TraceKV
    err := s.rf.StartScan(s.ctx, txn, s.spans, useBatchLimit, s.limitHint, traceKV)
    if err != nil {
        colexecerror.InternalError(err)
    }
}

sql/colfetcher/cfetcher.go

// StartScan initializes and starts the key-value scan. Can be used multiple
// times.
//
// The visible part of the function converts limitHint (a row count) into a
// key count for the first KV batch and passes it, together with
// limitBatches, to row.NewKVFetcher. The code before and after that is
// elided ("...") in this excerpt.
func (rf *cFetcher) StartScan(
    ctx context.Context,
    txn *kv.Txn,
    spans roachpb.Spans,
    limitBatches bool,
    limitHint int64,
    traceKV bool,
) error {
    ...
    // A zero limitHint leaves firstBatchLimit at zero; no conversion is done.
    firstBatchLimit := limitHint
    if firstBatchLimit != 0 {
        // The limitHint is a row limit, but each row could be made up
        // of more than one key. We take the maximum possible keys
        // per row out of all the table rows we could potentially
        // scan over.
        firstBatchLimit = limitHint * int64(rf.maxKeysPerRow)
        // We need an extra key to make sure we form the last row.
        firstBatchLimit++   // Add one for that extra key.
    }
    // Note that we pass a nil memMonitor here, because the cfetcher does its own
    // memory accounting.
    f, err := row.NewKVFetcher(
        txn,
        spans,
        rf.reverse,
        limitBatches,
        firstBatchLimit,   // The key limit computed above.
        rf.lockStrength,
        rf.lockWaitPolicy,
        nil, /* memMonitor */
    )

    ...
}

sql/row/kv_fetcher.go

// NewKVFetcher creates a new KVFetcher.
// If mon is non-nil, this fetcher will track its fetches and must be Closed.
//
// All arguments are forwarded unchanged to makeKVBatchFetcher. The resulting
// batch fetcher is wrapped with newKVFetcher and returned together with
// whatever error makeKVBatchFetcher reported; the wrapper is built even when
// that error is non-nil.
func NewKVFetcher(
    txn *kv.Txn,
    spans roachpb.Spans,
    reverse bool,
    useBatchLimit bool,
    firstBatchLimit int64,
    lockStrength descpb.ScanLockingStrength,
    lockWaitPolicy descpb.ScanLockingWaitPolicy,
    mon *mon.BytesMonitor,
) (*KVFetcher, error) {
    batchFetcher, err := makeKVBatchFetcher(
        txn,
        spans,
        reverse,
        useBatchLimit,
        firstBatchLimit,
        lockStrength,
        lockWaitPolicy,
        mon,
    )
    fetcher := newKVFetcher(&batchFetcher)
    return fetcher, err
}

// makeKVBatchFetcher builds a txnKVFetcher whose batch requests are sent
// through txn. Every other argument is forwarded unchanged to
// makeKVBatchFetcherWithSendFunc.
func makeKVBatchFetcher(
    txn *kv.Txn,
    spans roachpb.Spans,
    reverse bool,
    useBatchLimit bool,
    firstBatchLimit int64,
    lockStrength descpb.ScanLockingStrength,
    lockWaitPolicy descpb.ScanLockingWaitPolicy,
    mon *mon.BytesMonitor,
) (txnKVFetcher, error) {
    // Adapt txn.Send to the send-function shape: on failure the response is
    // dropped and the error is converted with GoError.
    viaTxn := func(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, error) {
        resp, pErr := txn.Send(ctx, ba)
        if pErr == nil {
            return resp, nil
        }
        return nil, pErr.GoError()
    }
    return makeKVBatchFetcherWithSendFunc(
        viaTxn,
        spans,
        reverse,
        useBatchLimit,
        firstBatchLimit,
        lockStrength,
        lockWaitPolicy,
        mon,
    )
}

// makeKVBatchFetcherWithSendFunc is like makeKVBatchFetcher but uses a custom
// send function.
//
// It validates its arguments and returns a txnKVFetcher holding a private
// copy of spans:
//   - firstBatchLimit must be non-negative, and must be zero when
//     useBatchLimit is false; otherwise an "invalid batch limit" error is
//     returned.
//   - With useBatchLimit, every span must start at or after the end key of
//     the span before it; otherwise an "unordered spans" error is returned.
//   - Without useBatchLimit, and only when util.RaceEnabled is set,
//     neighboring spans must not overlap (in either order); otherwise an
//     "overlapping neighbor spans" error is returned.
//
// When reverse is set, the copied spans are stored in reverse order.
func makeKVBatchFetcherWithSendFunc(
    sendFn sendFunc,
    spans roachpb.Spans,
    reverse bool,
    useBatchLimit bool,
    firstBatchLimit int64,
    lockStrength descpb.ScanLockingStrength,
    lockWaitPolicy descpb.ScanLockingWaitPolicy,
    mon *mon.BytesMonitor,
) (txnKVFetcher, error) {
    if firstBatchLimit < 0 || (!useBatchLimit && firstBatchLimit != 0) {
        return txnKVFetcher{}, errors.Errorf("invalid batch limit %d (useBatchLimit: %t)",
            firstBatchLimit, useBatchLimit)
    }

    if useBatchLimit {
        // Verify the spans are ordered if a batch limit is used.
        for i := 1; i < len(spans); i++ {
            if spans[i].Key.Compare(spans[i-1].EndKey) < 0 {
                return txnKVFetcher{}, errors.Errorf("unordered spans (%s %s)", spans[i-1], spans[i])
            }
        }
    } else if util.RaceEnabled {
        // Otherwise, just verify the spans don't contain consecutive overlapping
        // spans.
        for i := 1; i < len(spans); i++ {
            if spans[i].Key.Compare(spans[i-1].EndKey) >= 0 {
                // Current span's start key is greater than or equal to the last span's
                // end key - we're good.
                continue
            }
            if spans[i].EndKey.Compare(spans[i-1].Key) <= 0 {
                // Current span's end key is less than or equal to the last span's start
                // key - also good. End keys are exclusive, so equality means the
                // spans are merely adjacent, not overlapping.
                continue
            }
            // Otherwise, the two spans overlap, which isn't allowed - it leaves us at
            // risk of incorrect results, since the row fetcher can't distinguish
            // between identical rows in two different batches.
            return txnKVFetcher{}, errors.Errorf("overlapping neighbor spans (%s %s)", spans[i-1], spans[i])
        }
    }

    // Make a copy of the spans because we update them.
    copySpans := make(roachpb.Spans, len(spans))
    for i := range spans {
        if reverse {
            // Reverse scans receive the spans in decreasing order.
            copySpans[len(spans)-i-1] = spans[i]
        } else {
            copySpans[i] = spans[i]
        }
    }

    return txnKVFetcher{
        sendFn:          sendFn,
        spans:           copySpans,
        reverse:         reverse,
        useBatchLimit:   useBatchLimit,
        firstBatchLimit: firstBatchLimit, // Later read by getBatchSizeForIdx.
        lockStrength:    lockStrength,
        lockWaitPolicy:  lockWaitPolicy,
        mon:             mon,
        acc:             mon.MakeBoundAccount(),
    }, nil
}

sql/row/kv_batch_fetcher.go

// fetch retrieves spans from the kv layer.
//
// Only the start of the function is shown in this excerpt; "..." marks the
// elided remainder. The visible part fills in the batch request header: the
// wait policy from getWaitPolicy and MaxSpanRequestKeys from getBatchSize.
func (f *txnKVFetcher) fetch(ctx context.Context) error {
    var ba roachpb.BatchRequest
    ba.Header.WaitPolicy = f.getWaitPolicy()
    ba.Header.MaxSpanRequestKeys = f.getBatchSize()   // The batch size computed for the current batch index.
... 
}
// getBatchSize returns the max size of the next batch, i.e. the value
// getBatchSizeForIdx yields for the fetcher's current batch index.
func (f *txnKVFetcher) getBatchSize() int64 {
    currentIdx := f.batchIdx
    return f.getBatchSizeForIdx(currentIdx)
}

// getBatchSizeForIdx returns the max size of the batch with index batchIdx.
//
// Without a batch limit the result is 0. When no first-batch limit is set
// (zero), or it is already at least kvBatchSize, every batch uses
// kvBatchSize. Otherwise the first batch uses firstBatchLimit, the second
// batch is ten times larger (clamped to the range
// [kvBatchSize/10, kvBatchSize]), and any other batch index uses
// kvBatchSize.
//
// Sample progressions of batch sizes (for kvBatchSize = 10,000):
//
//  First batch | Second batch | Subsequent batches
//  -----------------------------------------------
//         1    |     1,000    |     10,000
//       100    |     1,000    |     10,000
//       500    |     5,000    |     10,000
//      1000    |    10,000    |     10,000
func (f *txnKVFetcher) getBatchSizeForIdx(batchIdx int) int64 {
    switch {
    case !f.useBatchLimit:
        return 0
    case f.firstBatchLimit == 0, f.firstBatchLimit >= kvBatchSize:
        return kvBatchSize
    case batchIdx == 0:
        // We grab the first batch according to the limit.
        return f.firstBatchLimit
    case batchIdx != 1:
        // Past the second batch we revert to the default batch size.
        return kvBatchSize
    }

    // Second batch: grow the first batch tenfold, but stay between a tenth of
    // the default batch size and the default batch size itself.
    var floor int64 = kvBatchSize / 10
    grown := f.firstBatchLimit * 10
    if grown < floor {
        return floor
    }
    if grown > kvBatchSize {
        return kvBatchSize
    }
    return grown
}

猜你喜欢

转载自blog.51cto.com/1196740/2569289