ElasticJobListener 2.1.5 分布式唯一性保证bug修复

最近执行esjob的时候,发现在某些情况下,分布式监听器会被执行多次。通过对比源码,我们发现了一个问题:

old version:

 @Override
    // Old (pre-fix) version, shown for comparison. Every instance registers its
    // sharding items, then checks ONCE whether all items are registered; whoever
    // sees "all started" runs the before-callback. As the surrounding article
    // explains, two instances can both observe "all started" at the same moment,
    // so the callback may run more than once.
    public final void beforeJobExecuted(final ShardingContexts shardingContexts) {
        // Record this instance's sharding items as started.
        guaranteeService.registerStart(shardingContexts.getShardingItemParameters().keySet());
        
            // Single, unguarded check: any instance that sees all items started
            // runs the callback and then clears the started info.
            if (guaranteeService.isAllStarted()) {
                doBeforeJobExecutedAtLastStarted(shardingContexts);
                guaranteeService.clearAllStartedInfo();
                return;
            }
            // Fixed 500 ms pause before falling through to the wait below; any
            // exception (including an interrupt) is only printed.
            try {
                Thread.sleep(500);
            }catch (Exception ex){
                ex.printStackTrace();
            }
      

        // Block on the startedWait monitor until notified or until the start
        // timeout elapses.
        long before = timeService.getCurrentMillis();
        try {
            synchronized (startedWait) {
                startedWait.wait(startedTimeoutMilliseconds);
            }
        } catch (final InterruptedException ex) {
            // Thread.interrupted() clears the interrupt flag, so the interrupt is dropped here.
            Thread.interrupted();
        }
        // If the whole timeout elapsed, clear the started info and fail via handleTimeout.
        if (timeService.getCurrentMillis() - before >= startedTimeoutMilliseconds) {
            guaranteeService.clearAllStartedInfo();
            handleTimeout(startedTimeoutMilliseconds);
        }
    }


逻辑是:先在zk注册节点,再判断是否全部注册成功;如果全部成功,则认为自己是最后一个注册的节点,执行before方法。这在单机上没有问题,但在分布式环境下就有问题:如果两台机器很巧合地同时判断出自己是最后一个节点(因为对zk的访问是并发的,不能保证进入判断的时序),就都会执行before操作。为了解决这个问题,思路如下(只有持有0号分片的节点判断是否全部注册完成,并执行监听操作):

循环:

    判断当前为0节点:

        判断是否全部节点注册成功:

                before

                清除注册zk信息

    锁定


扫描二维码关注公众号,回复: 2355128 查看本文章

其他线程:判断zk信息是否清除成功:解锁


修改后源码如下:

/*
 * Copyright 1999-2015 dangdang.com.
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * </p>
 */

package com.dangdang.ddframe.job.lite.api.listener;

import com.dangdang.ddframe.job.exception.JobSystemException;
import com.dangdang.ddframe.job.executor.ShardingContexts;
import com.dangdang.ddframe.job.lite.internal.guarantee.GuaranteeService;
import com.dangdang.ddframe.job.util.env.TimeService;
import lombok.Setter;

/**
 * 在分布式作业中只执行一次的监听器.
 * 
 * @author zhangliang
 */
/**
 * Listener whose callbacks are executed only once per distributed job run.
 *
 * <p>Every instance registers its sharding items with the {@link GuaranteeService}.
 * Only the instance that holds sharding item {@code 0} polls the guarantee service
 * and runs the callback once all items are registered; every other instance blocks
 * on a monitor until {@link #notifyWaitingTaskStart()} /
 * {@link #notifyWaitingTaskComplete()} is called or its timeout elapses.</p>
 *
 * @author zhangliang
 */
public abstract class AbstractDistributeOnceElasticJobListener implements ElasticJobListener {
    
    /** Pause between two consecutive checks made by the holder of sharding item 0. */
    private static final long POLL_INTERVAL_MILLISECONDS = 500L;
    
    private final long startedTimeoutMilliseconds;
    
    private final Object startedWait = new Object();
    
    private final long completedTimeoutMilliseconds;
    
    private final Object completedWait = new Object();
    
    @Setter
    private GuaranteeService guaranteeService;
    
    private TimeService timeService = new TimeService();
    
    /**
     * Creates the listener.
     *
     * @param startedTimeoutMilliseconds   maximum time to wait for all items to start;
     *                                     a value {@code <= 0} is replaced by {@link Long#MAX_VALUE}
     * @param completedTimeoutMilliseconds maximum time to wait for all items to complete;
     *                                     a value {@code <= 0} is replaced by {@link Long#MAX_VALUE}
     */
    public AbstractDistributeOnceElasticJobListener(final long startedTimeoutMilliseconds, final long completedTimeoutMilliseconds) {
        if (startedTimeoutMilliseconds <= 0L) {
            this.startedTimeoutMilliseconds = Long.MAX_VALUE;
        } else {
            this.startedTimeoutMilliseconds = startedTimeoutMilliseconds;
        }
        if (completedTimeoutMilliseconds <= 0L) {
            this.completedTimeoutMilliseconds = Long.MAX_VALUE;
        } else {
            this.completedTimeoutMilliseconds = completedTimeoutMilliseconds;
        }
    }
    
    /**
     * Registers this instance's sharding items as started, then either polls
     * (holder of item 0) or waits for a notification (all other instances).
     *
     * @param shardingContexts sharding contexts of the current execution
     * @throws JobSystemException if the start timeout elapses
     */
    @Override
    public final void beforeJobExecuted(final ShardingContexts shardingContexts) {
        guaranteeService.registerStart(shardingContexts.getShardingItemParameters().keySet());
        // Only the holder of sharding item 0 decides whether everything has started
        // and runs the callback, so the callback cannot run on two instances.
        if (shardingContexts.getShardingItemParameters().containsKey(0)) {
            pollUntilAllStarted(shardingContexts);
            return;
        }
        long before = timeService.getCurrentMillis();
        awaitNotification(startedWait, startedTimeoutMilliseconds);
        if (timeService.getCurrentMillis() - before >= startedTimeoutMilliseconds) {
            guaranteeService.clearAllStartedInfo();
            handleTimeout(startedTimeoutMilliseconds);
        }
    }
    
    /**
     * Registers this instance's sharding items as completed, then either polls
     * (holder of item 0) or waits for a notification (all other instances).
     *
     * @param shardingContexts sharding contexts of the current execution
     * @throws JobSystemException if the completion timeout elapses
     */
    @Override
    public final void afterJobExecuted(final ShardingContexts shardingContexts) {
        guaranteeService.registerComplete(shardingContexts.getShardingItemParameters().keySet());
        // Only the holder of sharding item 0 decides whether everything has completed
        // and runs the callback, so the callback cannot run on two instances.
        if (shardingContexts.getShardingItemParameters().containsKey(0)) {
            pollUntilAllCompleted(shardingContexts);
            return;
        }
        long before = timeService.getCurrentMillis();
        awaitNotification(completedWait, completedTimeoutMilliseconds);
        if (timeService.getCurrentMillis() - before >= completedTimeoutMilliseconds) {
            guaranteeService.clearAllCompletedInfo();
            handleTimeout(completedTimeoutMilliseconds);
        }
    }
    
    /**
     * Polls until all items are started, then runs the before-callback and clears the
     * started info. Fails with the <em>start</em> timeout when it elapses first.
     */
    private void pollUntilAllStarted(final ShardingContexts shardingContexts) {
        long pollingSince = timeService.getCurrentMillis();
        boolean interrupted = false;
        try {
            while (true) {
                if (guaranteeService.isAllStarted()) {
                    doBeforeJobExecutedAtLastStarted(shardingContexts);
                    guaranteeService.clearAllStartedInfo();
                    return;
                }
                interrupted |= pauseBetweenPolls();
                // Must use the started timeout here (the completed timeout belongs to afterJobExecuted).
                if (timeService.getCurrentMillis() - pollingSince >= startedTimeoutMilliseconds) {
                    guaranteeService.clearAllStartedInfo();
                    handleTimeout(startedTimeoutMilliseconds);
                }
            }
        } finally {
            restoreInterruptIfNeeded(interrupted);
        }
    }
    
    /**
     * Polls until all items are completed, then runs the after-callback and clears the
     * completed info. Fails with the completion timeout when it elapses first.
     */
    private void pollUntilAllCompleted(final ShardingContexts shardingContexts) {
        long pollingSince = timeService.getCurrentMillis();
        boolean interrupted = false;
        try {
            while (true) {
                if (guaranteeService.isAllCompleted()) {
                    doAfterJobExecutedAtLastCompleted(shardingContexts);
                    guaranteeService.clearAllCompletedInfo();
                    return;
                }
                interrupted |= pauseBetweenPolls();
                if (timeService.getCurrentMillis() - pollingSince >= completedTimeoutMilliseconds) {
                    guaranteeService.clearAllCompletedInfo();
                    handleTimeout(completedTimeoutMilliseconds);
                }
            }
        } finally {
            restoreInterruptIfNeeded(interrupted);
        }
    }
    
    /**
     * Sleeps for one poll interval.
     *
     * @return {@code true} if the sleep was cut short by an interrupt
     */
    private static boolean pauseBetweenPolls() {
        try {
            Thread.sleep(POLL_INTERVAL_MILLISECONDS);
            return false;
        } catch (final InterruptedException ex) {
            // Keep polling (as before), but let the caller re-assert the interrupt on exit.
            return true;
        }
    }
    
    /** Re-asserts the interrupt status that was consumed while polling. */
    private static void restoreInterruptIfNeeded(final boolean interrupted) {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
    
    /**
     * Waits on {@code monitor} until notified, interrupted, or the timeout elapses.
     * An interrupt ends the wait and is preserved on the current thread.
     */
    private static void awaitNotification(final Object monitor, final long timeoutMilliseconds) {
        try {
            synchronized (monitor) {
                monitor.wait(timeoutMilliseconds);
            }
        } catch (final InterruptedException ex) {
            Thread.currentThread().interrupt();
        }
    }
    
    /**
     * Signals a timeout by throwing.
     *
     * @param timeoutMilliseconds the timeout that elapsed, reported in the message
     * @throws JobSystemException always
     */
    private void handleTimeout(final long timeoutMilliseconds) {
        throw new JobSystemException("Job timeout. timeout mills is %s.", timeoutMilliseconds);
    }
    
    /**
     * Callback executed once, before the job runs, after the last sharding item has started.
     *
     * @param shardingContexts sharding contexts
     */
    public abstract void doBeforeJobExecutedAtLastStarted(ShardingContexts shardingContexts);
    
    /**
     * Callback executed once, after the job runs, after the last sharding item has completed.
     *
     * @param shardingContexts sharding contexts
     */
    public abstract void doAfterJobExecutedAtLastCompleted(ShardingContexts shardingContexts);
    
    /**
     * Wakes every thread blocked in {@link #beforeJobExecuted(ShardingContexts)} waiting for the start signal.
     */
    public void notifyWaitingTaskStart() {
        synchronized (startedWait) {
            startedWait.notifyAll();
        }
    }
    
    /**
     * Wakes every thread blocked in {@link #afterJobExecuted(ShardingContexts)} waiting for the completion signal.
     */
    public void notifyWaitingTaskComplete() {
        synchronized (completedWait) {
            completedWait.notifyAll();
        }
    }
}


猜你喜欢

转载自blog.csdn.net/xvshu/article/details/80987403
今日推荐