ContainersMonitorImpl.MonitoringThread.run
When memory exceeded, the monitor thread will sends a ContainerKillEvent.
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
eventDispatcher.getEventHandler().handle(
new ContainerKillEvent(containerId,
containerExitStatus, msg));
ContainerKillEvent is a event of type ContainerEventType.KILL_CONTAINER.
public ContainerKillEvent(ContainerId cID,
int exitStatus, String diagnostic) {
super(cID, ContainerEventType.KILL_CONTAINER);
this.exitStatus = exitStatus;
this.diagnostic = diagnostic;
}
ContainerImpl.addTransition
ContainerImpl will call KillTransition when recieves ContainerEventType.KILL_CONTAINER event.
addTransition(ContainerState.RUNNING, ContainerState.KILLING,
ContainerEventType.KILL_CONTAINER, new KillTransition())
static class KillTransition implements
SingleArcTransition<ContainerImpl, ContainerEvent> {
@Override
public void transition(ContainerImpl container, ContainerEvent event) {
// Kill the process/process-grp
container.dispatcher.getEventHandler().handle(
new ContainersLauncherEvent(container,
ContainersLauncherEventType.CLEANUP_CONTAINER));
ContainerKillEvent killEvent = (ContainerKillEvent) event;
container.addDiagnostics(killEvent.getDiagnostic(), "\n");
container.exitCode = killEvent.getContainerExitStatus();
}
}
ContainersLauncher.handler
case CLEANUP_CONTAINER:
ContainerLaunch launcher = running.remove(containerId);
if (launcher == null) {
// Container not launched. So nothing needs to be done.
return;
}
// Cleanup a container whether it is running/killed/completed, so that
// no sub-processes are alive.
try {
launcher.cleanupContainer();
} catch (IOException e) {
LOG.warn("Got exception while cleaning container " + containerId
+ ". Ignoring.");
}
break;
}
ContainerLaunch.cleanupContainer
public void cleanupContainer() throws IOException {
ContainerId containerId = container.getContainerId();
String containerIdStr = ConverterUtils.toString(containerId);
LOG.info("Cleaning up container " + containerIdStr);
try {
context.getNMStateStore().storeContainerKilled(containerId);
} catch (IOException e) {
LOG.error("Unable to mark container " + containerId
+ " killed in store", e);
}
// launch flag will be set to true if process already launched
boolean alreadyLaunched = !shouldLaunchContainer.compareAndSet(false, true);
if (!alreadyLaunched) {
LOG.info("Container " + containerIdStr + " not launched."
+ " No cleanup needed to be done");
return;
}
LOG.debug("Marking container " + containerIdStr + " as inactive");
// this should ensure that if the container process has not launched
// by this time, it will never be launched
exec.deactivateContainer(containerId);
if (LOG.isDebugEnabled()) {
LOG.debug("Getting pid for container " + containerIdStr + " to kill"
+ " from pid file "
+ (pidFilePath != null ? pidFilePath.toString() : "null"));
}
// however the container process may have already started
try {
// get process id from pid file if available
// else if shell is still active, get it from the shell
String processId = null;
if (pidFilePath != null) {
processId = getContainerPid(pidFilePath);
}
// kill process
if (processId != null) {
String user = container.getUser();
LOG.debug("Sending signal to pid " + processId
+ " as user " + user
+ " for container " + containerIdStr);
final Signal signal = sleepDelayBeforeSigKill > 0
? Signal.TERM
: Signal.KILL;
boolean result = exec.signalContainer(user, processId, signal);
LOG.debug("Sent signal " + signal + " to pid " + processId
+ " as user " + user
+ " for container " + containerIdStr
+ ", result=" + (result? "success" : "failed"));
if (sleepDelayBeforeSigKill > 0) {
new DelayedProcessKiller(container, user,
processId, sleepDelayBeforeSigKill, Signal.KILL, exec).start();
}
}
} catch (Exception e) {
String message =
"Exception when trying to cleanup container " + containerIdStr
+ ": " + StringUtils.stringifyException(e);
LOG.warn(message);
dispatcher.getEventHandler().handle(
new ContainerDiagnosticsUpdateEvent(containerId, message));
} finally {
// cleanup pid file if present
if (pidFilePath != null) {
FileContext lfs = FileContext.getLocalFSFileContext();
lfs.delete(pidFilePath, false);
lfs.delete(pidFilePath.suffix(EXIT_CODE_FILE_SUFFIX), false);
}
}
}
boolean result = exec.signalContainer(user, processId, signal);
to send kill command.
DefaultContainerExecutor.killContainer
protected void killContainer(String pid, Signal signal) throws IOException {
new ShellCommandExecutor(Shell.getSignalKillCommand(signal.getValue(), pid))
.execute();
}
Shell.getSignalKillCommand
public static String[] getSignalKillCommand(int code, String pid) {
return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } :
new String[] { "kill", "-" + code, isSetsidAvailable ? "-" + pid : pid };
}
ContainerLaunch.call
If kill a container,ret
code is not 0.
ret = exec.launchContainer(container, nmPrivateContainerScriptPath,
nmPrivateTokensPath, user, appIdStr, containerWorkDir,
localDirs, logDirs);
if (ret != 0) {
LOG.warn("Container exited with a non-zero exit code " + ret);
this.dispatcher.getEventHandler().handle(new ContainerExitEvent(
containerID,
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
"Container exited with a non-zero exit code " + ret));
return ret;
}
ContainerImpl
ContainerImpl calls ExitedWithFailureTransition
, and transit status to CONTAINER_EXITED_WITH_FAILURE
.addTransition(ContainerState.RUNNING,
ContainerState.EXITED_WITH_FAILURE,
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE,
new ExitedWithFailureTransition(true))
ExitedWithFailureTransition
/**
* Transition to EXITED_WITH_FAILURE state upon
* CONTAINER_EXITED_WITH_FAILURE state.
**/
@SuppressWarnings("unchecked") // dispatcher not typed
static class ExitedWithFailureTransition extends ContainerTransition {
boolean clCleanupRequired;
public ExitedWithFailureTransition(boolean clCleanupRequired) {
this.clCleanupRequired = clCleanupRequired;
}
@Override
public void transition(ContainerImpl container, ContainerEvent event) {
ContainerExitEvent exitEvent = (ContainerExitEvent) event;
container.exitCode = exitEvent.getExitCode();
if (exitEvent.getDiagnosticInfo() != null) {
container.addDiagnostics(exitEvent.getDiagnosticInfo(), "\n");
}
// TODO: Add containerWorkDir to the deletion service.
// TODO: Add containerOuputDir to the deletion service.
if (clCleanupRequired) {
container.dispatcher.getEventHandler().handle(
new ContainersLauncherEvent(container,
ContainersLauncherEventType.CLEANUP_CONTAINER));
}
container.cleanup();
}
}
handleCleanupContainerResources
private void handleCleanupContainerResources(
ContainerLocalizationCleanupEvent rsrcCleanup) {
Container c = rsrcCleanup.getContainer();
Map<LocalResourceVisibility, Collection<LocalResourceRequest>> rsrcs =
rsrcCleanup.getResources();
for (Map.Entry<LocalResourceVisibility, Collection<LocalResourceRequest>> e :
rsrcs.entrySet()) {
LocalResourcesTracker tracker = getLocalResourcesTracker(e.getKey(), c.getUser(),
c.getContainerId().getApplicationAttemptId()
.getApplicationId());
for (LocalResourceRequest req : e.getValue()) {
tracker.handle(new ResourceReleaseEvent(req,
c.getContainerId()));
}
}
String locId = ConverterUtils.toString(c.getContainerId());
localizerTracker.cleanupPrivLocalizers(locId);
// Delete the container directories
String userName = c.getUser();
String containerIDStr = c.toString();
String appIDStr = ConverterUtils.toString(
c.getContainerId().getApplicationAttemptId().getApplicationId());
// Try deleting from good local dirs and full local dirs because a dir might
// have gone bad while the app was running(disk full). In addition
// a dir might have become good while the app was running.
// Check if the container dir exists and if it does, try to delete it
for (String localDir : dirsHandler.getLocalDirsForCleanup()) {
// Delete the user-owned container-dir
Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
Path userdir = new Path(usersdir, userName);
Path allAppsdir = new Path(userdir, ContainerLocalizer.APPCACHE);
Path appDir = new Path(allAppsdir, appIDStr);
Path containerDir = new Path(appDir, containerIDStr);
submitDirForDeletion(userName, containerDir);
// Delete the nmPrivate container-dir
Path sysDir = new Path(localDir, NM_PRIVATE_DIR);
Path appSysDir = new Path(sysDir, appIDStr);
Path containerSysDir = new Path(appSysDir, containerIDStr);
submitDirForDeletion(null, containerSysDir);
}
dispatcher.getEventHandler().handle(
new ContainerEvent(c.getContainerId(),
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP));
}
ContainerImpl
When at state EXITED_WITH_FAILURE
, receive CONTAINER_RESOURCES_CLEANEDUP, calls ExitedWithFailureToDoneTransition.
.addTransition(ContainerState.EXITED_WITH_FAILURE, ContainerState.DONE,
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP,
new ExitedWithFailureToDoneTransition())
static class ExitedWithFailureToDoneTransition extends
ContainerDoneTransition {
@Override
public void transition(ContainerImpl container, ContainerEvent event) {
if (container.wasLaunched) {
container.metrics.endRunningContainer();
}
container.metrics.failedContainer();
NMAuditLogger.logFailure(container.user,
AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl",
"Container failed with state: " + container.getContainerState(),
container.containerId.getApplicationAttemptId().getApplicationId(),
container.containerId);
super.transition(container, event);
}
}
ContainerDoneTransition
static class ContainerDoneTransition implements
SingleArcTransition<ContainerImpl, ContainerEvent> {
@Override
@SuppressWarnings("unchecked")
public void transition(ContainerImpl container, ContainerEvent event) {
container.metrics.releaseContainer(container.resource);
container.sendFinishedEvents();
//if the current state is NEW it means the CONTAINER_INIT was never
// sent for the event, thus no need to send the CONTAINER_STOP
if (container.getCurrentState()
!= org.apache.hadoop.yarn.api.records.ContainerState.NEW) {
container.dispatcher.getEventHandler().handle(new AuxServicesEvent
(AuxServicesEventType.CONTAINER_STOP, container));
}
container.context.getNodeStatusUpdater().sendOutofBandHeartBeat();
}
}
ContainerImpl.sendFinishedEvents
private void sendFinishedEvents() {
// Inform the application
@SuppressWarnings("rawtypes")
EventHandler eventHandler = dispatcher.getEventHandler();
eventHandler.handle(new ApplicationContainerFinishedEvent(containerId));
// Remove the container from the resource-monitor
eventHandler.handle(new ContainerStopMonitoringEvent(containerId));
// Tell the logService too
eventHandler.handle(new LogHandlerContainerFinishedEvent(
containerId, exitCode));
}