From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Aikar Date: Sun, 12 Apr 2020 15:50:48 -0400 Subject: [PATCH] Improved Watchdog Support Forced Watchdog Crash support and Improve Async Shutdown If the request to shut down the server is received while we are in a watchdog hang, immediately treat it as a crash and begin the shutdown process. Shutdown process is now improved to also shut down cleanly when not using restart scripts either. If a server is deadlocked, a server owner can send SIGHUP (or any other signal the JVM understands to shut down as it currently does) and the watchdog will no longer need to wait until the full timeout, allowing you to trigger a close process and try to shut the server down gracefully, saving player and world data. Previously there was no way to trigger this outside of waiting for a full watchdog timeout, which may be set to a really long time... Additionally, fix everything to do with shutting the server down asynchronously. Previously, nearly everything about the process was fragile and unsafe. Main might not have actually been frozen, and might still be manipulating state. Or, some request might ask main to do something in the shutdown but main is dead. Or worse, other things might start closing down items such as the Console or Thread Pool before we are fully shut down. This change tries to resolve all of these issues by moving everything into the stop method and guaranteeing only one thread is stopping the server. We then issue Thread Death to the main thread if another thread initiates the stop process. We have to ensure Thread Death propagates correctly though to stop main completely. This is to ensure that if main isn't truly stuck, it's not manipulating state we are trying to save. This also moves all plugins who register "delayed init" tasks to occur just before "Done" so they are properly accounted for and won't trip watchdog on init. 
diff --git a/src/main/java/com/destroystokyo/paper/Metrics.java b/src/main/java/com/destroystokyo/paper/Metrics.java index 0b9e689d57705965721b5c55bc45d36657f360e4..dee00aac05f1acf050f05d4db557a08dd0f301c8 100644 --- a/src/main/java/com/destroystokyo/paper/Metrics.java +++ b/src/main/java/com/destroystokyo/paper/Metrics.java @@ -92,7 +92,12 @@ public class Metrics { * Starts the Scheduler which submits our data every 30 minutes. */ private void startSubmitting() { - final Runnable submitTask = this::submitData; + final Runnable submitTask = () -> { + if (MinecraftServer.getServer().hasStopped()) { + return; + } + submitData(); + }; // Many servers tend to restart at a fixed time at xx:00 which causes an uneven distribution of requests on the // bStats backend. To circumvent this problem, we introduce some randomness into the initial and second delay. diff --git a/src/main/java/net/minecraft/CrashReport.java b/src/main/java/net/minecraft/CrashReport.java index 4008fbe506e74f2c463dc7b12f5dd0f3b6fc342d..766ab2fe536a2acccaec28e922ccf8993b0790dc 100644 --- a/src/main/java/net/minecraft/CrashReport.java +++ b/src/main/java/net/minecraft/CrashReport.java @@ -257,6 +257,7 @@ public class CrashReport { } public static CrashReport forThrowable(Throwable cause, String title) { + if (cause instanceof ThreadDeath) com.destroystokyo.paper.util.SneakyThrow.sneaky(cause); // Paper while (cause instanceof CompletionException && cause.getCause() != null) { cause = cause.getCause(); } diff --git a/src/main/java/net/minecraft/Util.java b/src/main/java/net/minecraft/Util.java index c2f747226f10479c826849af898538610a2dd659..83f9f97586f8c0e9d228923e4fec6f121a6702e2 100644 --- a/src/main/java/net/minecraft/Util.java +++ b/src/main/java/net/minecraft/Util.java @@ -129,6 +129,7 @@ public class Util { return Util.IO_POOL; } + public static void shutdownServerThreadPool() { shutdownExecutors(); } // Paper - OBFHELPER public static void shutdownExecutors() { 
shutdownExecutor(Util.BACKGROUND_EXECUTOR); shutdownExecutor(Util.IO_POOL); diff --git a/src/main/java/net/minecraft/server/MinecraftServer.java b/src/main/java/net/minecraft/server/MinecraftServer.java index 99ee9de92264381a064066bc22bb66b4b2852a2e..e5ad635a480d32e7a10ee92c65cfc18a98beafad 100644 --- a/src/main/java/net/minecraft/server/MinecraftServer.java +++ b/src/main/java/net/minecraft/server/MinecraftServer.java @@ -269,7 +269,7 @@ public abstract class MinecraftServer extends ReentrantBlockableEventLoop S spin(Function serverFactory) { AtomicReference atomicreference = new AtomicReference(); Thread thread = new Thread(() -> { @@ -851,6 +854,7 @@ public abstract class MinecraftServer extends ReentrantBlockableEventLoop { + world.tickingEntities = false; + }); + } + // Paper end // CraftBukkit end MinecraftServer.LOGGER.info("Stopping server"); MinecraftTimings.stopServer(); // Paper @@ -930,7 +951,18 @@ public abstract class MinecraftServer extends ReentrantBlockableEventLoop {}; + } + // Paper end return new TickTask(this.tickCount, runnable); } @@ -1421,6 +1478,7 @@ public abstract class MinecraftServer extends ReentrantBlockableEventLoop { CompletableFuture completablefuture; diff --git a/src/main/java/net/minecraft/server/level/ServerLevel.java b/src/main/java/net/minecraft/server/level/ServerLevel.java index b9978d296b83e73d3395b8254c0e8ccd9b36d0fa..bf4e50cd1d561456c033cda2d5c5487c5e3fe1eb 100644 --- a/src/main/java/net/minecraft/server/level/ServerLevel.java +++ b/src/main/java/net/minecraft/server/level/ServerLevel.java @@ -171,7 +171,7 @@ public class ServerLevel extends net.minecraft.world.level.Level implements Worl private final Queue toAddAfterTick = Queues.newArrayDeque(); public final List players = Lists.newArrayList(); // Paper - private -> public public final ServerChunkCache chunkSource; // Paper - public - boolean tickingEntities; + public boolean tickingEntities; // Paper - expose for watchdog // Paper start List afterEntityTickingTasks = 
Lists.newArrayList(); public void doIfNotEntityTicking(java.lang.Runnable run) { diff --git a/src/main/java/net/minecraft/server/players/PlayerList.java b/src/main/java/net/minecraft/server/players/PlayerList.java index 30666fca36b683158ff60302684b5093f5536e24..984ac19dcab446531c816e365c7c149e2c49d567 100644 --- a/src/main/java/net/minecraft/server/players/PlayerList.java +++ b/src/main/java/net/minecraft/server/players/PlayerList.java @@ -503,7 +503,7 @@ public abstract class PlayerList { cserver.getPluginManager().callEvent(playerQuitEvent); entityplayer.getBukkitEntity().disconnect(playerQuitEvent.getQuitMessage()); - entityplayer.doTick(); // SPIGOT-924 + if (server.isSameThread()) entityplayer.doTick(); // SPIGOT-924 // Paper - don't tick during emergency shutdowns (Watchdog) // CraftBukkit end // Paper start - Remove from collideRule team if needed diff --git a/src/main/java/net/minecraft/util/thread/BlockableEventLoop.java b/src/main/java/net/minecraft/util/thread/BlockableEventLoop.java index a5ce61be7d6e85ac289730d9671e66a7190529f9..add18ba4833686ff51fbb280b0a5759f142b3f91 100644 --- a/src/main/java/net/minecraft/util/thread/BlockableEventLoop.java +++ b/src/main/java/net/minecraft/util/thread/BlockableEventLoop.java @@ -135,6 +135,7 @@ public abstract class BlockableEventLoop implements Processo try { task.run(); } catch (Exception exception) { + if (exception.getCause() instanceof ThreadDeath) throw exception; // Paper BlockableEventLoop.LOGGER.fatal("Error executing task on {}", this.name(), exception); } diff --git a/src/main/java/net/minecraft/world/level/Level.java b/src/main/java/net/minecraft/world/level/Level.java index 632f32405053fbcff2fd26fa99f98c6add9f9dc7..5860e7866724abd35bde2a5710d9c92799e5de67 100644 --- a/src/main/java/net/minecraft/world/level/Level.java +++ b/src/main/java/net/minecraft/world/level/Level.java @@ -858,6 +858,7 @@ public abstract class Level implements LevelAccessor, AutoCloseable { gameprofilerfiller.pop(); } catch 
(Throwable throwable) { + if (throwable instanceof ThreadDeath) throw throwable; // Paper // Paper start - Prevent tile entity and entity crashes String msg = "TileEntity threw exception at " + tileentity.getLevel().getWorld().getName() + ":" + tileentity.getBlockPos().getX() + "," + tileentity.getBlockPos().getY() + "," + tileentity.getBlockPos().getZ(); System.err.println(msg); @@ -932,6 +933,7 @@ public abstract class Level implements LevelAccessor, AutoCloseable { try { tickConsumer.accept(entity); } catch (Throwable throwable) { + if (throwable instanceof ThreadDeath) throw throwable; // Paper // Paper start - Prevent tile entity and entity crashes String msg = "Entity threw exception at " + entity.level.getWorld().getName() + ":" + entity.getX() + "," + entity.getY() + "," + entity.getZ(); System.err.println(msg); diff --git a/src/main/java/org/bukkit/craftbukkit/CraftServer.java b/src/main/java/org/bukkit/craftbukkit/CraftServer.java index 3f35e93b42efd03ff1002f09962fe3da51fb4c3f..43c37e660a8a7f9d326ad38e66f9aa7c53c7b87c 100644 --- a/src/main/java/org/bukkit/craftbukkit/CraftServer.java +++ b/src/main/java/org/bukkit/craftbukkit/CraftServer.java @@ -1839,7 +1839,7 @@ public final class CraftServer implements Server { @Override public boolean isPrimaryThread() { - return Thread.currentThread().equals(console.serverThread); // Paper - Fix issues with detecting main thread properly + return Thread.currentThread().equals(console.serverThread) || Thread.currentThread().equals(net.minecraft.server.MinecraftServer.getServer().shutdownThread); // Paper - Fix issues with detecting main thread properly, the only time Watchdog will be used is during a crash shutdown which is a "try our best" scenario } // Paper start diff --git a/src/main/java/org/bukkit/craftbukkit/Main.java b/src/main/java/org/bukkit/craftbukkit/Main.java index c519ceca6f7788ca7c5d74ad1001dbc09f62681c..c288b89bf5a22269823ba1d18af217032d7c6a36 100644 --- 
a/src/main/java/org/bukkit/craftbukkit/Main.java +++ b/src/main/java/org/bukkit/craftbukkit/Main.java @@ -12,6 +12,8 @@ import java.util.logging.Level; import java.util.logging.Logger; import joptsimple.OptionParser; import joptsimple.OptionSet; +import net.minecraft.util.ExceptionCollector; +import net.minecraft.world.level.lighting.LayerLightEventListener; import net.minecrell.terminalconsole.TerminalConsoleAppender; // Paper public class Main { @@ -156,6 +158,36 @@ public class Main { OptionSet options = null; + // Paper start - preload logger classes to avoid plugins mixing versions + tryPreloadClass("com.destroystokyo.paper.log.LogFullPolicy"); + tryPreloadClass("org.apache.logging.log4j.core.Core"); + tryPreloadClass("org.apache.logging.log4j.core.Appender"); + tryPreloadClass("org.apache.logging.log4j.core.ContextDataInjector"); + tryPreloadClass("org.apache.logging.log4j.core.Filter"); + tryPreloadClass("org.apache.logging.log4j.core.ErrorHandler"); + tryPreloadClass("org.apache.logging.log4j.core.LogEvent"); + tryPreloadClass("org.apache.logging.log4j.core.Logger"); + tryPreloadClass("org.apache.logging.log4j.core.LoggerContext"); + tryPreloadClass("org.apache.logging.log4j.core.LogEventListener"); + tryPreloadClass("org.apache.logging.log4j.core.AbstractLogEvent"); + tryPreloadClass("org.apache.logging.log4j.message.AsynchronouslyFormattable"); + tryPreloadClass("org.apache.logging.log4j.message.FormattedMessage"); + tryPreloadClass("org.apache.logging.log4j.message.ParameterizedMessage"); + tryPreloadClass("org.apache.logging.log4j.message.Message"); + tryPreloadClass("org.apache.logging.log4j.message.MessageFactory"); + tryPreloadClass("org.apache.logging.log4j.message.TimestampMessage"); + tryPreloadClass("org.apache.logging.log4j.message.SimpleMessage"); + tryPreloadClass("org.apache.logging.log4j.core.async.AsyncLogger"); + tryPreloadClass("org.apache.logging.log4j.core.async.AsyncLoggerContext"); + 
tryPreloadClass("org.apache.logging.log4j.core.async.AsyncQueueFullPolicy"); + tryPreloadClass("org.apache.logging.log4j.core.async.AsyncLoggerDisruptor"); + tryPreloadClass("org.apache.logging.log4j.core.async.RingBufferLogEvent"); + tryPreloadClass("org.apache.logging.log4j.core.async.DisruptorUtil"); + tryPreloadClass("org.apache.logging.log4j.core.async.RingBufferLogEventHandler"); + tryPreloadClass("org.apache.logging.log4j.core.impl.ThrowableProxy"); + tryPreloadClass("org.apache.logging.log4j.core.impl.ExtendedClassInfo"); + tryPreloadClass("org.apache.logging.log4j.core.impl.ExtendedStackTraceElement"); + // Paper end try { options = parser.parse(args); } catch (joptsimple.OptionException ex) { @@ -251,8 +283,64 @@ public class Main { } catch (Throwable t) { t.printStackTrace(); } + // Paper start + // load some required classes to avoid errors during shutdown if jar is replaced + // also to guarantee our version loads over plugins + tryPreloadClass("com.destroystokyo.paper.util.SneakyThrow"); + tryPreloadClass("com.google.common.collect.Iterators$PeekingImpl"); + tryPreloadClass("com.google.common.collect.MapMakerInternalMap$Values"); + tryPreloadClass("com.google.common.collect.MapMakerInternalMap$ValueIterator"); + tryPreloadClass("com.google.common.collect.MapMakerInternalMap$WriteThroughEntry"); + tryPreloadClass("com.google.common.collect.Iterables"); + for (int i = 1; i <= 15; i++) { + tryPreloadClass("com.google.common.collect.Iterables$" + i, false); + } + tryPreloadClass("org.apache.commons.lang3.mutable.MutableBoolean"); + tryPreloadClass("org.apache.commons.lang3.mutable.MutableInt"); + tryPreloadClass("org.jline.terminal.impl.MouseSupport"); + tryPreloadClass("org.jline.terminal.impl.MouseSupport$1"); + tryPreloadClass("org.jline.terminal.Terminal$MouseTracking"); + tryPreloadClass("co.aikar.timings.TimingHistory"); + tryPreloadClass("co.aikar.timings.TimingHistory$MinuteReport"); + 
tryPreloadClass("io.netty.channel.AbstractChannelHandlerContext"); + tryPreloadClass("io.netty.channel.AbstractChannelHandlerContext$11"); + tryPreloadClass("io.netty.channel.AbstractChannelHandlerContext$12"); + tryPreloadClass("io.netty.channel.AbstractChannel$AbstractUnsafe$8"); + tryPreloadClass("io.netty.util.concurrent.DefaultPromise"); + tryPreloadClass("io.netty.util.concurrent.DefaultPromise$1"); + tryPreloadClass("io.netty.util.internal.PromiseNotificationUtil"); + tryPreloadClass("io.netty.util.internal.SystemPropertyUtil"); + tryPreloadClass("org.bukkit.craftbukkit.scheduler.CraftScheduler"); + tryPreloadClass("org.bukkit.craftbukkit.scheduler.CraftScheduler$1"); + tryPreloadClass("org.bukkit.craftbukkit.scheduler.CraftScheduler$2"); + tryPreloadClass("org.bukkit.craftbukkit.scheduler.CraftScheduler$3"); + tryPreloadClass("org.bukkit.craftbukkit.scheduler.CraftScheduler$4"); + tryPreloadClass("org.slf4j.helpers.MessageFormatter"); + tryPreloadClass("org.slf4j.helpers.FormattingTuple"); + tryPreloadClass("org.slf4j.helpers.BasicMarker"); + tryPreloadClass("org.slf4j.helpers.Util"); + tryPreloadClass("com.destroystokyo.paper.event.player.PlayerConnectionCloseEvent"); + tryPreloadClass("com.destroystokyo.paper.event.entity.EntityRemoveFromWorldEvent"); + // Minecraft, seen during saving + tryPreloadClass(LayerLightEventListener.DummyLightLayerEventListener.class.getName()); + tryPreloadClass(LayerLightEventListener.class.getName()); + tryPreloadClass(ExceptionCollector.class.getName()); + // Paper end + } + } + + // Paper start + private static void tryPreloadClass(String className) { + tryPreloadClass(className, true); + } + private static void tryPreloadClass(String className, boolean printError) { + try { + Class.forName(className); + } catch (ClassNotFoundException e) { + if (printError) System.err.println("An expected class " + className + " was not found for preloading: " + e.getMessage()); } } + // Paper end private static List asList(String... 
params) { return Arrays.asList(params); diff --git a/src/main/java/org/bukkit/craftbukkit/util/ServerShutdownThread.java b/src/main/java/org/bukkit/craftbukkit/util/ServerShutdownThread.java index 449e99d1b673870ed6892f6ab2c715a2db35c35d..c7ed6e0f8a989cec97700df2b15198c9c481c549 100644 --- a/src/main/java/org/bukkit/craftbukkit/util/ServerShutdownThread.java +++ b/src/main/java/org/bukkit/craftbukkit/util/ServerShutdownThread.java @@ -12,12 +12,27 @@ public class ServerShutdownThread extends Thread { @Override public void run() { try { + // Paper start - try to shutdown on main + server.safeShutdown(false, false); + for (int i = 1000; i > 0 && !server.hasStopped(); i -= 100) { + Thread.sleep(100); + } + if (server.hasStopped()) { + while (!server.hasFullyShutdown) Thread.sleep(1000); + return; + } + // Looks stalled, close async org.spigotmc.AsyncCatcher.enabled = false; // Spigot org.spigotmc.AsyncCatcher.shuttingDown = true; // Paper + server.forceTicks = true; server.close(); + while (!server.hasFullyShutdown) Thread.sleep(1000); + } catch (InterruptedException e) { + e.printStackTrace(); + // Paper end } finally { try { - net.minecrell.terminalconsole.TerminalConsoleAppender.close(); // Paper - Use TerminalConsoleAppender + //net.minecrell.terminalconsole.TerminalConsoleAppender.close(); // Paper - Move into stop } catch (Exception e) { } } diff --git a/src/main/java/org/spigotmc/RestartCommand.java b/src/main/java/org/spigotmc/RestartCommand.java index 6dab105cd7cc4340c031c395c0346d4731355d79..6498dc4c6630bfef1a52edf74d8574e5e4876720 100644 --- a/src/main/java/org/spigotmc/RestartCommand.java +++ b/src/main/java/org/spigotmc/RestartCommand.java @@ -139,7 +139,7 @@ public class RestartCommand extends Command // Paper end // Paper start - copied from above and modified to return if the hook registered - private static boolean addShutdownHook(String restartScript) + public static boolean addShutdownHook(String restartScript) { String[] split = 
restartScript.split( " " ); if ( split.length > 0 && new File( split[0] ).isFile() ) diff --git a/src/main/java/org/spigotmc/WatchdogThread.java b/src/main/java/org/spigotmc/WatchdogThread.java index 33a66322d253c7562ae5acbdbc6cc87f7d72a9af..26c9adf7af4328ce2d8e08568019c5b438e28b05 100644 --- a/src/main/java/org/spigotmc/WatchdogThread.java +++ b/src/main/java/org/spigotmc/WatchdogThread.java @@ -13,6 +13,7 @@ import org.bukkit.Bukkit; public class WatchdogThread extends Thread { + public static final boolean DISABLE_WATCHDOG = Boolean.getBoolean("disable.watchdog"); // Paper private static WatchdogThread instance; private long timeoutTime; private boolean restart; @@ -41,6 +42,7 @@ public class WatchdogThread extends Thread { if ( instance == null ) { + if (timeoutTime <= 0) timeoutTime = 300; // Paper instance = new WatchdogThread( timeoutTime * 1000L, restart ); instance.start(); } else @@ -71,12 +73,13 @@ public class WatchdogThread extends Thread // Paper start Logger log = Bukkit.getServer().getLogger(); long currentTime = monotonicMillis(); - if ( lastTick != 0 && timeoutTime > 0 && currentTime > lastTick + earlyWarningEvery && !Boolean.getBoolean("disable.watchdog") ) + MinecraftServer server = MinecraftServer.getServer(); + if (lastTick != 0 && timeoutTime > 0 && hasStarted && (!server.isRunning() || (currentTime > lastTick + earlyWarningEvery && !DISABLE_WATCHDOG) )) { - boolean isLongTimeout = currentTime > lastTick + timeoutTime; + boolean isLongTimeout = currentTime > lastTick + timeoutTime || (!server.isRunning() && !server.hasStopped() && currentTime > lastTick + 1000); // Don't spam early warning dumps if ( !isLongTimeout && (earlyWarningEvery <= 0 || !hasStarted || currentTime < lastEarlyWarning + earlyWarningEvery || currentTime < lastTick + earlyWarningDelay)) continue; - if ( !isLongTimeout && MinecraftServer.getServer().hasStopped()) continue; // Don't spam early watchdog warnings during shutdown, we'll come back to this... 
+ if ( !isLongTimeout && server.hasStopped()) continue; // Don't spam early watchdog warnings during shutdown, we'll come back to this... lastEarlyWarning = currentTime; if (isLongTimeout) { // Paper end @@ -118,7 +121,7 @@ public class WatchdogThread extends Thread log.log( Level.SEVERE, "------------------------------" ); log.log( Level.SEVERE, "Server thread dump (Look for plugins here before reporting to Paper!):" ); // Paper ChunkTaskManager.dumpAllChunkLoadInfo(); // Paper - dumpThread( ManagementFactory.getThreadMXBean().getThreadInfo( MinecraftServer.getServer().serverThread.getId(), Integer.MAX_VALUE ), log ); + dumpThread( ManagementFactory.getThreadMXBean().getThreadInfo( server.serverThread.getId(), Integer.MAX_VALUE ), log ); log.log( Level.SEVERE, "------------------------------" ); // // Paper start - Only print full dump on long timeouts @@ -139,9 +142,25 @@ public class WatchdogThread extends Thread if ( isLongTimeout ) { - if ( restart && !MinecraftServer.getServer().hasStopped() ) + if ( !server.hasStopped() ) { - RestartCommand.restart(); + AsyncCatcher.enabled = false; // Disable async catcher incase it interferes with us + AsyncCatcher.shuttingDown = true; + server.forceTicks = true; + if (restart) { + RestartCommand.addShutdownHook( SpigotConfig.restartScript ); + } + // try one last chance to safe shutdown on main incase it 'comes back' + server.abnormalExit = true; + server.safeShutdown(false, restart); + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + if (!server.hasStopped()) { + server.close(); + } } break; } // Paper end diff --git a/src/main/resources/log4j2.xml b/src/main/resources/log4j2.xml index 476f4a5cbe664ddd05474cb88553018bd334a5b8..8af159abd3d0cc94cf155fec5b384c42f69551bf 100644 --- a/src/main/resources/log4j2.xml +++ b/src/main/resources/log4j2.xml @@ -1,5 +1,5 @@ - +