Fixed most bugs with spawning services (last: block signals)

This commit is contained in:
nocturn9x 2021-12-04 17:31:18 +01:00
parent 8517cdb736
commit 41a54288e3
6 changed files with 133 additions and 86 deletions

View File

@ -240,7 +240,6 @@ proc mountRealDisks*(logger: Logger, fstab: string = "/etc/fstab") =
if checkDiskIsMounted(entry, expand=true):
logger.debug(&"Skipping mounting filesystem {entry.source} ({entry.fstype}) at {entry.target}: already mounted")
continue
logger.debug(&"fsck returned status code {retcode}")
logger.debug(&"Mounting filesystem {entry.source} ({entry.fstype}) at {entry.target} with mount option(s) {entry.data}")
logger.trace(&"Calling mount('{entry.source}', '{entry.target}', '{entry.fstype}', {entry.mountflags}, '{entry.data}')")
retcode = mount(entry.source, entry.target, entry.fstype, entry.mountflags, entry.data)

View File

@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import segfaults # Makes us catch segfaults as NilAccessDefect exceptions!
import strformat
import ../util/[logging, misc]
import services
@ -27,8 +27,8 @@ proc mainLoop*(logger: Logger) =
logger.info("System initialization complete, going idle")
while true:
try:
# TODO
sleepSeconds(5)
logger.info(&"Working...")
sleepSeconds(30)
except:
logger.critical(&"A critical error has occurred while running, restarting the mainloop! Error -> {getCurrentExceptionMsg()}")
# We *absolutely* cannot die

View File

@ -100,93 +100,102 @@ proc removeService*(service: Service) =
proc supervisorWorker(logger: Logger, service: Service, pid: int) =
## This is the actual worker that supervises the service process
logger.trace(&"New supervisor for service '{service.name}' has been spawned")
var pid = pid
var status: cint
var returnCode: int
var sig: int
var process: Process
while true:
returnCode = posix.waitPid(Pid(pid), status, WUNTRACED)
returnCode = posix.waitPid(cint(pid), status, WUNTRACED)
if WIFEXITED(status):
sig = 0
elif WIFSIGNALED(status):
sig = WTERMSIG(status)
else:
sig = -1
if service.restartOnFailure and sig > 0:
logger.info(&"Service {service.name} has exited with return code {returnCode} (terminated by signal {sig}: {strsignal(cint(sig))}), sleeping {service.restartDelay} seconds before restarting it")
if sig > 0 and service.restartOnFailure:
logger.info(&"Service '{service.name}' ({returnCode}) has crashed (terminated by signal {sig}: {strsignal(cint(sig))}), sleeping {service.restartDelay} seconds before restarting it")
removeManagedProcess(pid)
sleepSeconds(service.restartDelay)
var split = shlex(service.exec)
if split.error:
logger.error(&"Error while starting service {service.name}: invalid exec syntax")
return
logger.error(&"Error while restarting service '{service.name}': invalid exec syntax")
break
var arguments = split.words
let progName = arguments[0]
arguments = arguments[1..^1]
pid = startProcess(progName, workingDir=service.workDir, args=arguments, options={poParentStreams}).processID()
else:
logger.info(&"Service {service.name} has exited with return code {returnCode}), shutting down controlling process")
process = startProcess(progName, workingDir=service.workDir, args=arguments)
pid = process.processID()
elif sig > 0:
logger.info(&"Service '{service.name}' ({returnCode}) has crashed (terminated by signal {sig}: {strsignal(cint(sig))}), shutting down controlling process")
break
else:
logger.info(&"Service '{service.name}' ({returnCode}) has exited, shutting down controlling process")
break
if process != nil:
process.close()
proc startService(logger: Logger, service: Service) =
## Starts a single service (this is called by
## startServices below until all services have
## been started)
var split = shlex(service.exec)
if split.error:
logger.error(&"Error while starting service {service.name}: invalid exec syntax")
return
var arguments = split.words
let progName = arguments[0]
arguments = arguments[1..^1]
var process: Process
try:
var process = startProcess(progName, workingDir=service.workDir, args=arguments, options={poParentStreams, })
var split = shlex(service.exec)
if split.error:
logger.error(&"Error while starting service '{service.name}': invalid exec syntax")
quit(0)
var arguments = split.words
let progName = arguments[0]
arguments = arguments[1..^1]
process = startProcess(progName, workingDir=service.workDir, args=arguments)
if service.supervised:
supervisorWorker(logger, service, process.processID)
except OSError:
# If the service is unsupervised we just exit
except:
logger.error(&"Error while starting service {service.name}: {getCurrentExceptionMsg()}")
if process != nil:
process.close()
quit(0)
proc startServices*(logger: Logger, workers: int = 1, level: RunLevel) =
## Starts the services in the given
## runlevel. The workers parameter
## configures parallelism and allows
## for faster boot times by starting
## services concurrently rather than
## sequentially (1 to disable parallelism).
## Note this function immediately returns to
## the caller and forks in the background
echo posix.getpid()
discard readLine(stdin)
proc startServices*(logger: Logger, level: RunLevel, workers: int = 1) =
## Starts the registered services in the
## given runlevel
if workers > cpuinfo.countProcessors() - 1:
logger.warning(&"The configured number of workers ({workers}) is greater than the recommended one ({cpuinfo.countProcessors() - 1}), performance may degrade")
var workerCount: int = 0
var status: cint
var pid: int = posix.fork()
if pid == -1:
logger.fatal(&"Could not fork(): {posix.strerror(posix.errno)}")
return
logger.error(&"Error, cannot fork: {posix.strerror(posix.errno)}")
elif pid == 0:
quit(0)
var servicesCopy: seq[Service] = @[]
echo servicesCopy.len(), " ", posix.getpid()
for service in services:
if service.runlevel == level:
servicesCopy.add(service)
echo servicesCopy.len(), " ", posix.getpid()
if workers > cpuinfo.countProcessors() * 2 - 1:
logger.warning(&"The configured workers count is beyond the recommended threshold ({workers} > {cpuinfo.countProcessors() * 2 - 1}), performance may degrade")
while servicesCopy.len() > 0:
echo servicesCopy.len(), " ", posix.getpid()
sleepSeconds(5)
for i in 1..workers:
logger.debug("Started service spawner process")
var servicesCopy: seq[Service] = @[]
for service in services:
if service.runlevel == level:
servicesCopy.add(service)
while servicesCopy.len() > 0:
if workerCount == workers:
discard waitPid(cint(pid), status, WUNTRACED)
dec(workerCount)
pid = posix.fork()
if pid == -1:
logger.error(&"An error occurred while forking to spawn services, trying again: {posix.strerror(posix.errno)}")
elif pid == 0:
logger.info(&"Starting service {servicesCopy[0].name}")
logger.trace(&"New child has been spawned")
if not servicesCopy[0].supervised:
logger.info(&"Starting unsupervised service '{servicesCopy[0].name}'")
else:
logger.info(&"Starting supervised service '{servicesCopy[0].name}'")
startService(logger, servicesCopy[0])
else:
if servicesCopy.len() > 0 and servicesCopy[0].supervised:
elif servicesCopy.len() > 0:
workerCount += 1
if servicesCopy[0].supervised:
addManagedProcess(pid, servicesCopy[0])
servicesCopy.del(0)
quit(0)
else:
discard waitPid(cint(pid), status, WUNTRACED)

View File

@ -79,6 +79,15 @@ proc nimDExit*(logger: Logger, code: int, emerg: bool = true) =
## NimD's exit point. This function tries to shut down
## as cleanly as possible. When emerg equals true, it will
## try to spawn a root shell and exit
if emerg:
var status: cint
# We're in emergency mode: do not crash the kernel, spawn a shell and exit
logger.fatal("NimD has entered emergency mode and cannot continue. You will be now (hopefully) dropped in a root shell: you're on your own. May the force be with you")
logger.info("Terminating child processes with SIGKILL")
discard posix.kill(SIGKILL, -1)
discard posix.waitPid(-1, status, 0)
discard execShellCmd("/bin/sh") # TODO: Is this fine? maybe use execProcess
quit(-1)
logger.warning("The system is shutting down")
logger.info("Processing shutdown runlevel")
# TODO
@ -89,22 +98,15 @@ proc nimDExit*(logger: Logger, code: int, emerg: bool = true) =
except:
logger.error(&"An error has occurred while calling shutdown handlers. Error -> {getCurrentExceptionMsg()}")
# Note: continues calling handlers!
if emerg:
# We're in emergency mode: do not crash the kernel, spawn a shell and exit
logger.fatal("NimD has entered emergency mode and cannot continue. You will be now (hopefully) dropped in a root shell: you're on your own. May the force be with you")
discard execShellCmd("/bin/sh") # TODO: Is this fine? maybe use execProcess
else:
logger.info("Terminating child processes with SIGTERM")
logger.debug(&"Waiting {sigTermDelay} seconds for the kernel to deliver signals")
discard posix.kill(SIGTERM, -1) # The kernel handles this for us asynchronously
var t = cpuTime()
# We wait some time for the signals to propagate
while anyUserlandProcessLeft():
sleepSeconds(0.25)
if cpuTime() - t >= sigTermDelay:
break
if anyUserlandProcessLeft():
logger.info("Terminating child processes with SIGKILL")
discard posix.kill(SIGKILL, -1)
logger.warning("Shutdown procedure complete, sending final termination signal")
quit(code)
logger.info("Terminating child processes with SIGTERM")
logger.debug(&"Waiting up to {sigTermDelay} seconds for the kernel to deliver signals")
discard posix.kill(SIGTERM, -1) # The kernel handles this for us asynchronously
var t = cpuTime()
# We wait some time for the signals to propagate
while anyUserlandProcessLeft() or cpuTime() - t >= sigTermDelay:
sleepSeconds(0.25)
if anyUserlandProcessLeft():
logger.info("Terminating child processes with SIGKILL")
discard posix.kill(SIGKILL, -1)
logger.warning("Shutdown procedure complete, sending final termination signal")
quit(code)

View File

@ -39,8 +39,8 @@ proc main(logger: Logger, mountDisks: bool = true, fstab: string = "/etc/fstab")
logger.fatal(&"NimD must run as root, but current user id is {uid}")
nimDExit(logger, EPERM) # EPERM - Operation not permitted
logger.trace("Setting up signal handlers")
onSignal(SIGABRT, SIGALRM, SIGHUP, SIGILL, SIGKILL, SIGQUIT, SIGSTOP, SIGTSTP,
SIGTRAP, SIGTERM, SIGPIPE, SIGUSR1, SIGUSR2, 6, SIGFPE, SIGBUS, SIGURG, SIGINT): # 6 is SIGIOT
onSignal(SIGABRT, SIGALRM, SIGHUP, SIGILL, SIGKILL, SIGQUIT, SIGSTOP, SIGSEGV, SIGTSTP,
SIGTRAP, SIGPIPE, SIGUSR1, SIGUSR2, 6, SIGFPE, SIGBUS, SIGURG, SIGTERM, SIGINT): # 6 is SIGIOT
# Can't capture local variables because this implicitly generates
# a noconv procedure, so we use getDefaultLogger() instead. Must find
# a better solution long-term because we need the configuration from
@ -51,6 +51,9 @@ proc main(logger: Logger, mountDisks: bool = true, fstab: string = "/etc/fstab")
# One of the key features of an init system is reaping child
# processes!
reapProcess(getDefaultLogger())
onSignal(SIGINT):
# Temporary
nimDExit(getDefaultLogger(), 131, emerg=true)
addSymlink(newSymlink(dest="/dev/fd", source="/proc/self/fd"))
addSymlink(newSymlink(dest="/dev/fd/0", source="/proc/self/fd/0"))
addSymlink(newSymlink(dest="/dev/fd/1", source="/proc/self/fd/1"))
@ -90,12 +93,38 @@ proc main(logger: Logger, mountDisks: bool = true, fstab: string = "/etc/fstab")
logger.debug(&"Hostname was set to '{setHostname(logger)}'")
logger.info("Creating symlinks")
createSymlinks(logger)
logger.info("Creating directories")
createDirectories(logger)
logger.debug("Entering critical fork() section: blocking signals")
var sigset: Sigset
# TODO
logger.info("Processing boot runlevel")
addService(newService(name="Test", description="prints owo", exec="/bin/echo owo",
addService(newService(name="echoer", description="prints owo", exec="/bin/echo owo",
runlevel=Boot, kind=Oneshot, workDir=getCurrentDir(),
supervised=false, restartOnFailure=false, restartDelay=0))
startServices(logger, workers=1, level=Boot)
addService(newService(name="sleeper", description="la mamma di licenziato",
exec="/usr/bin/sleep 10", supervised=true, restartOnFailure=true,
restartDelay=5, runlevel=Boot, workDir="/home", kind=Simple))
addService(newService(name="errorer", description="la mamma di gavd",
exec="/bin/false", supervised=true, restartOnFailure=true,
restartDelay=5, runlevel=Boot, workDir="/", kind=Simple))
startServices(logger, workers=2, level=Boot)
logger.debug("Setting up real signal handlers")
onSignal(SIGABRT, SIGALRM, SIGHUP, SIGILL, SIGKILL, SIGQUIT, SIGSTOP, SIGSEGV, SIGTSTP,
SIGTRAP, SIGPIPE, SIGUSR1, SIGUSR2, 6, SIGFPE, SIGBUS, SIGURG, SIGTERM): # 6 is SIGIOT
# Can't capture local variables because this implicitly generates
# a noconv procedure, so we use getDefaultLogger() instead. Must find
# a better solution long-term because we need the configuration from
# our own logger object (otherwise we'd always create a new one and
# never switch our logs to file once booting is completed)
getDefaultLogger().warning(&"Ignoring signal {sig} ({strsignal(sig)})") # Nim injects the variable "sig" into the scope. Gotta love those macros
onSignal(SIGCHLD):
# One of the key features of an init system is reaping child
# processes!
reapProcess(getDefaultLogger())
onSignal(SIGINT):
# Temporary
nimDExit(getDefaultLogger(), 131, emerg=true)
logger.debug("Starting main loop")
mainLoop(logger)
@ -144,7 +173,6 @@ when isMainModule:
try:
main(logger)
except:
raise
logger.fatal(&"A fatal unrecoverable error has occurred during startup and NimD cannot continue: {getCurrentExceptionMsg()}")
nimDExit(logger, 131) # ENOTRECOVERABLE - State not recoverable
# This will almost certainly cause the kernel to crash with an error the likes of "Kernel not syncing, attempted to kill init!",

View File

@ -15,6 +15,7 @@
# A simple logging module inspired by python's own logging facility
import terminal
import strformat
import posix
import times
@ -73,47 +74,55 @@ proc getDefaultLogger*(): Logger =
## level that writes the given message to the
## standard error with some basic info like the
## current date and time and the log level
setStdIoUnbuffered() # Just in case
proc logTrace(self: LogHandler, logger: Logger, message: string) =
setForegroundColor(fgMagenta)
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - TRACE] {message}""")
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - TRACE ({posix.getpid()})] {message}""")
stderr.flushFile()
setForegroundColor(fgDefault)
proc logDebug(self: LogHandler, logger: Logger, message: string) =
setForegroundColor(fgCyan)
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - DEBUG] {message}""")
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - DEBUG ({posix.getpid()})] {message}""")
stderr.flushFile()
setForegroundColor(fgDefault)
proc logInfo(self: LogHandler, logger: Logger, message: string) =
setForegroundColor(fgGreen)
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - INFO] {message}""")
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - INFO ({posix.getpid()})] {message}""")
stderr.flushFile()
setForegroundColor(fgDefault)
proc logWarning(self: LogHandler, logger: Logger, message: string) =
setForegroundColor(fgYellow)
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - WARNING] {message}""")
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - WARNING ({posix.getpid()})] {message}""")
stderr.flushFile()
setForegroundColor(fgDefault)
proc logError(self: LogHandler, logger: Logger, message: string) =
setForegroundColor(fgRed)
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - ERROR] {message}""")
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - ERROR ({posix.getpid()})] {message}""")
stderr.flushFile()
setForegroundColor(fgDefault)
proc logCritical(self: LogHandler, logger: Logger, message: string) =
setForegroundColor(fgRed)
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - CRITICAL] {message}""")
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - CRITICAL ({posix.getpid()})] {message}""")
stderr.flushFile()
setForegroundColor(fgDefault)
proc logFatal(self: LogHandler, logger: Logger, message: string) =
setForegroundColor(fgBlack)
setBackgroundColor(bgRed)
stderr.write(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - FATAL]""")
stderr.write(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - FATAL ({posix.getpid()})]""")
setForegroundColor(fgRed)
setBackgroundColor(bgDefault)
stderr.writeline(&" {message}")
setForegroundColor(fgDefault)
stderr.flushFile()
result = newLogger()
result.addHandler(createHandler(logTrace, LogLevel.Trace))