mirror of https://github.com/nocturn9x/nimd.git
Fixed most bugs with spawning services (last: block signals)
This commit is contained in:
parent
8517cdb736
commit
41a54288e3
|
@ -240,7 +240,6 @@ proc mountRealDisks*(logger: Logger, fstab: string = "/etc/fstab") =
|
|||
if checkDiskIsMounted(entry, expand=true):
|
||||
logger.debug(&"Skipping mounting filesystem {entry.source} ({entry.fstype}) at {entry.target}: already mounted")
|
||||
continue
|
||||
logger.debug(&"fsck returned status code {retcode}")
|
||||
logger.debug(&"Mounting filesystem {entry.source} ({entry.fstype}) at {entry.target} with mount option(s) {entry.data}")
|
||||
logger.trace(&"Calling mount('{entry.source}', '{entry.target}', '{entry.fstype}', {entry.mountflags}, '{entry.data}')")
|
||||
retcode = mount(entry.source, entry.target, entry.fstype, entry.mountflags, entry.data)
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import segfaults # Makes us catch segfaults as NilAccessDefect exceptions!
|
||||
import strformat
|
||||
|
||||
|
||||
|
||||
import ../util/[logging, misc]
|
||||
import services
|
||||
|
||||
|
@ -27,8 +27,8 @@ proc mainLoop*(logger: Logger) =
|
|||
logger.info("System initialization complete, going idle")
|
||||
while true:
|
||||
try:
|
||||
# TODO
|
||||
sleepSeconds(5)
|
||||
logger.info(&"Working...")
|
||||
sleepSeconds(30)
|
||||
except:
|
||||
logger.critical(&"A critical error has occurred while running, restarting the mainloop! Error -> {getCurrentExceptionMsg()}")
|
||||
# We *absolutely* cannot die
|
||||
|
|
|
@ -100,93 +100,102 @@ proc removeService*(service: Service) =
|
|||
|
||||
proc supervisorWorker(logger: Logger, service: Service, pid: int) =
|
||||
## This is the actual worker that supervises the service process
|
||||
logger.trace(&"New supervisor for service '{service.name}' has been spawned")
|
||||
var pid = pid
|
||||
var status: cint
|
||||
var returnCode: int
|
||||
var sig: int
|
||||
var process: Process
|
||||
while true:
|
||||
returnCode = posix.waitPid(Pid(pid), status, WUNTRACED)
|
||||
returnCode = posix.waitPid(cint(pid), status, WUNTRACED)
|
||||
if WIFEXITED(status):
|
||||
sig = 0
|
||||
elif WIFSIGNALED(status):
|
||||
sig = WTERMSIG(status)
|
||||
else:
|
||||
sig = -1
|
||||
if service.restartOnFailure and sig > 0:
|
||||
logger.info(&"Service {service.name} has exited with return code {returnCode} (terminated by signal {sig}: {strsignal(cint(sig))}), sleeping {service.restartDelay} seconds before restarting it")
|
||||
if sig > 0 and service.restartOnFailure:
|
||||
logger.info(&"Service '{service.name}' ({returnCode}) has crashed (terminated by signal {sig}: {strsignal(cint(sig))}), sleeping {service.restartDelay} seconds before restarting it")
|
||||
removeManagedProcess(pid)
|
||||
sleepSeconds(service.restartDelay)
|
||||
var split = shlex(service.exec)
|
||||
if split.error:
|
||||
logger.error(&"Error while starting service {service.name}: invalid exec syntax")
|
||||
return
|
||||
logger.error(&"Error while restarting service '{service.name}': invalid exec syntax")
|
||||
break
|
||||
var arguments = split.words
|
||||
let progName = arguments[0]
|
||||
arguments = arguments[1..^1]
|
||||
pid = startProcess(progName, workingDir=service.workDir, args=arguments, options={poParentStreams}).processID()
|
||||
else:
|
||||
logger.info(&"Service {service.name} has exited with return code {returnCode}), shutting down controlling process")
|
||||
process = startProcess(progName, workingDir=service.workDir, args=arguments)
|
||||
pid = process.processID()
|
||||
elif sig > 0:
|
||||
logger.info(&"Service '{service.name}' ({returnCode}) has crashed (terminated by signal {sig}: {strsignal(cint(sig))}), shutting down controlling process")
|
||||
break
|
||||
|
||||
else:
|
||||
logger.info(&"Service '{service.name}' ({returnCode}) has exited, shutting down controlling process")
|
||||
break
|
||||
if process != nil:
|
||||
process.close()
|
||||
|
||||
|
||||
proc startService(logger: Logger, service: Service) =
|
||||
## Starts a single service (this is called by
|
||||
## startServices below until all services have
|
||||
## been started)
|
||||
|
||||
var split = shlex(service.exec)
|
||||
if split.error:
|
||||
logger.error(&"Error while starting service {service.name}: invalid exec syntax")
|
||||
return
|
||||
var arguments = split.words
|
||||
let progName = arguments[0]
|
||||
arguments = arguments[1..^1]
|
||||
var process: Process
|
||||
try:
|
||||
var process = startProcess(progName, workingDir=service.workDir, args=arguments, options={poParentStreams, })
|
||||
var split = shlex(service.exec)
|
||||
if split.error:
|
||||
logger.error(&"Error while starting service '{service.name}': invalid exec syntax")
|
||||
quit(0)
|
||||
var arguments = split.words
|
||||
let progName = arguments[0]
|
||||
arguments = arguments[1..^1]
|
||||
process = startProcess(progName, workingDir=service.workDir, args=arguments)
|
||||
if service.supervised:
|
||||
supervisorWorker(logger, service, process.processID)
|
||||
except OSError:
|
||||
# If the service is unsupervised we just exit
|
||||
except:
|
||||
logger.error(&"Error while starting service {service.name}: {getCurrentExceptionMsg()}")
|
||||
if process != nil:
|
||||
process.close()
|
||||
quit(0)
|
||||
|
||||
|
||||
proc startServices*(logger: Logger, workers: int = 1, level: RunLevel) =
|
||||
## Starts the services in the given
|
||||
## runlevel. The workers parameter
|
||||
## configures parallelism and allows
|
||||
## for faster boot times by starting
|
||||
## services concurrently rather than
|
||||
## sequentially (1 to disable parallelism).
|
||||
## Note this function immediately returns to
|
||||
## the caller and forks in the background
|
||||
echo posix.getpid()
|
||||
discard readLine(stdin)
|
||||
proc startServices*(logger: Logger, level: RunLevel, workers: int = 1) =
|
||||
## Starts the registered services in the
|
||||
## given runlevel
|
||||
if workers > cpuinfo.countProcessors() - 1:
|
||||
logger.warning(&"The configured number of workers ({workers}) is greater than the recommended one ({cpuinfo.countProcessors() - 1}), performance may degrade")
|
||||
var workerCount: int = 0
|
||||
var status: cint
|
||||
var pid: int = posix.fork()
|
||||
if pid == -1:
|
||||
logger.fatal(&"Could not fork(): {posix.strerror(posix.errno)}")
|
||||
return
|
||||
logger.error(&"Error, cannot fork: {posix.strerror(posix.errno)}")
|
||||
elif pid == 0:
|
||||
quit(0)
|
||||
var servicesCopy: seq[Service] = @[]
|
||||
echo servicesCopy.len(), " ", posix.getpid()
|
||||
for service in services:
|
||||
if service.runlevel == level:
|
||||
servicesCopy.add(service)
|
||||
echo servicesCopy.len(), " ", posix.getpid()
|
||||
if workers > cpuinfo.countProcessors() * 2 - 1:
|
||||
logger.warning(&"The configured workers count is beyond the recommended threshold ({workers} > {cpuinfo.countProcessors() * 2 - 1}), performance may degrade")
|
||||
while servicesCopy.len() > 0:
|
||||
echo servicesCopy.len(), " ", posix.getpid()
|
||||
sleepSeconds(5)
|
||||
for i in 1..workers:
|
||||
logger.debug("Started service spawner process")
|
||||
var servicesCopy: seq[Service] = @[]
|
||||
for service in services:
|
||||
if service.runlevel == level:
|
||||
servicesCopy.add(service)
|
||||
while servicesCopy.len() > 0:
|
||||
if workerCount == workers:
|
||||
discard waitPid(cint(pid), status, WUNTRACED)
|
||||
dec(workerCount)
|
||||
pid = posix.fork()
|
||||
if pid == -1:
|
||||
logger.error(&"An error occurred while forking to spawn services, trying again: {posix.strerror(posix.errno)}")
|
||||
elif pid == 0:
|
||||
logger.info(&"Starting service {servicesCopy[0].name}")
|
||||
logger.trace(&"New child has been spawned")
|
||||
if not servicesCopy[0].supervised:
|
||||
logger.info(&"Starting unsupervised service '{servicesCopy[0].name}'")
|
||||
else:
|
||||
logger.info(&"Starting supervised service '{servicesCopy[0].name}'")
|
||||
startService(logger, servicesCopy[0])
|
||||
else:
|
||||
if servicesCopy.len() > 0 and servicesCopy[0].supervised:
|
||||
elif servicesCopy.len() > 0:
|
||||
workerCount += 1
|
||||
if servicesCopy[0].supervised:
|
||||
addManagedProcess(pid, servicesCopy[0])
|
||||
|
||||
servicesCopy.del(0)
|
||||
quit(0)
|
||||
else:
|
||||
discard waitPid(cint(pid), status, WUNTRACED)
|
||||
|
|
|
@ -79,6 +79,15 @@ proc nimDExit*(logger: Logger, code: int, emerg: bool = true) =
|
|||
## NimD's exit point. This function tries to shut down
|
||||
## as cleanly as possible. When emerg equals true, it will
|
||||
## try to spawn a root shell and exit
|
||||
if emerg:
|
||||
var status: cint
|
||||
# We're in emergency mode: do not crash the kernel, spawn a shell and exit
|
||||
logger.fatal("NimD has entered emergency mode and cannot continue. You will be now (hopefully) dropped in a root shell: you're on your own. May the force be with you")
|
||||
logger.info("Terminating child processes with SIGKILL")
|
||||
discard posix.kill(SIGKILL, -1)
|
||||
discard posix.waitPid(-1, status, 0)
|
||||
discard execShellCmd("/bin/sh") # TODO: Is this fine? maybe use execProcess
|
||||
quit(-1)
|
||||
logger.warning("The system is shutting down")
|
||||
logger.info("Processing shutdown runlevel")
|
||||
# TODO
|
||||
|
@ -89,22 +98,15 @@ proc nimDExit*(logger: Logger, code: int, emerg: bool = true) =
|
|||
except:
|
||||
logger.error(&"An error has occurred while calling shutdown handlers. Error -> {getCurrentExceptionMsg()}")
|
||||
# Note: continues calling handlers!
|
||||
if emerg:
|
||||
# We're in emergency mode: do not crash the kernel, spawn a shell and exit
|
||||
logger.fatal("NimD has entered emergency mode and cannot continue. You will be now (hopefully) dropped in a root shell: you're on your own. May the force be with you")
|
||||
discard execShellCmd("/bin/sh") # TODO: Is this fine? maybe use execProcess
|
||||
else:
|
||||
logger.info("Terminating child processes with SIGTERM")
|
||||
logger.debug(&"Waiting {sigTermDelay} seconds for the kernel to deliver signals")
|
||||
discard posix.kill(SIGTERM, -1) # The kernel handles this for us asynchronously
|
||||
var t = cpuTime()
|
||||
# We wait some time for the signals to propagate
|
||||
while anyUserlandProcessLeft():
|
||||
sleepSeconds(0.25)
|
||||
if cpuTime() - t >= sigTermDelay:
|
||||
break
|
||||
if anyUserlandProcessLeft():
|
||||
logger.info("Terminating child processes with SIGKILL")
|
||||
discard posix.kill(SIGKILL, -1)
|
||||
logger.warning("Shutdown procedure complete, sending final termination signal")
|
||||
logger.info("Terminating child processes with SIGTERM")
|
||||
logger.debug(&"Waiting up to {sigTermDelay} seconds for the kernel to deliver signals")
|
||||
discard posix.kill(SIGTERM, -1) # The kernel handles this for us asynchronously
|
||||
var t = cpuTime()
|
||||
# We wait some time for the signals to propagate
|
||||
while anyUserlandProcessLeft() or cpuTime() - t >= sigTermDelay:
|
||||
sleepSeconds(0.25)
|
||||
if anyUserlandProcessLeft():
|
||||
logger.info("Terminating child processes with SIGKILL")
|
||||
discard posix.kill(SIGKILL, -1)
|
||||
logger.warning("Shutdown procedure complete, sending final termination signal")
|
||||
quit(code)
|
38
src/main.nim
38
src/main.nim
|
@ -39,8 +39,8 @@ proc main(logger: Logger, mountDisks: bool = true, fstab: string = "/etc/fstab")
|
|||
logger.fatal(&"NimD must run as root, but current user id is {uid}")
|
||||
nimDExit(logger, EPERM) # EPERM - Operation not permitted
|
||||
logger.trace("Setting up signal handlers")
|
||||
onSignal(SIGABRT, SIGALRM, SIGHUP, SIGILL, SIGKILL, SIGQUIT, SIGSTOP, SIGTSTP,
|
||||
SIGTRAP, SIGTERM, SIGPIPE, SIGUSR1, SIGUSR2, 6, SIGFPE, SIGBUS, SIGURG, SIGINT): # 6 is SIGIOT
|
||||
onSignal(SIGABRT, SIGALRM, SIGHUP, SIGILL, SIGKILL, SIGQUIT, SIGSTOP, SIGSEGV, SIGTSTP,
|
||||
SIGTRAP, SIGPIPE, SIGUSR1, SIGUSR2, 6, SIGFPE, SIGBUS, SIGURG, SIGTERM, SIGINT): # 6 is SIGIOT
|
||||
# Can't capture local variables because this implicitly generates
|
||||
# a noconv procedure, so we use getDefaultLogger() instead. Must find
|
||||
# a better solution long-term because we need the configuration from
|
||||
|
@ -51,6 +51,9 @@ proc main(logger: Logger, mountDisks: bool = true, fstab: string = "/etc/fstab")
|
|||
# One of the key features of an init system is reaping child
|
||||
# processes!
|
||||
reapProcess(getDefaultLogger())
|
||||
onSignal(SIGINT):
|
||||
# Temporary
|
||||
nimDExit(getDefaultLogger(), 131, emerg=true)
|
||||
addSymlink(newSymlink(dest="/dev/fd", source="/proc/self/fd"))
|
||||
addSymlink(newSymlink(dest="/dev/fd/0", source="/proc/self/fd/0"))
|
||||
addSymlink(newSymlink(dest="/dev/fd/1", source="/proc/self/fd/1"))
|
||||
|
@ -90,12 +93,38 @@ proc main(logger: Logger, mountDisks: bool = true, fstab: string = "/etc/fstab")
|
|||
logger.debug(&"Hostname was set to '{setHostname(logger)}'")
|
||||
logger.info("Creating symlinks")
|
||||
createSymlinks(logger)
|
||||
logger.info("Creating directories")
|
||||
createDirectories(logger)
|
||||
logger.debug("Entering critical fork() section: blocking signals")
|
||||
var sigset: Sigset
|
||||
# TODO
|
||||
logger.info("Processing boot runlevel")
|
||||
addService(newService(name="Test", description="prints owo", exec="/bin/echo owo",
|
||||
addService(newService(name="echoer", description="prints owo", exec="/bin/echo owo",
|
||||
runlevel=Boot, kind=Oneshot, workDir=getCurrentDir(),
|
||||
supervised=false, restartOnFailure=false, restartDelay=0))
|
||||
startServices(logger, workers=1, level=Boot)
|
||||
addService(newService(name="sleeper", description="la mamma di licenziato",
|
||||
exec="/usr/bin/sleep 10", supervised=true, restartOnFailure=true,
|
||||
restartDelay=5, runlevel=Boot, workDir="/home", kind=Simple))
|
||||
addService(newService(name="errorer", description="la mamma di gavd",
|
||||
exec="/bin/false", supervised=true, restartOnFailure=true,
|
||||
restartDelay=5, runlevel=Boot, workDir="/", kind=Simple))
|
||||
startServices(logger, workers=2, level=Boot)
|
||||
logger.debug("Setting up real signal handlers")
|
||||
onSignal(SIGABRT, SIGALRM, SIGHUP, SIGILL, SIGKILL, SIGQUIT, SIGSTOP, SIGSEGV, SIGTSTP,
|
||||
SIGTRAP, SIGPIPE, SIGUSR1, SIGUSR2, 6, SIGFPE, SIGBUS, SIGURG, SIGTERM): # 6 is SIGIOT
|
||||
# Can't capture local variables because this implicitly generates
|
||||
# a noconv procedure, so we use getDefaultLogger() instead. Must find
|
||||
# a better solution long-term because we need the configuration from
|
||||
# our own logger object (otherwise we'd always create a new one and
|
||||
# never switch our logs to file once booting is completed)
|
||||
getDefaultLogger().warning(&"Ignoring signal {sig} ({strsignal(sig)})") # Nim injects the variable "sig" into the scope. Gotta love those macros
|
||||
onSignal(SIGCHLD):
|
||||
# One of the key features of an init system is reaping child
|
||||
# processes!
|
||||
reapProcess(getDefaultLogger())
|
||||
onSignal(SIGINT):
|
||||
# Temporary
|
||||
nimDExit(getDefaultLogger(), 131, emerg=true)
|
||||
logger.debug("Starting main loop")
|
||||
mainLoop(logger)
|
||||
|
||||
|
@ -144,7 +173,6 @@ when isMainModule:
|
|||
try:
|
||||
main(logger)
|
||||
except:
|
||||
raise
|
||||
logger.fatal(&"A fatal unrecoverable error has occurred during startup and NimD cannot continue: {getCurrentExceptionMsg()}")
|
||||
nimDExit(logger, 131) # ENOTRECOVERABLE - State not recoverable
|
||||
# This will almost certainly cause the kernel to crash with an error the likes of "Kernel not syncing, attempted to kill init!",
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
# A simple logging module inspired by python's own logging facility
|
||||
import terminal
|
||||
import strformat
|
||||
import posix
|
||||
import times
|
||||
|
||||
|
||||
|
@ -78,42 +79,50 @@ proc getDefaultLogger*(): Logger =
|
|||
|
||||
proc logTrace(self: LogHandler, logger: Logger, message: string) =
|
||||
setForegroundColor(fgMagenta)
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - TRACE] {message}""")
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - TRACE ({posix.getpid()})] {message}""")
|
||||
stderr.flushFile()
|
||||
setForegroundColor(fgDefault)
|
||||
|
||||
proc logDebug(self: LogHandler, logger: Logger, message: string) =
|
||||
setForegroundColor(fgCyan)
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - DEBUG] {message}""")
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - DEBUG ({posix.getpid()})] {message}""")
|
||||
stderr.flushFile()
|
||||
setForegroundColor(fgDefault)
|
||||
|
||||
proc logInfo(self: LogHandler, logger: Logger, message: string) =
|
||||
setForegroundColor(fgGreen)
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - INFO] {message}""")
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - INFO ({posix.getpid()})] {message}""")
|
||||
stderr.flushFile()
|
||||
setForegroundColor(fgDefault)
|
||||
|
||||
proc logWarning(self: LogHandler, logger: Logger, message: string) =
|
||||
setForegroundColor(fgYellow)
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - WARNING] {message}""")
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - WARNING ({posix.getpid()})] {message}""")
|
||||
stderr.flushFile()
|
||||
setForegroundColor(fgDefault)
|
||||
|
||||
proc logError(self: LogHandler, logger: Logger, message: string) =
|
||||
setForegroundColor(fgRed)
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - ERROR] {message}""")
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - ERROR ({posix.getpid()})] {message}""")
|
||||
stderr.flushFile()
|
||||
setForegroundColor(fgDefault)
|
||||
|
||||
proc logCritical(self: LogHandler, logger: Logger, message: string) =
|
||||
setForegroundColor(fgRed)
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - CRITICAL] {message}""")
|
||||
stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - CRITICAL ({posix.getpid()})] {message}""")
|
||||
stderr.flushFile()
|
||||
setForegroundColor(fgDefault)
|
||||
|
||||
proc logFatal(self: LogHandler, logger: Logger, message: string) =
|
||||
setForegroundColor(fgBlack)
|
||||
setBackgroundColor(bgRed)
|
||||
stderr.write(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - FATAL]""")
|
||||
stderr.write(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - FATAL ({posix.getpid()})]""")
|
||||
setForegroundColor(fgRed)
|
||||
setBackgroundColor(bgDefault)
|
||||
stderr.writeline(&" {message}")
|
||||
setForegroundColor(fgDefault)
|
||||
stderr.flushFile()
|
||||
|
||||
|
||||
result = newLogger()
|
||||
result.addHandler(createHandler(logTrace, LogLevel.Trace))
|
||||
|
|
Loading…
Reference in New Issue