diff --git a/src/core/fs.nim b/src/core/fs.nim index 85f2751..8fa874c 100644 --- a/src/core/fs.nim +++ b/src/core/fs.nim @@ -240,7 +240,6 @@ proc mountRealDisks*(logger: Logger, fstab: string = "/etc/fstab") = if checkDiskIsMounted(entry, expand=true): logger.debug(&"Skipping mounting filesystem {entry.source} ({entry.fstype}) at {entry.target}: already mounted") continue - logger.debug(&"fsck returned status code {retcode}") logger.debug(&"Mounting filesystem {entry.source} ({entry.fstype}) at {entry.target} with mount option(s) {entry.data}") logger.trace(&"Calling mount('{entry.source}', '{entry.target}', '{entry.fstype}', {entry.mountflags}, '{entry.data}')") retcode = mount(entry.source, entry.target, entry.fstype, entry.mountflags, entry.data) diff --git a/src/core/mainloop.nim b/src/core/mainloop.nim index b427842..884eede 100644 --- a/src/core/mainloop.nim +++ b/src/core/mainloop.nim @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import segfaults # Makes us catch segfaults as NilAccessDefect exceptions! import strformat + import ../util/[logging, misc] import services @@ -27,8 +27,8 @@ proc mainLoop*(logger: Logger) = logger.info("System initialization complete, going idle") while true: try: - # TODO - sleepSeconds(5) + logger.info(&"Working...") + sleepSeconds(30) except: logger.critical(&"A critical error has occurred while running, restarting the mainloop! Error -> {getCurrentExceptionMsg()}") # We *absolutely* cannot die diff --git a/src/core/services.nim b/src/core/services.nim index 1b9fbfe..b74babb 100644 --- a/src/core/services.nim +++ b/src/core/services.nim @@ -100,93 +100,102 @@ proc removeService*(service: Service) = proc supervisorWorker(logger: Logger, service: Service, pid: int) = ## This is the actual worker that supervises the service process + logger.trace(&"New supervisor for service '{service.name}' has been spawned") var pid = pid var status: cint var returnCode: int var sig: int + var process: Process while true: - returnCode = posix.waitPid(Pid(pid), status, WUNTRACED) + returnCode = posix.waitPid(cint(pid), status, WUNTRACED) if WIFEXITED(status): sig = 0 elif WIFSIGNALED(status): sig = WTERMSIG(status) else: sig = -1 - if service.restartOnFailure and sig > 0: - logger.info(&"Service {service.name} has exited with return code {returnCode} (terminated by signal {sig}: {strsignal(cint(sig))}), sleeping {service.restartDelay} seconds before restarting it") + if sig > 0 and service.restartOnFailure: + logger.info(&"Service '{service.name}' ({returnCode}) has crashed (terminated by signal {sig}: {strsignal(cint(sig))}), sleeping {service.restartDelay} seconds before restarting it") removeManagedProcess(pid) sleepSeconds(service.restartDelay) var split = shlex(service.exec) if split.error: - logger.error(&"Error while starting service {service.name}: invalid exec syntax") - return + logger.error(&"Error while restarting service '{service.name}': invalid exec syntax") + break var arguments = split.words let progName = arguments[0] arguments = arguments[1..^1] - pid = startProcess(progName, workingDir=service.workDir, args=arguments, options={poParentStreams}).processID() - else: - logger.info(&"Service {service.name} has exited with return code {returnCode}), shutting down controlling process") + process = startProcess(progName, workingDir=service.workDir, args=arguments) + pid = process.processID() + elif sig > 0: + logger.info(&"Service '{service.name}' ({returnCode}) has crashed (terminated by signal {sig}: {strsignal(cint(sig))}), shutting down controlling process") break - - + else: + logger.info(&"Service '{service.name}' ({returnCode}) has exited, shutting down controlling process") + break + if process != nil: + process.close() + proc startService(logger: Logger, service: Service) = ## Starts a single service (this is called by ## startServices below until all services have ## been started) - - var split = shlex(service.exec) - if split.error: - logger.error(&"Error while starting service {service.name}: invalid exec syntax") - return - var arguments = split.words - let progName = arguments[0] - arguments = arguments[1..^1] + var process: Process try: - var process = startProcess(progName, workingDir=service.workDir, args=arguments, options={poParentStreams, }) + var split = shlex(service.exec) + if split.error: + logger.error(&"Error while starting service '{service.name}': invalid exec syntax") + quit(0) + var arguments = split.words + let progName = arguments[0] + arguments = arguments[1..^1] + process = startProcess(progName, workingDir=service.workDir, args=arguments) if service.supervised: supervisorWorker(logger, service, process.processID) - except OSError: + # If the service is unsupervised we just exit + except: logger.error(&"Error while starting service {service.name}: {getCurrentExceptionMsg()}") + if process != nil: + process.close() quit(0) -proc startServices*(logger: Logger, workers: int = 1, level: RunLevel) = - ## Starts the services in the given - ## runlevel. The workers parameter - ## configures parallelism and allows - ## for faster boot times by starting - ## services concurrently rather than - ## sequentially (1 to disable parallelism). - ## Note this function immediately returns to - ## the caller and forks in the background - echo posix.getpid() - discard readLine(stdin) +proc startServices*(logger: Logger, level: RunLevel, workers: int = 1) = + ## Starts the registered services in the + ## given runlevel + if workers > cpuinfo.countProcessors() - 1: + logger.warning(&"The configured number of workers ({workers}) is greater than the recommended one ({cpuinfo.countProcessors() - 1}), performance may degrade") + var workerCount: int = 0 + var status: cint var pid: int = posix.fork() if pid == -1: - logger.fatal(&"Could not fork(): {posix.strerror(posix.errno)}") - return + logger.error(&"Error, cannot fork: {posix.strerror(posix.errno)}") elif pid == 0: - quit(0) - var servicesCopy: seq[Service] = @[] - echo servicesCopy.len(), " ", posix.getpid() - for service in services: - if service.runlevel == level: - servicesCopy.add(service) - echo servicesCopy.len(), " ", posix.getpid() - if workers > cpuinfo.countProcessors() * 2 - 1: - logger.warning(&"The configured workers count is beyond the recommended threshold ({workers} > {cpuinfo.countProcessors() * 2 - 1}), performance may degrade") - while servicesCopy.len() > 0: - echo servicesCopy.len(), " ", posix.getpid() - sleepSeconds(5) - for i in 1..workers: + logger.debug("Started service spawner process") + var servicesCopy: seq[Service] = @[] + for service in services: + if service.runlevel == level: + servicesCopy.add(service) + while servicesCopy.len() > 0: + if workerCount == workers: + discard waitPid(cint(pid), status, WUNTRACED) + dec(workerCount) pid = posix.fork() if pid == -1: logger.error(&"An error occurred while forking to spawn services, trying again: {posix.strerror(posix.errno)}") elif pid == 0: - logger.info(&"Starting service {servicesCopy[0].name}") + logger.trace(&"New child has been spawned") + if not servicesCopy[0].supervised: + logger.info(&"Starting unsupervised service '{servicesCopy[0].name}'") + else: + logger.info(&"Starting supervised service '{servicesCopy[0].name}'") startService(logger, servicesCopy[0]) - else: - if servicesCopy.len() > 0 and servicesCopy[0].supervised: + elif servicesCopy.len() > 0: + workerCount += 1 + if servicesCopy[0].supervised: addManagedProcess(pid, servicesCopy[0]) - + servicesCopy.del(0) + quit(0) + else: + discard waitPid(cint(pid), status, WUNTRACED) diff --git a/src/core/shutdown.nim b/src/core/shutdown.nim index b1cf9bb..96bd6ef 100644 --- a/src/core/shutdown.nim +++ b/src/core/shutdown.nim @@ -79,6 +79,15 @@ proc nimDExit*(logger: Logger, code: int, emerg: bool = true) = ## NimD's exit point. This function tries to shut down ## as cleanly as possible. When emerg equals true, it will ## try to spawn a root shell and exit + if emerg: + var status: cint + # We're in emergency mode: do not crash the kernel, spawn a shell and exit + logger.fatal("NimD has entered emergency mode and cannot continue. You will be now (hopefully) dropped in a root shell: you're on your own. May the force be with you") + logger.info("Terminating child processes with SIGKILL") + discard posix.kill(SIGKILL, -1) + discard posix.waitPid(-1, status, 0) + discard execShellCmd("/bin/sh") # TODO: Is this fine? maybe use execProcess + quit(-1) logger.warning("The system is shutting down") logger.info("Processing shutdown runlevel") # TODO @@ -89,22 +98,15 @@ proc nimDExit*(logger: Logger, code: int, emerg: bool = true) = except: logger.error(&"An error has occurred while calling shutdown handlers. Error -> {getCurrentExceptionMsg()}") # Note: continues calling handlers! - if emerg: - # We're in emergency mode: do not crash the kernel, spawn a shell and exit - logger.fatal("NimD has entered emergency mode and cannot continue. You will be now (hopefully) dropped in a root shell: you're on your own. May the force be with you") - discard execShellCmd("/bin/sh") # TODO: Is this fine? maybe use execProcess - else: - logger.info("Terminating child processes with SIGTERM") - logger.debug(&"Waiting {sigTermDelay} seconds for the kernel to deliver signals") - discard posix.kill(SIGTERM, -1) # The kernel handles this for us asynchronously - var t = cpuTime() - # We wait some time for the signals to propagate - while anyUserlandProcessLeft(): - sleepSeconds(0.25) - if cpuTime() - t >= sigTermDelay: - break - if anyUserlandProcessLeft(): - logger.info("Terminating child processes with SIGKILL") - discard posix.kill(SIGKILL, -1) - logger.warning("Shutdown procedure complete, sending final termination signal") - quit(code) \ No newline at end of file + logger.info("Terminating child processes with SIGTERM") + logger.debug(&"Waiting up to {sigTermDelay} seconds for the kernel to deliver signals") + discard posix.kill(SIGTERM, -1) # The kernel handles this for us asynchronously + var t = cpuTime() + # We wait some time for the signals to propagate + while anyUserlandProcessLeft() or cpuTime() - t >= sigTermDelay: + sleepSeconds(0.25) + if anyUserlandProcessLeft(): + logger.info("Terminating child processes with SIGKILL") + discard posix.kill(SIGKILL, -1) + logger.warning("Shutdown procedure complete, sending final termination signal") + quit(code) diff --git a/src/main.nim b/src/main.nim index 8a7a7a3..2b14d2d 100644 --- a/src/main.nim +++ b/src/main.nim @@ -39,8 +39,8 @@ proc main(logger: Logger, mountDisks: bool = true, fstab: string = "/etc/fstab") logger.fatal(&"NimD must run as root, but current user id is {uid}") nimDExit(logger, EPERM) # EPERM - Operation not permitted logger.trace("Setting up signal handlers") - onSignal(SIGABRT, SIGALRM, SIGHUP, SIGILL, SIGKILL, SIGQUIT, SIGSTOP, SIGTSTP, - SIGTRAP, SIGTERM, SIGPIPE, SIGUSR1, SIGUSR2, 6, SIGFPE, SIGBUS, SIGURG, SIGINT): # 6 is SIGIOT + onSignal(SIGABRT, SIGALRM, SIGHUP, SIGILL, SIGKILL, SIGQUIT, SIGSTOP, SIGSEGV, SIGTSTP, + SIGTRAP, SIGPIPE, SIGUSR1, SIGUSR2, 6, SIGFPE, SIGBUS, SIGURG, SIGTERM, SIGINT): # 6 is SIGIOT # Can't capture local variables because this implicitly generates # a noconv procedure, so we use getDefaultLogger() instead. Must find # a better solution long-term because we need the configuration from @@ -51,6 +51,9 @@ proc main(logger: Logger, mountDisks: bool = true, fstab: string = "/etc/fstab") # One of the key features of an init system is reaping child # processes! reapProcess(getDefaultLogger()) + onSignal(SIGINT): + # Temporary + nimDExit(getDefaultLogger(), 131, emerg=true) addSymlink(newSymlink(dest="/dev/fd", source="/proc/self/fd")) addSymlink(newSymlink(dest="/dev/fd/0", source="/proc/self/fd/0")) addSymlink(newSymlink(dest="/dev/fd/1", source="/proc/self/fd/1")) @@ -90,12 +93,38 @@ proc main(logger: Logger, mountDisks: bool = true, fstab: string = "/etc/fstab") logger.debug(&"Hostname was set to '{setHostname(logger)}'") logger.info("Creating symlinks") createSymlinks(logger) + logger.info("Creating directories") createDirectories(logger) + logger.debug("Entering critical fork() section: blocking signals") + var sigset: Sigset + # TODO logger.info("Processing boot runlevel") - addService(newService(name="Test", description="prints owo", exec="/bin/echo owo", + addService(newService(name="echoer", description="prints owo", exec="/bin/echo owo", runlevel=Boot, kind=Oneshot, workDir=getCurrentDir(), supervised=false, restartOnFailure=false, restartDelay=0)) - startServices(logger, workers=1, level=Boot) + addService(newService(name="sleeper", description="la mamma di licenziato", + exec="/usr/bin/sleep 10", supervised=true, restartOnFailure=true, + restartDelay=5, runlevel=Boot, workDir="/home", kind=Simple)) + addService(newService(name="errorer", description="la mamma di gavd", + exec="/bin/false", supervised=true, restartOnFailure=true, + restartDelay=5, runlevel=Boot, workDir="/", kind=Simple)) + startServices(logger, workers=2, level=Boot) + logger.debug("Setting up real signal handlers") + onSignal(SIGABRT, SIGALRM, SIGHUP, SIGILL, SIGKILL, SIGQUIT, SIGSTOP, SIGSEGV, SIGTSTP, + SIGTRAP, SIGPIPE, SIGUSR1, SIGUSR2, 6, SIGFPE, SIGBUS, SIGURG, SIGTERM): # 6 is SIGIOT + # Can't capture local variables because this implicitly generates + # a noconv procedure, so we use getDefaultLogger() instead. Must find + # a better solution long-term because we need the configuration from + # our own logger object (otherwise we'd always create a new one and + # never switch our logs to file once booting is completed) + getDefaultLogger().warning(&"Ignoring signal {sig} ({strsignal(sig)})") # Nim injects the variable "sig" into the scope. Gotta love those macros + onSignal(SIGCHLD): + # One of the key features of an init system is reaping child + # processes! + reapProcess(getDefaultLogger()) + onSignal(SIGINT): + # Temporary + nimDExit(getDefaultLogger(), 131, emerg=true) logger.debug("Starting main loop") mainLoop(logger) @@ -144,7 +173,6 @@ when isMainModule: try: main(logger) except: - raise logger.fatal(&"A fatal unrecoverable error has occurred during startup and NimD cannot continue: {getCurrentExceptionMsg()}") nimDExit(logger, 131) # ENOTRECOVERABLE - State not recoverable # This will almost certainly cause the kernel to crash with an error the likes of "Kernel not syncing, attempted to kill init!", diff --git a/src/util/logging.nim b/src/util/logging.nim index 338a104..8db845f 100644 --- a/src/util/logging.nim +++ b/src/util/logging.nim @@ -15,6 +15,7 @@ # A simple logging module inspired by python's own logging facility import terminal import strformat +import posix import times @@ -73,47 +74,55 @@ proc getDefaultLogger*(): Logger = ## level that writes the given message to the ## standard error with some basic info like the ## current date and time and the log level - + setStdIoUnbuffered() # Just in case proc logTrace(self: LogHandler, logger: Logger, message: string) = setForegroundColor(fgMagenta) - stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - TRACE] {message}""") + stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - TRACE ({posix.getpid()})] {message}""") + stderr.flushFile() setForegroundColor(fgDefault) proc logDebug(self: LogHandler, logger: Logger, message: string) = setForegroundColor(fgCyan) - stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - DEBUG] {message}""") + stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - DEBUG ({posix.getpid()})] {message}""") + stderr.flushFile() setForegroundColor(fgDefault) proc logInfo(self: LogHandler, logger: Logger, message: string) = setForegroundColor(fgGreen) - stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - INFO] {message}""") + stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - INFO ({posix.getpid()})] {message}""") + stderr.flushFile() setForegroundColor(fgDefault) proc logWarning(self: LogHandler, logger: Logger, message: string) = setForegroundColor(fgYellow) - stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - WARNING] {message}""") + stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - WARNING ({posix.getpid()})] {message}""") + stderr.flushFile() setForegroundColor(fgDefault) proc logError(self: LogHandler, logger: Logger, message: string) = setForegroundColor(fgRed) - stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - ERROR] {message}""") + stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - ERROR ({posix.getpid()})] {message}""") + stderr.flushFile() setForegroundColor(fgDefault) proc logCritical(self: LogHandler, logger: Logger, message: string) = setForegroundColor(fgRed) - stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - CRITICAL] {message}""") + stderr.writeLine(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - CRITICAL ({posix.getpid()})] {message}""") + stderr.flushFile() setForegroundColor(fgDefault) proc logFatal(self: LogHandler, logger: Logger, message: string) = setForegroundColor(fgBlack) setBackgroundColor(bgRed) - stderr.write(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - FATAL]""") + stderr.write(&"""[{fromUnix(getTime().toUnixFloat().int).format("d/M/yyyy HH:mm:ss")} - FATAL ({posix.getpid()})]""") setForegroundColor(fgRed) setBackgroundColor(bgDefault) stderr.writeline(&" {message}") setForegroundColor(fgDefault) + stderr.flushFile() + result = newLogger() result.addHandler(createHandler(logTrace, LogLevel.Trace))