2021-12-04 13:47:06 +01:00
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import strformat
2021-12-06 13:47:11 +01:00
import strutils
2021-12-04 13:47:06 +01:00
import cpuinfo
import tables
import osproc
import posix
import shlex
2021-12-05 20:53:44 +01:00
import os
# Binding to the C library's strsignal(3): returns a human-readable
# description of the signal number `sig`. The header is referenced as
# a system header; a name padded with spaces cannot be resolved by
# the C preprocessor
proc strsignal(sig: cint): cstring {.header: "<string.h>", importc.}
2021-12-04 13:47:06 +01:00
import .. / util / logging
type
  RunLevel* = enum
    ## The runlevels a service can be assigned to
    Boot, Default, Shutdown

  ServiceKind* = enum
    ## The supported kinds of service
    Oneshot, Simple

  RestartKind* = enum
    ## The restart policies a supervisor can apply
    ## once the service process terminates
    Always, OnFailure, Never

  Service* = ref object of RootObj
    ## Describes a single service known to the service manager
    name, description: string
    kind: ServiceKind
    workDir: string         ## Working directory the process is started in
    runlevel: RunLevel      ## Runlevel in which the service gets started
    exec: string            ## Command line: program name followed by its arguments
    supervised: bool        ## Whether a supervisor should watch the process
    restart: RestartKind
    restartDelay: int       ## Seconds to sleep before a restart
    depends*, provides*: seq[Service]
2021-12-04 13:47:06 +01:00
2021-12-06 20:45:53 +01:00
proc newService*(name, description: string, kind: ServiceKind, workDir: string, runlevel: RunLevel, exec: string, supervised: bool, restart: RestartKind,
                 restartDelay: int, depends, provides: seq[Service]): Service =
  ## Builds a new service object from the given settings.
  ## The returned service always lists itself as the last
  ## entry of its own `provides` sequence
  new(result)
  result.name = name
  result.description = description
  result.kind = kind
  result.workDir = workDir
  result.runlevel = runlevel
  result.exec = exec
  result.supervised = supervised
  result.restart = restart
  result.restartDelay = restartDelay
  result.depends = depends
  result.provides = provides
  # Every service provides itself
  result.provides.add(result)
proc extend[T](self: var seq[T], other: seq[T]) =
  ## Appends every element of other, in order,
  ## to the end of self
  self.add(other)
2021-12-04 13:47:06 +01:00
# Services registered through addService, in registration order
var services = newSeq[Service]()
# Maps the process id of a managed process to the service it runs
var processIDs = newTable[int, Service]()
2021-12-06 20:45:53 +01:00
proc resolveDependencies(logger: Logger, node: Service, resolved, unresolved: var seq[Service]) =
  ## Walks the dependency graph depth-first starting at node and
  ## appends services to resolved so that every service comes after
  ## the services it depends on. unresolved holds the services that
  ## are currently being visited and is used to detect cycles.
  ##
  ## A dependency that is still in unresolved means the graph has a
  ## cycle: an error is logged and node is not added to resolved.
  ## Services listed in node.provides are resolved after the
  ## dependencies; providers that are already resolved or currently
  ## being visited are skipped so they are neither added twice nor
  ## recursed into forever
  # The algorithm comes from https://www.electricmonk.nl/log/2008/08/07/dependency-resolving-algorithm/
  # and has been extended to support the dependent-provider paradigm
  var ok = true
  unresolved.add(node)
  for dependency in node.depends:
    if dependency in resolved:
      continue
    if dependency in unresolved:
      logger.error(&" Could not resolve dependencies for ' {node.name} ' -> ' {dependency.name} ' : cyclic dependency detected ")
      ok = false
      continue
    resolveDependencies(logger, dependency, resolved, unresolved)
  for dependency in node.provides:
    # A service always provides itself (see newService), and anything
    # already handled or in progress must not be visited again
    if dependency == node or dependency in resolved or dependency in unresolved:
      continue
    resolveDependencies(logger, dependency, resolved, unresolved)
  if ok:
    resolved.add(node)
  # del() does not keep order, which is fine: unresolved is only
  # ever used for membership tests
  let index = unresolved.find(node)
  if index >= 0:
    unresolved.del(index)
2021-12-04 13:47:06 +01:00
proc isManagedProcess*(pid: int): bool =
  ## Tells whether the given process id is
  ## registered in the table of managed
  ## (supervised) NimD service processes
  processIDs.hasKey(pid)
proc getManagedProcess*(pid: int): Service =
  ## Looks up the service registered for the
  ## given PID. The result is nil when the pid
  ## does not belong to a managed process
  processIDs.getOrDefault(pid)
proc removeManagedProcess*(pid: int) =
  ## Drops the entry for the given pid from
  ## the table of managed processes. Unknown
  ## pids are ignored
  # del() on a table is a no-op for missing keys
  processIDs.del(pid)
proc addManagedProcess*(pid: int, service: Service) =
  ## Registers service as the owner of the
  ## given pid in the table of managed
  ## processes, replacing any previous entry
  processIDs[pid] = service
proc addService*(service: Service) =
  ## Registers a service so that it gets
  ## started once its runlevel is processed
  add(services, service)
proc removeService*(service: Service) =
  ## Removes the first registration of the
  ## given service so it will not be started
  ## (has no effect after services have
  ## already been started)
  let position = services.find(service)
  if position != -1:
    services.del(position)
proc respawnService(logger: Logger, service: Service): Process =
  ## Sleeps for the service's restart delay (in seconds) and then
  ## starts a new instance of the service. Returns nil, after
  ## logging an error, when the exec line cannot be parsed or
  ## does not contain a program name
  sleep(service.restartDelay * 1000)
  let split = shlex(service.exec)
  if split.error or split.words.len == 0:
    logger.error(&" Error while restarting service ' {service.name} ' : invalid exec syntax ")
    return nil
  # The first word is the program, the remaining ones are its arguments
  result = startProcess(split.words[0], workingDir = service.workDir, args = split.words[1 .. ^1])


proc supervisorWorker(logger: Logger, service: Service, pid: int) =
  ## This is the actual worker that supervises the service process.
  ## It waits on the process with the given pid and, once waitpid()
  ## returns, applies the service's restart policy:
  ##
  ## - Never: the worker stops
  ## - Always: the service is restarted after restartDelay seconds
  ## - OnFailure: the service is restarted only if it was terminated
  ##   by a signal, otherwise the worker stops
  ##
  ## The worker also stops when the exec line of the service is
  ## invalid at restart time
  # NOTE(review): the return value of waitpid() is only logged; a
  # failed call (-1) leaves status untouched -- confirm whether that
  # case needs dedicated handling
  logger.trace(&" New supervisor for service ' {service.name} ' has been spawned ")
  var pid = pid
  var status: cint
  var returnCode: int
  var sig: int
  var process: Process
  logger.debug(" Switching logs to file ")
  logger.switchToFile()
  while true:
    logger.trace(&" Calling waitpid() on {pid} ")
    returnCode = posix.waitPid(cint(pid), status, WUNTRACED)
    # sig is 0 for a regular exit, the signal number when the process
    # was killed by a signal and -1 in any other case
    if WIFEXITED(status):
      sig = 0
    elif WIFSIGNALED(status):
      sig = WTERMSIG(status)
    else:
      sig = -1
    logger.trace(&" Call to waitpid() set status to {status} and returned {returnCode}, setting sig to {sig} ")
    var shouldRestart = false
    case service.restart:
      of Never:
        logger.info(&" Service ' {service.name} ' ({returnCode}) has exited, shutting down controlling process ")
      of Always:
        if sig > 0:
          logger.info(&" Service ' {service.name} ' ({returnCode}) has crashed (terminated by signal {sig}: {strsignal(cint(sig))}), sleeping {service.restartDelay} seconds before restarting it ")
        elif sig == 0:
          logger.info(&" Service ' {service.name} ' has exited gracefully, sleeping {service.restartDelay} seconds before restarting it ")
        else:
          logger.info(&" Service ' {service.name} ' has exited, sleeping {service.restartDelay} seconds before restarting it ")
        shouldRestart = true
      of OnFailure:
        if sig > 0:
          logger.info(&" Service ' {service.name} ' ({returnCode}) has crashed (terminated by signal {sig}: {strsignal(cint(sig))}), sleeping {service.restartDelay} seconds before restarting it ")
          shouldRestart = true
        else:
          # Nothing to restart: stop here instead of waiting again
          # on a process that is already gone
          logger.info(&" Service ' {service.name} ' ({returnCode}) has exited, shutting down controlling process ")
    if not shouldRestart:
      break
    removeManagedProcess(pid)
    let respawned = respawnService(logger, service)
    if respawned == nil:
      break
    # Release the handle of the previous instance before replacing it
    if process != nil:
      process.close()
    process = respawned
    pid = process.processID()
  if process != nil:
    process.close()
2021-12-04 13:47:06 +01:00
proc startService(logger: Logger, service: Service) =
  ## Starts a single service (this is called by
  ## startServices below until all services have
  ## been started). This function is supposed to
  ## be called from a forked process and it itself
  ## forks to call supervisorWorker if the service
  ## is a supervised one.
  ##
  ## This procedure never returns: the calling
  ## process always terminates through quit(0),
  ## also when the service could not be started
  var process: Process
  try:
    let split = shlex(service.exec)
    # An exec line without any word has no program to run
    if split.error or split.words.len == 0:
      logger.error(&" Error while starting service ' {service.name} ' : invalid exec syntax ")
      quit(0)
    # The first word is the program, the remaining ones are its arguments
    process = startProcess(split.words[0], workingDir = service.workDir, args = split.words[1 .. ^1])
    if service.supervised and service.kind != Oneshot:
      let pid = posix.fork()
      if pid == -1:
        # Without a supervisor the service keeps running unsupervised
        logger.error(&" Error, cannot fork: {posix.strerror(posix.errno)} ")
      elif pid == 0:
        logger.trace(&" New child has been spawned ")
        supervisorWorker(logger, service, process.processID)
    # If the service is unsupervised we just exit
  except CatchableError:
    logger.error(&" Error while starting service {service.name}: {getCurrentExceptionMsg()} ")
  if process != nil:
    process.close()
  quit(0)
2021-12-04 17:31:18 +01:00
proc startServices*(logger: Logger, level: RunLevel, workers: int = 1) =
  ## Starts the registered services in the
  ## given runlevel.
  ##
  ## A spawner process is forked: it forks one
  ## worker per service of the runlevel, in
  ## registration order, keeping track of how
  ## many workers were started against the
  ## workers limit. The calling process waits
  ## for the spawner and then returns
  var resolved: seq[Service] = @[]
  var unresolved: seq[Service] = @[]
  # With no registered service there is nothing to resolve
  if services.len > 0:
    resolveDependencies(logger, services[0], resolved, unresolved)
  if workers > cpuinfo.countProcessors():
    logger.warning(&" The configured number of workers ({workers}) is greater than the number of CPU cores ({cpuinfo.countProcessors()}), performance may degrade ")
  var workerCount: int = 0
  var status: cint
  var pid: int = posix.fork()
  if pid == -1:
    logger.error(&" Error, cannot fork: {posix.strerror(posix.errno)} ")
  elif pid == 0:
    # Spawner process
    logger.debug(" Started service spawner process ")
    var servicesCopy: seq[Service] = @[]
    for service in services:
      if service.runlevel == level:
        servicesCopy.add(service)
    while servicesCopy.len() > 0:
      if workerCount == workers:
        # NOTE(review): this waits on the most recently forked worker
        # only, not on whichever worker exits first -- confirm intent
        logger.debug(&" Worker queue full, waiting for some worker to exit... ")
        logger.trace(&" Calling waitpid() on {pid} ")
        var returnCode = waitPid(cint(pid), status, WUNTRACED)
        logger.trace(&" Call to waitpid() set status to {status} and returned {returnCode} ")
        dec(workerCount)
      pid = posix.fork()
      if pid == -1:
        # The service stays at the head of the queue and is retried
        logger.error(&" An error occurred while forking to spawn services, trying again: {posix.strerror(posix.errno)} ")
      elif pid == 0:
        # Worker process: startService never returns
        logger.trace(&" New child has been spawned ")
        if not servicesCopy[0].supervised or servicesCopy[0].kind == Oneshot:
          logger.info(&""" Starting {(if servicesCopy[0].kind != Oneshot: " unsupervised " else: " oneshot " )} service ' {servicesCopy[0].name} ' """)
        else:
          logger.info(&" Starting supervised service ' {servicesCopy[0].name} ' ")
        startService(logger, servicesCopy[0])
      else:
        workerCount += 1
        if servicesCopy[0].supervised:
          addManagedProcess(pid, servicesCopy[0])
        # delete() keeps the remaining services in registration order
        servicesCopy.delete(0)
    quit(0)
  else:
    logger.debug(&" Waiting for completion of service spawning in runlevel {( $level ).toLowerAscii()} ")
    logger.trace(&" Calling waitpid() on {pid} ")
    var returnCode = waitPid(cint(pid), status, WUNTRACED)
    logger.trace(&" Call to waitpid() set status to {status} and returned {returnCode} ")