mukan-consensus/cmd/cometbft/commands/debug/kill.go
Mukan Erkin Törük c6a41110d1
Some checks are pending
docker-build-cometbft / vars (push) Waiting to run
docker-build-cometbft / build-images (amd64, ubuntu-24.04) (push) Blocked by required conditions
docker-build-cometbft / build-images (arm64, ubuntu-24.04-arm) (push) Blocked by required conditions
docker-build-cometbft / merge-images (push) Blocked by required conditions
docker-build-e2e-node / vars (push) Waiting to run
docker-build-e2e-node / build-images (amd64, ubuntu-24.04) (push) Blocked by required conditions
docker-build-e2e-node / build-images (arm64, ubuntu-24.04-arm) (push) Blocked by required conditions
docker-build-e2e-node / merge-images (push) Blocked by required conditions
refactor: replace all github.com upstream refs with git.cw.tr/mukan-network
2026-05-11 03:36:20 +03:00

152 lines
4.2 KiB
Go

package debug
import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"syscall"
"time"
"github.com/spf13/cobra"
"github.com/spf13/viper"
cfg "git.cw.tr/mukan-network/mukan-consensus/config"
"git.cw.tr/mukan-network/mukan-consensus/libs/cli"
rpchttp "git.cw.tr/mukan-network/mukan-consensus/rpc/client/http"
)
// killCmd is the "kill" debug sub-command. It takes exactly two positional
// arguments, the PID of the process to kill and the path of the compressed
// archive to write, and delegates the work to killCmdHandler.
var killCmd = &cobra.Command{
	Use:   "kill [pid] [compressed-output-file]",
	Short: "Kill a CometBFT process while aggregating and packaging debugging data",
	Long: `Kill a CometBFT process while also aggregating CometBFT process data
such as the latest node state, including consensus and networking state,
go-routine state, and the node's WAL and config information. This aggregated data
is packaged into a compressed archive.
Example:
$ cometbft debug kill 34255 /path/to/cmt-debug.zip`,
	Args: cobra.ExactArgs(2),
	RunE: killCmdHandler,
}
// killCmdHandler runs the "kill" debug sub-command.
//
// args[0] is the PID of the process to kill and args[1] is the path of the
// archive to produce. The handler gathers debugging data into a temporary
// directory (node status, network info and consensus state obtained through
// the RPC client, plus copies of the WAL and the configuration), then kills
// the process via killProc and finally compresses the directory into the
// output file. The temporary directory is removed before returning.
//
// An error is returned when the PID is not a positive integer, the output
// path is empty, or any of the collection, kill or archive steps fails.
func killCmdHandler(_ *cobra.Command, args []string) error {
	pid, err := strconv.Atoi(args[0])
	if err != nil {
		return fmt.Errorf("invalid pid %q: %w", args[0], err)
	}
	// A non-positive value cannot identify a single process. Reject it here
	// rather than collecting all the data and then failing to kill anything.
	if pid <= 0 {
		return fmt.Errorf("invalid pid %d: must be a positive integer", pid)
	}

	outFile := args[1]
	if outFile == "" {
		return errors.New("invalid output file")
	}

	rpc, err := rpchttp.New(nodeRPCAddr, "/websocket")
	if err != nil {
		return fmt.Errorf("failed to create new http client: %w", err)
	}

	home := viper.GetString(cli.HomeFlag)
	conf := cfg.DefaultConfig()
	conf = conf.SetRoot(home)
	cfg.EnsureRoot(conf.RootDir)

	// Create a temporary directory which will contain all the state dumps and
	// relevant files and directories that will be compressed into a file.
	tmpDir, err := os.MkdirTemp(os.TempDir(), "cometbft_debug_tmp")
	if err != nil {
		return fmt.Errorf("failed to create temporary directory: %w", err)
	}
	defer os.RemoveAll(tmpDir)

	logger.Info("getting node status...")
	if err := dumpStatus(rpc, tmpDir, "status.json"); err != nil {
		return err
	}

	logger.Info("getting node network info...")
	if err := dumpNetInfo(rpc, tmpDir, "net_info.json"); err != nil {
		return err
	}

	logger.Info("getting node consensus state...")
	if err := dumpConsensusState(rpc, tmpDir, "consensus_state.json"); err != nil {
		return err
	}

	logger.Info("copying node WAL...")
	if err := copyWAL(conf, tmpDir); err != nil {
		return err
	}

	logger.Info("copying node configuration...")
	if err := copyConfig(home, tmpDir); err != nil {
		return err
	}

	// The process is killed only after everything else has been collected.
	logger.Info("killing CometBFT process")
	if err := killProc(pid, tmpDir); err != nil {
		return err
	}

	logger.Info("archiving and compressing debug directory...")
	return zipDir(tmpDir, outFile)
}
// killProc attempts to kill the CometBFT process with the given PID using an
// ABORT signal, which should result in a goroutine stacktrace. While doing so
// the PID's STDERR is tailed (through /proc/<pid>/fd/2) and written to the file
// "stacktrace.out" under the directory dir.
//
// An error is returned if the output file cannot be created, the tail command
// cannot be started, or waiting on the tail command fails for a reason other
// than the command exiting unsuccessfully (which includes being killed by this
// function). Failures to find or signal the target process, or to stop the
// tail command, are only reported on os.Stderr.
//
// The function does not return before the signal has been attempted, so the
// caller cannot finish before the kill request was issued.
func killProc(pid int, dir string) error {
	// Pipe STDERR output from tailing the CometBFT process to a file.
	//
	// NOTE: This depends on the tail utility and on a /proc filesystem that
	// exposes the target's file descriptors, so it will only work on UNIX
	// systems providing both.
	cmd := exec.Command("tail", "-f", fmt.Sprintf("/proc/%d/fd/2", pid)) //nolint: gosec

	outFile, err := os.Create(filepath.Join(dir, "stacktrace.out"))
	if err != nil {
		return err
	}
	defer outFile.Close()

	cmd.Stdout = outFile
	cmd.Stderr = outFile

	if err := cmd.Start(); err != nil {
		return err
	}

	// signaled is closed once the kill attempt has been made; tailDone is
	// closed once the tail command has exited.
	signaled := make(chan struct{})
	tailDone := make(chan struct{})

	// Kill the underlying CometBFT process and subsequently the tailing process.
	go func() {
		// Killing the CometBFT process with the '-ABRT|-6' signal will result in
		// a goroutine stacktrace.
		p, err := os.FindProcess(pid)
		if err != nil {
			fmt.Fprintf(os.Stderr, "failed to find PID to kill CometBFT process: %s\n", err)
		} else if err = p.Signal(syscall.SIGABRT); err != nil {
			fmt.Fprintf(os.Stderr, "failed to kill CometBFT process: %s\n", err)
		}
		close(signaled)

		// Allow some time for the CometBFT process to be killed and for its
		// output to be captured, unless tail has already exited on its own.
		//
		// TODO: We should 'wait' for a kill to succeed (e.g. poll for PID until it
		// cannot be found). Regardless, this should be ample time.
		select {
		case <-tailDone:
			// Nothing is left to stop.
		case <-time.After(5 * time.Second):
			if err := cmd.Process.Kill(); err != nil {
				fmt.Fprintf(os.Stderr, "failed to kill CometBFT process output redirection: %s\n", err)
			}
		}
	}()

	waitErr := cmd.Wait()
	close(tailDone)
	// Make sure the signal was at least attempted before handing control back.
	<-signaled

	// Only return an error not caused by the tail command exiting
	// unsuccessfully, e.g. because of the manual kill above.
	var exitErr *exec.ExitError
	if waitErr != nil && !errors.As(waitErr, &exitErr) {
		return waitErr
	}
	return nil
}