Skip to content

Commit

Permalink
Initial commit, spotdrain daemon
Browse files Browse the repository at this point in the history
  • Loading branch information
JoaoPPinto committed Mar 20, 2023
0 parents commit a0ced6a
Show file tree
Hide file tree
Showing 4 changed files with 346 additions and 0 deletions.
32 changes: 32 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
module spotdrain

go 1.18

require (
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.23
github.com/aws/smithy-go v1.13.5
github.com/hashicorp/nomad/api v0.0.0-20230227181747-542b23e999b5
)

require (
github.com/DataDog/zstd v1.5.0 // indirect
github.com/golang/protobuf v1.2.0 // indirect
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e // indirect
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d // indirect
google.golang.org/appengine v1.4.0 // indirect
)

require (
github.com/DataDog/datadog-api-client-go/v2 v2.9.0
github.com/aws/aws-sdk-go-v2 v1.17.5 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/hashicorp/cronexpr v1.1.1 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-rootcerts v1.0.2 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
golang.org/x/exp v0.0.0-20230108222341-4b8118a2686a // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
59 changes: 59 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/DataDog/datadog-api-client-go/v2 v2.9.0 h1:1Cz3mqj95iqnQPykEovq2p52rrU26XvLC2Fz6hPE+TU=
github.com/DataDog/datadog-api-client-go/v2 v2.9.0/go.mod h1:sHt3EuVMN8PSYJu065qwp3pZxCwR3RZP4sJnYwj/ZQY=
github.com/DataDog/zstd v1.5.0 h1:+K/VEwIAaPcHiMtQvpLD4lqW7f0Gk3xdYZmI1hD+CXo=
github.com/DataDog/zstd v1.5.0/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
github.com/aws/aws-sdk-go-v2 v1.17.5 h1:TzCUW1Nq4H8Xscph5M/skINUitxM5UBAyvm2s7XBzL4=
github.com/aws/aws-sdk-go-v2 v1.17.5/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.23 h1:Kbiv9PGnQfG/imNI4L/heyUXvzKmcWSBeDvkrQz5pFc=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.23/go.mod h1:mOtmAg65GT1HIL/HT/PynwPbS+UG0BgCZ6vhkPqnxWo=
github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8=
github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/hashicorp/cronexpr v1.1.1 h1:NJZDd87hGXjoZBdvyCF9mX4DCq5Wy7+A/w+A7q0wn6c=
github.com/hashicorp/cronexpr v1.1.1/go.mod h1:P4wA0KBl9C5q2hABiMO7cp6jcIg96CDh1Efb3g1PWA4=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc=
github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
github.com/hashicorp/nomad/api v0.0.0-20230227181747-542b23e999b5 h1:NdIZNohnP18RFB/bpQc646tkoFVWYNhF26sAx1HAT6s=
github.com/hashicorp/nomad/api v0.0.0-20230227181747-542b23e999b5/go.mod h1:uAHGOsZaW79Na+Y2ZuGYNN8EQVf/ALPtJlErNIT9nQ4=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/shoenig/test v0.6.1 h1:TVIih3yGvaH8Yci2OedB/NAhOC9UlNi5+ajCVyMPflg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
golang.org/x/exp v0.0.0-20230108222341-4b8118a2686a h1:tlXy25amD5A7gOfbXdqCGN5k8ESEed/Ee1E5RcrYnqU=
golang.org/x/exp v0.0.0-20230108222341-4b8118a2686a/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e h1:bRhVy7zSSasaqNksaRZiA5EEI+Ei4I1nO5Jh72wfHlg=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d h1:TzXSXBo42m9gQenoE3b9BGiEpg5IG2JkU5FkPIawgtw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
242 changes: 242 additions & 0 deletions spotdrain.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
package main

import (
"context"
"encoding/json"
"errors"
"io/ioutil"
"log"
"net/http"
"os"
"os/signal"
"strings"
"syscall"
"time"

"github.com/aws/aws-sdk-go-v2/feature/ec2/imds"
"github.com/aws/smithy-go"
awshttp "github.com/aws/smithy-go/transport/http"

nomadapi "github.com/hashicorp/nomad/api"

"github.com/DataDog/datadog-api-client-go/v2/api/datadog"
ddapi "github.com/DataDog/datadog-api-client-go/v2/api/datadogV1"
)

// Global channel used to gracefully stop ticker
var done = make(chan bool)

func main() {
imdsClient := createImdsClient()
nomadClient := createNomadClient()
ddClient, ddCtx, ddEnv := createDataDogClient()

if !isSpotInstance(imdsClient) {
log.Print("This is not a spot instance. Spotdrain will not run")
log.Print("Exiting ...")
os.Exit(0)
}

instanceId := getEC2InstanceId(imdsClient)
registered, nodeId := checkNodeRegistered(nomadClient, instanceId)
if !registered {
log.Print("This instance is not registered on Nomad. Spotdrain will not run")
log.Print("Exiting...")
os.Exit(1)
}

ticker := time.NewTicker(10 * time.Second)

go func() {
for {
select {
case <-done:
log.Print("Stopping ticker")
return
case <-ticker.C:
log.Print("Waking up and executing check")
if checkMarkedForInterruption(imdsClient) {
triggerNomadNodeDrain(nomadClient, nodeId)
sendDatadogEvent(ddClient, ddCtx, instanceId, ddEnv)
log.Print("Job's done. Exiting...")
os.Exit(0)
}
}
}

}()

// Block so we don't exit
select {}
}

func init() {
c := make(chan os.Signal)
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
go func() {
<-c
log.Print("Stopping ...")
done <- true
// Wait half a second for the ticker to properly close
time.Sleep(500 * time.Millisecond)
os.Exit(155)
}()
}

func createNomadClient() *nomadapi.Client {
nomadToken := getNomadTokenFromEnv()
config := nomadapi.DefaultConfig()
config.Address = "https://127.0.0.1:4646"
config.Headers = make(http.Header)
config.Headers.Add("X-Nomad-Token", nomadToken)
client, err := nomadapi.NewClient(config)
if err != nil {
log.Fatal("Fatal: ", err)
}
return client
}

func getNomadTokenFromEnv() string {
token, exists := os.LookupEnv("SPOTDRAIN_NOMAD_TOKEN")
if !exists {
log.Fatal("Fatal: Nomad Auth Token not present in environment variable SPOTDRAIN_NOMAD_TOKEN")
}
return token
}

func createImdsClient() *imds.Client {
return imds.New(imds.Options{})
}

func createDataDogClient() (*datadog.APIClient, context.Context, string) {
ctx := context.WithValue(
context.Background(),
datadog.ContextAPIKeys,
map[string]datadog.APIKey{
"apiKeyAuth": {
Key: os.Getenv("DD_CLIENT_API_KEY"),
},
"appKeyAuth": {
Key: os.Getenv("DD_CLIENT_APP_KEY"),
},
},
)

conf := datadog.NewConfiguration()
client := datadog.NewAPIClient(conf)
env, exists := os.LookupEnv("DD_ENV")
if !exists {
log.Print("WARN: Datadog env not found. Event will not have env tag")
}
return client, ctx, env
}

func isSpotInstance(client *imds.Client) bool {
mdi := &imds.GetMetadataInput{Path: "/instance-life-cycle"}
output, err := client.GetMetadata(context.TODO(), mdi)
if err != nil {
log.Fatal("Fatal: ", err)
}

bytes, err := ioutil.ReadAll(output.Content)
if err != nil {
log.Fatal("Fatal: ", err)
}

return strings.EqualFold(string(bytes), "spot")
}

func getEC2InstanceId(client *imds.Client) string {
mdi := &imds.GetMetadataInput{Path: "/instance-id"}
output, err := client.GetMetadata(context.TODO(), mdi)
if err != nil {
log.Fatal("Fatal: ", err)
}

bytes, err := ioutil.ReadAll(output.Content)
if err != nil {
log.Fatal("Fatal: ", err)
}

return string(bytes)
}

func checkMarkedForInterruption(client *imds.Client) bool {
marked := false

mdi := &imds.GetMetadataInput{Path: "/spot/instance-action"}
output, err := client.GetMetadata(context.TODO(), mdi)
if err != nil {
var oe *smithy.OperationError
if errors.As(err, &oe) {
var re *awshttp.ResponseError
if errors.As(oe.Unwrap(), &re) {
if re.HTTPStatusCode() == 404 {
log.Print("Metadata not available")
return marked
}
}
}
log.Fatal("Fatal: ", err)
}

bytes, err := ioutil.ReadAll(output.Content)
if err != nil {
log.Fatal("Fatal: ", err)
}

var ia InstanceAction
err = json.Unmarshal(bytes, &ia)
if err != nil {
log.Fatal("Fatal: ", err)
}

marked = true
log.Printf("Instance is Marked for interruption! Action: %s, Time: %s", ia.Action, ia.Time)

return marked
}

func checkNodeRegistered(client *nomadapi.Client, instanceId string) (bool, string) {
nodeList, _, err := client.Nodes().List(&nomadapi.QueryOptions{})
if err != nil {
log.Fatal("Fatal: ", err)
}
for _, node := range nodeList {
if strings.EqualFold(instanceId, node.Name) {
log.Print("This host is registered on Nomad")
return true, node.ID
}
}

log.Print("Could not find this instance registered in Nomad")
return false, ""
}
func triggerNomadNodeDrain(client *nomadapi.Client, nodeId string) {
ds := &nomadapi.DrainSpec{Deadline: 60 * time.Second, IgnoreSystemJobs: true}
wo := &nomadapi.WriteOptions{}
_, err := client.Nodes().UpdateDrain(nodeId, ds, false, wo)
if err != nil {
log.Fatal("Fatal: Error triggering Nomad Node Drain: ", err)
}
log.Println("Triggered Nomad Node Drain")
}

func sendDatadogEvent(client *datadog.APIClient, ctx context.Context, instanceId string, ddEnv string) {
tags := []string{"service:spotdrain", "spotdrain:termination_notice", "env:" + ddEnv}
er := ddapi.NewEventCreateRequest("This instance has received a termination notice from AWS EC2", "Spot-Instance-Termination-Notice")
er.SetPriority(ddapi.EVENTPRIORITY_NORMAL)
er.SetAlertType(ddapi.EVENTALERTTYPE_WARNING)
er.SetHost(instanceId)
er.SetTags(tags)
ecr, _, err := ddapi.NewEventsApi(client).CreateEvent(ctx, *er)
if err != nil {
log.Fatal("Fatal: Error creating datadog event: ", err)
}
log.Print("Sending Datadog event: ", ecr.GetStatus())
}

type InstanceAction struct {
Action string `json:"action"`
Time time.Time `json:"time"`
}
13 changes: 13 additions & 0 deletions spotdrain.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[Unit]
Description=SpotDrain, Nomad Spot Instance Notification and Eviction Daemon
After=nomad.service

[Service]
Environment=SPOTDRAIN_NOMAD_TOKEN=
Environment=DD_CLIENT_APP_KEY=
Environment=DD_CLIENT_API_KEY=
ExecStart=/opt/spotdrain/bin/spotdrain
Restart=on-failure

[Install]
WantedBy=multi-user.target

0 comments on commit a0ced6a

Please sign in to comment.