Skip to content

Commit

Permalink
Fix: --live-stats (#77)
Browse files Browse the repository at this point in the history
  • Loading branch information
CorentinB authored Jul 12, 2024
1 parent 0cbb839 commit 7c46c1f
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 139 deletions.
1 change: 1 addition & 0 deletions cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ func InitCrawlWithCMD(flags config.Flags) *crawl.Crawl {
RotateLogFile: true,
RotateElasticSearchIndex: true,
ElasticsearchConfig: elasticSearchConfig,
+ LiveStats: flags.LiveStats,
})
if err != nil {
fmt.Println(err)
Expand Down
17 changes: 12 additions & 5 deletions internal/pkg/crawl/hq.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ func (c *Crawl) HQWebsocket() {
GoVersion: utils.GetVersion().GoVersion,
})
if err != nil {
- logrus.WithFields(c.genLogFields(err, nil, nil)).Errorln("error sending identify payload to crawl HQ, trying to reconnect..")
+ c.Log.WithFields(c.genLogFields(err, nil, map[string]interface{}{})).Error("error sending identify payload to crawl HQ, trying to reconnect..")

err = c.HQClient.InitWebsocketConn()
if err != nil {
- logrus.WithFields(c.genLogFields(err, nil, nil)).Errorln("error initializing websocket connection to crawl HQ")
+ c.Log.WithFields(c.genLogFields(err, nil, map[string]interface{}{})).Error("error initializing websocket connection to crawl HQ")
}
}

Expand Down Expand Up @@ -72,7 +73,7 @@ func (c *Crawl) HQProducer() {
for {
_, err := c.HQClient.Discovered(discoveredArray, "seed", false, false)
if err != nil {
- logrus.WithFields(c.genLogFields(err, nil, nil)).Errorln("error sending payload to crawl HQ, waiting 1s then retrying..")
+ c.Log.WithFields(c.genLogFields(err, nil, map[string]interface{}{})).Error("error sending payload to crawl HQ, waiting 1s then retrying..")
time.Sleep(time.Second)
continue
}
Expand All @@ -87,7 +88,7 @@ func (c *Crawl) HQProducer() {
for {
_, err := c.HQClient.Discovered(discoveredArray, "seed", false, false)
if err != nil {
- logrus.WithFields(c.genLogFields(err, nil, nil)).Errorln("error sending payload to crawl HQ, waiting 1s then retrying..")
+ c.Log.WithFields(c.genLogFields(err, nil, map[string]interface{}{})).Error("error sending payload to crawl HQ, waiting 1s then retrying..")
time.Sleep(time.Second)
continue
}
Expand Down Expand Up @@ -125,7 +126,9 @@ func (c *Crawl) HQProducer() {
for {
_, err := c.HQClient.Discovered([]gocrawlhq.URL{discoveredURL}, "seed", true, false)
if err != nil {
- logrus.WithFields(c.genLogFields(err, nil, nil)).Errorln("error sending payload to crawl HQ, waiting 1s then retrying..")
+ c.Log.WithFields(c.genLogFields(err, nil, map[string]interface{}{
+ "bypassSeencheck": discoveredItem.BypassSeencheck,
+ })).Error("error sending payload to crawl HQ, waiting 1s then retrying..")
time.Sleep(time.Second)
continue
}
Expand Down Expand Up @@ -177,6 +180,10 @@ func (c *Crawl) HQConsumer() {
// get batch from crawl HQ
batch, err := c.HQClient.Feed(HQBatchSize, c.HQStrategy)
if err != nil {
+ if strings.Contains(err.Error(), "feed is empty") {
+ time.Sleep(time.Second)
+ }

c.Log.WithFields(c.genLogFields(err, nil, map[string]interface{}{
"batchSize": HQBatchSize,
})).Error("error getting new URLs from crawl HQ")
Expand Down
11 changes: 7 additions & 4 deletions internal/pkg/log/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type Config struct {
RotateLogFile bool
ElasticsearchConfig *ElasticsearchConfig
RotateElasticSearchIndex bool
+ LiveStats bool
}

// New creates a new Logger instance with the given configuration.
Expand All @@ -60,10 +61,12 @@ func New(cfg Config) (*Logger, error) {
var handlers []slog.Handler

// Create stdout handler
- stdoutHandler := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
- Level: cfg.StdoutLevel,
- })
- handlers = append(handlers, stdoutHandler)
+ if !cfg.LiveStats {
+ stdoutHandler := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
+ Level: cfg.StdoutLevel,
+ })
+ handlers = append(handlers, stdoutHandler)
+ }

// Create file handler if FileOutput is specified
if cfg.FileConfig != nil {
Expand Down
6 changes: 4 additions & 2 deletions internal/pkg/utils/url_string.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package utils

import (
+ "fmt"
+ "log/slog"
"net/url"
"strings"

Expand All @@ -14,12 +16,12 @@ func URLToString(u *url.URL) string {
u.RawQuery = q.Encode()
u.Host, err = idna.ToASCII(u.Host)
if err != nil {
- LogWarning.Warningf("could not IDNA encode URL: %s", err)
+ slog.Warn(fmt.Sprintf("could not IDNA encode URL: %s", err))
}

tempHost, err := idna.ToASCII(u.Hostname())
if err != nil {
- LogWarning.Warningf("could not IDNA encode URL: %s", err)
+ slog.Warn(fmt.Sprintf("could not IDNA encode URL: %s", err))
tempHost = u.Hostname()
}

Expand Down
128 changes: 0 additions & 128 deletions internal/pkg/utils/utils.go

This file was deleted.

0 comments on commit 7c46c1f

Please sign in to comment.