diff --git a/.ci/check_new_rules.go b/.ci/check_new_rules.go
index 9561c5ce..5abae2f3 100644
--- a/.ci/check_new_rules.go
+++ b/.ci/check_new_rules.go
@@ -12,7 +12,7 @@ import (
 
 var (
 	regexGitleaksRules = regexp.MustCompile(`(?m)^[^/\n\r]\s*rules\.([a-zA-Z0-9_]+)\(`)
-	regex2msRules      = regexp.MustCompile(`(?m)^[^/\n\r]\s*(?:// )?{Rule:\s*\*rules\.([a-zA-Z0-9_]+)\(\),`)
+	regex2msRules      = regexp.MustCompile(`(?m)^[^/\n\r]\s*(?:// )?{Rule:\s*\*(?:rules\.)?([a-zA-Z0-9_]+)\(\),`) // the "rules." qualifier is optional so entries such as {Rule: *PlaidAccessID(), ...} also match
 )
 
 func main() {
diff --git a/engine/rules/plaid.go b/engine/rules/plaid.go
new file mode 100644
index 00000000..4c6ba51e
--- /dev/null
+++ b/engine/rules/plaid.go
@@ -0,0 +1,27 @@
+package rules
+
+import (
+	"github.com/zricethezav/gitleaks/v8/cmd/generate/secrets"
+	"github.com/zricethezav/gitleaks/v8/config"
+)
+
+// PlaidAccessID builds and validates the "plaid-client-id" rule. Using this local version (Entropy 3.0) because gitleaks has entropy as 3.5, which causes issues on this rule's validation.
+func PlaidAccessID() *config.Rule {
+	// define rule: "plaid" identifier followed by a 24-character [a-z0-9] secret, matched case-insensitively
+	r := config.Rule{
+		RuleID:      "plaid-client-id",
+		Description: "Uncovered a Plaid Client ID, which could lead to unauthorized financial service integrations and data breaches.",
+		Regex:       generateSemiGenericRegex([]string{"plaid"}, alphaNumeric("24"), true),
+
+		Entropy: 3.0,
+		Keywords: []string{
+			"plaid",
+		},
+	}
+
+	// validate: the generated sample is passed as a true positive; no false positives are supplied
+	tps := []string{
+		generateSampleSecret("plaid", secrets.NewSecret(alphaNumeric("24"))),
+	}
+	return validate(r, tps, nil)
+}
diff --git a/engine/rules/rule.go b/engine/rules/rule.go
index 5de08dd7..0d704190 100644
--- a/engine/rules/rule.go
+++ b/engine/rules/rule.go
@@ -30,12 +30,20 @@ func validate(r config.Rule, truePositives []string, falsePositives []string) *c
 	})
 	for _, tp := range truePositives {
 		if len(d.DetectString(tp)) != 1 {
-			log.Fatal().Msgf("Failed to validate. For rule ID [%s], true positive [%s] was not detected by regexp [%s]", r.RuleID, tp, r.Regex) // lint:ignore This Fatal happens in a test
+			log.Fatal(). // lint:ignore This Fatal happens in a test
+				Str("rule", r.RuleID).
+				Str("value", tp).
+				Str("regex", r.Regex.String()).
+				Msg("Failed to Validate. True positive was not detected by regex.")
 		}
 	}
 	for _, fp := range falsePositives {
 		if len(d.DetectString(fp)) != 0 {
-			log.Fatal().Msgf("Failed to validate. For rule ID [%s], false positive [%s] was detected by regexp [%s]", r.RuleID, fp, r.Regex) // lint:ignore This Fatal happens in a test
+			log.Fatal(). // lint:ignore This Fatal happens in a test
+				Str("rule", r.RuleID).
+				Str("value", fp).
+				Str("regex", r.Regex.String()).
+				Msg("Failed to Validate. False positive was detected by regex.")
 		}
 	}
 	return &r
diff --git a/engine/rules/rules.go b/engine/rules/rules.go
index ea4e4699..70b0f167 100644
--- a/engine/rules/rules.go
+++ b/engine/rules/rules.go
@@ -136,7 +136,7 @@ func getDefaultRules() *[]Rule {
 		{Rule: *rules.NytimesAccessToken(), Tags: []string{TagAccessToken}},
 		{Rule: *rules.OktaAccessToken(), Tags: []string{TagAccessToken}},
 		{Rule: *rules.OpenAI(), Tags: []string{TagApiKey}},
-		{Rule: *rules.PlaidAccessID(), Tags: []string{TagClientId}},
+		{Rule: *PlaidAccessID(), Tags: []string{TagClientId}},
 		// {Rule: *rules.PlaidSecretKey(), Tags: []string{TagSecretKey}}, https://github.com/Checkmarx/2ms/issues/226
 		// {Rule: *rules.PlaidAccessToken(), Tags: []string{TagApiToken}}, https://github.com/Checkmarx/2ms/issues/226
 		{Rule: *rules.PlanetScalePassword(), Tags: []string{TagPassword}},
@@ -190,7 +190,7 @@ func getDefaultRules() *[]Rule {
 		{Rule: *rules.TwitterBearerToken(), Tags: []string{TagApiToken}},
 		{Rule: *rules.Typeform(), Tags: []string{TagApiToken}},
 		{Rule: *rules.VaultBatchToken(), Tags: []string{TagApiToken}},
-		{Rule: *rules.VaultServiceToken(), Tags: []string{TagApiToken}},
+		{Rule: *VaultServiceToken(), Tags: []string{TagApiToken}},
 		{Rule: *rules.YandexAPIKey(), Tags: []string{TagApiKey}},
 		{Rule: *rules.YandexAWSAccessToken(), Tags: []string{TagAccessToken}},
 		{Rule: *rules.YandexAccessToken(), Tags: []string{TagAccessToken}},
diff --git a/engine/rules/utils.go b/engine/rules/utils.go
new file mode 100644
index 00000000..5406ec7f
--- /dev/null
+++ b/engine/rules/utils.go
@@ -0,0 +1,75 @@
+package rules
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+)
+
+const (
+	// case-insensitive flag, written at the very start of a pattern
+	caseInsensitive = `(?i)`
+
+	// identifier prefix/suffix (just non-capturing groups); the suffix also allows up to 20 trailing name characters and up to 3 whitespace/quote/pipe characters
+	identifierCaseInsensitivePrefix = `(?i:`
+	identifierCaseInsensitiveSuffix = `)`
+	identifierPrefix                = `(?:`
+	identifierSuffix                = `)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}`
+
+	// commonly used assignment operators or function call
+	operator = `(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)`
+
+	// boundaries for the secret: each prefix opens the capture group that secretSuffix closes
+	// \x60 = ` (written as an escape because a backtick cannot appear inside a raw string literal)
+	secretPrefixUnique = `\b(`
+	secretPrefix       = `(?:'|\"|\s|=|\x60){0,5}(`
+	secretSuffix       = `)(?:['|\"|\n|\r|\s|\x60|;]|$)`
+)
+
+func generateSemiGenericRegex(identifiers []string, secretRegex string, isCaseInsensitive bool) *regexp.Regexp { // compiles identifiers + operator + captured secretRegex; regexp.MustCompile panics on an invalid pattern
+	var sb strings.Builder
+	// The identifiers should always be case-insensitive; isCaseInsensitive only decides whether the rest of the pattern (including secretRegex) is as well.
+	// This is inelegant but prevents an extraneous `(?i:)` from being added to the pattern when `(?i)` already covers it; it could be removed.
+	if isCaseInsensitive {
+		sb.WriteString(caseInsensitive)
+		writeIdentifiers(&sb, identifiers)
+	} else {
+		sb.WriteString(identifierCaseInsensitivePrefix)
+		writeIdentifiers(&sb, identifiers)
+		sb.WriteString(identifierCaseInsensitiveSuffix)
+	}
+	sb.WriteString(operator)
+	sb.WriteString(secretPrefix)
+	sb.WriteString(secretRegex)
+	sb.WriteString(secretSuffix)
+	return regexp.MustCompile(sb.String())
+}
+
+func writeIdentifiers(sb *strings.Builder, identifiers []string) { // appends the identifiers as one "|"-joined alternation between identifierPrefix and identifierSuffix
+	sb.WriteString(identifierPrefix)
+	sb.WriteString(strings.Join(identifiers, "|"))
+	sb.WriteString(identifierSuffix)
+}
+
+func generateUniqueTokenRegex(secretRegex string, isCaseInsensitive bool) *regexp.Regexp { // compiles secretPrefixUnique + secretRegex + secretSuffix (no identifier part), with a leading (?i) when isCaseInsensitive is set
+	var sb strings.Builder
+	if isCaseInsensitive {
+		sb.WriteString(caseInsensitive)
+	}
+	sb.WriteString(secretPrefixUnique)
+	sb.WriteString(secretRegex)
+	sb.WriteString(secretSuffix)
+	return regexp.MustCompile(sb.String())
+}
+
+func generateSampleSecret(identifier string, secret string) string { // renders a sample assignment of the form: <identifier>_api_token = "<secret>"
+	return fmt.Sprintf("%s_api_token = \"%s\"", identifier, secret)
+}
+
+func alphaNumeric(size string) string { // regex fragment for [a-z0-9] repeated {size}; size is inserted verbatim as the quantifier
+	return fmt.Sprintf(`[a-z0-9]{%s}`, size)
+}
+
+func alphaNumericExtendedShort(size string) string { // same as alphaNumeric but the class also allows "_" and "-"
+	return fmt.Sprintf(`[a-z0-9_-]{%s}`, size)
+}
diff --git a/engine/rules/vault.go b/engine/rules/vault.go
new file mode 100644
index 00000000..d6b73211
--- /dev/null
+++ b/engine/rules/vault.go
@@ -0,0 +1,25 @@
+package rules
+
+import (
+	"github.com/zricethezav/gitleaks/v8/cmd/generate/secrets"
+	"github.com/zricethezav/gitleaks/v8/config"
+)
+
+// VaultServiceToken builds and validates the "vault-service-token" rule. Using this local version because newer versions of gitleaks have an entropy value, which was set as too high.
+// It's here as prevention in case a newer version of gitleaks starts getting used and causes issues on this rule.
+// If gitleaks is updated on 2ms and the new version of this rule has entropy, set it to 3.0.
+func VaultServiceToken() *config.Rule {
+	// define rule: no Entropy is set here; the token is "hvs." followed by 90-100 characters, matched case-insensitively
+	r := config.Rule{
+		Description: "Identified a Vault Service Token, potentially compromising infrastructure security and access to sensitive credentials.",
+		RuleID:      "vault-service-token",
+		Regex:       generateUniqueTokenRegex(`hvs\.[a-z0-9_-]{90,100}`, true),
+		Keywords:    []string{"hvs"},
+	}
+
+	// validate: the generated sample ("hvs." plus a 90-character secret) is passed as a true positive; no false positives are supplied
+	tps := []string{
+		generateSampleSecret("vault", "hvs."+secrets.NewSecret(alphaNumericExtendedShort("90"))),
+	}
+	return validate(r, tps, nil)
+}