From bd593484e0a12168b0e9324a3c0c11f94d5a4bc9 Mon Sep 17 00:00:00 2001
From: Chikara Takahashi <chtakahashi10@gmail.com>
Date: Tue, 30 Jan 2024 15:46:31 -0500
Subject: [PATCH] docs: add a page on the dependency graph

---
 docs/infrasec/terraform/dependency-graph.md | 95 +++++++++++++++++++++
 docs/infrasec/terraform/naming.md           |  3 +
 docs/infrasec/terraform/style-guide.md      | 12 ++-
 3 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 docs/infrasec/terraform/dependency-graph.md

diff --git a/docs/infrasec/terraform/dependency-graph.md b/docs/infrasec/terraform/dependency-graph.md
new file mode 100644
index 00000000..dc737a41
--- /dev/null
+++ b/docs/infrasec/terraform/dependency-graph.md
@@ -0,0 +1,95 @@
+# Terraform Dependency Graph
+
+## Reading
+Read [this page](https://developer.hashicorp.com/terraform/internals/graph) as a primer on what this all means.
+
+## In practice
+It's difficult to be exhaustive about how this all works, so this page walks through a few situations that come up in practice.
+
+### Using outputs for resources to pass to other resources
+There are some AWS resources that accept a "name-like" argument as input, and it might seem easier to use that name directly as a string in other places as well. The issue arises when specific resources need to exist before others in order for the provisioning to work in one `terraform apply`.
+
+A simple example of this is the need to create an `aws_iam_user` resource before trying to attach the user to an `aws_iam_group`. Here is a configuration that might be unsuccessful:
+```hcl
+resource "aws_iam_user" "jdoe" {
+  name = "jdoe"
+  tags = {
+    Slack = "<jdoe's slack ID>"
+  }
+}
+
+resource "aws_iam_group" "user_group" {
+  name = var.group_name
+}
+
+resource "aws_iam_group_membership" "user_group" {
+  name  = "${var.group_name}-membership"
+  users = ["jdoe"]
+
+  group = aws_iam_group.user_group.name
+}
+```
+
+In the above example, both the user and the group membership depend only on a hard-coded string value, so Terraform has no way of knowing that the membership depends on the user. By changing the final block to be:
+```hcl
+resource "aws_iam_group_membership" "user_group" {
+  name  = "${var.group_name}-membership"
+  users = [aws_iam_user.jdoe.name]
+
+  group = aws_iam_group.user_group.name
+}
+```
+
+you can gate the creation of the group membership behind the creation of the user.
+
+### Using outputs for resources to pass to a local variable
+Here's an example of something that doesn't work. Terraform is currently not able to apply things sequentially. When you're using local variables, the values of those locals need to be evaluated prior to the execution of the plan; otherwise, the plan will fail. Here is an example:
+```hcl
+module "app_bucket" {
+  source                   = "trussworks/s3-private-bucket/aws"
+  version                  = "~> 7.1"
+  bucket                   = "${var.app}-${var.environment}-${var.region}"
+  logging_bucket           = module.app_log_bucket.aws_logs_bucket
+  use_account_alias_prefix = "false"
+}
+
+module "app_log_bucket" {
+  source         = "trussworks/logs/aws"
+  version        = "~> 16.2"
+  s3_bucket_name = "${var.app}-${var.environment}-logs"
+  allow_alb      = true
+  alb_logs_prefixes = [
+    "alb/${var.app}-${var.environment}",
+  ]
+}
+```
+
+The `logging_bucket` argument in the first module is used in the module as part of a [local variable declaration](https://github.com/trussworks/terraform-aws-s3-private-bucket/blob/1bfbbf320479bde1e78b16872a83fab1ab9d3792/main.tf#L11) (also pasted below):
+```hcl
+locals {
+  
+  ...
+  
+  enable_bucket_logging = var.logging_bucket != ""
+}
+```
+
+In the example above, you are essentially asking Terraform to complete the infrastructure deploy of the `app_log_bucket` module before even considering the dependency graph of `app_bucket`. Trying to apply the above produces the following error:
+```
+│ The "count" value depends on resource attributes that cannot be determined until apply, so
+│ Terraform cannot predict how many instances will be created. To work around this, use the
+│ -target argument to first apply only the resources that the count depends on.
+```
+
+The solution for this is to do one of the following:
+1. Ensure that all local variable declarations are defined at the time you run `terraform apply`
+2. Acknowledge that this requires the use of the `-target` flag to apply specific resources first (in this case `app_log_bucket`)
+
+Solution #2 isn't as bad as it sounds, because a common pattern at Truss is to set up terraform directories in the following order:
+- `/bootstrap` - to set up the terraform backend in S3/DynamoDB
+- `/admin-global` - to set up static infrastructure and logging mechanisms
+- `/app` - to set up the application specific infrastructure and things that are more dynamic.
+and a logging bucket would typically fall under `/admin-global`. This way you can avoid the use of the `-target` flag and just apply the directories sequentially.
+
+### A specific example for avoiding the use of -target or multiple applies
+There are times where a resource's inputs and attributes might feel a bit opaque, so you prefer to use strings instead. Here is an example of where this might fall apart. Let's say that 
\ No newline at end of file
diff --git a/docs/infrasec/terraform/naming.md b/docs/infrasec/terraform/naming.md
index 7bddaf84..75b3fa3d 100644
--- a/docs/infrasec/terraform/naming.md
+++ b/docs/infrasec/terraform/naming.md
@@ -1,11 +1,13 @@
 # Naming conventions for Terraform
 
 ## Resource naming
+
 - Always use underscores in resource names, consistent with the resource type.
 - Always use dashes in arguments, values, and places where values will be exposed to a human or read in the AWS console. This is also important because some AWS resources have restrictions on allowed characters in description values, and the error messages that these cause can be opaque.
 - Use descriptive singular nouns for resource names.
   - Do not repeat the resource type in the resource name
   - Within reason, do not use environment names in resource names either. Exceptions might include if you are working in one terraform directory that provisions the entirety of the AWS account and you have no choice :)
+
 ```hcl
 #
 # Good example
@@ -25,6 +27,7 @@ resource "aws_instance" "jenkins_ec2_instance_staging" {
   ...
 }
 ```
+
 Please refer to the following sheet for various naming conventions, including of terraform modules.
 
 [Infrasec Naming Conventions](../aws/naming.md)
diff --git a/docs/infrasec/terraform/style-guide.md b/docs/infrasec/terraform/style-guide.md
index abe1eed9..3533219f 100644
--- a/docs/infrasec/terraform/style-guide.md
+++ b/docs/infrasec/terraform/style-guide.md
@@ -3,11 +3,13 @@
 ## Style and Organization
 
 ### Basics
+
 As a starting point, follow the [basic Terraform Style Conventions](https://developer.hashicorp.com/terraform/language/syntax/style). These can be enforced automatically by running `terraform fmt`. This formatting can also be done via [pre-commit hooks](https://github.com/antonbabenko/pre-commit-terraform?tab=readme-ov-file#terraform_fmt).
 
 ### File Tree Structure/Naming
 
 A terraform directory generally should start off looking like this:
+
 ```text
 main.tf
 outputs.tf
@@ -16,6 +18,7 @@ variables.tf
 ```
 
 where:
+
 - **main.tf** contains the core logic of the infrastructure
 - **outputs.tf** contains any outputs that should be exposed. This may not be necessary if you are not writing a module.
 - **terraform.tf** contains the terraform {} config block, which specifies backend and terraform/provider versions and initializes required providers
@@ -23,11 +26,14 @@ where:
 
 As the complexity of the logic in `main.tf` grows, it should be broken up into smaller, well-named files.
 These subdivisions should be made along *service-based* and *purpose-based* lines:
+
 - A **service-based** file houses configuration for a specific AWS service. e.g. if you use SSM Parameter Store, you might have a file called `ssm.tf` that contains every parameter you provision. (please redact your values!)
 - A **purpose-based** file houses configuration for a set of resources that work together to serve a single purpose and whose resources are derived from multiple AWS services. e.g. when provisioning a lambda, we want to provision resources from AWS Lambda, AWS Cloudwatch Triggers, AWS IAM to name a few. These may be contained in a file that is either named `lambda.tf` or `lambda-<lambda-name-or-use>.tf`.
 
 ### Meta-Arguments
+
 The order of arguments when using [meta-arguments](https://developer.hashicorp.com/terraform/language/meta-arguments/depends_on) should be as follows (all separated by a newline):
+
 - `count / for_each`
 - `provider`
 - all arguments required by the data/resource/module, culminating with `tags`
@@ -56,10 +62,12 @@ resource "aws_instance" "foo" {
 }
 ```
 
-Keep in mind that the use of meta-arguments is not considered best practice. If you wish to enforce any semblance of order of operations in terraform, I recommend you consider the [dependency graph](https://developer.hashicorp.com/terraform/internals/graph). More on that in the [dependency graph](!NEEDS_LINK_TO_DOCUMENT) section 
+Keep in mind that the use of meta-arguments is not considered best practice. If you wish to enforce any semblance of order of operations in terraform, I recommend you consider the [dependency graph](https://developer.hashicorp.com/terraform/internals/graph). More on that in the [dependency graph](./dependency-graph.md) section.
 
 ### Variables
+
 `variables.tf` should be organized in alphabetical order (`tfsort` is a potential tool to help do this automatically). Use descriptions and type declarations. Try to name your variables with proper nouns and explicit/obvious meaning interpretations.
+
 ```hcl
 variable "ec2_desired_count" {
   description = "number of EC2 tasks to run ..."
@@ -71,4 +79,4 @@ variables "image_tag" {
   description = "the image tag to use for ..."
   type        = string
 }
-```
\ No newline at end of file
+```