Skip to content

Commit

Permalink
Merge pull request #855 from /issues/854/attr-cache-improvements
Browse files Browse the repository at this point in the history
Improve file attribute caching
  • Loading branch information
steve-todorov authored Nov 21, 2024
2 parents 03cd6fb + 596434b commit 6b58457
Show file tree
Hide file tree
Showing 48 changed files with 1,345 additions and 436 deletions.
79 changes: 52 additions & 27 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ plugins {

allprojects {
repositories {
mavenLocal()
//mavenLocal()
// Allows you to specify your own repository manager instance.
if (project.hasProperty("s3fs.proxy.url")) {
maven {
Expand All @@ -38,6 +38,50 @@ java {
withJavadocJar()
}

// Configure multiple test sources
testing {
suites {
// Just for self reference, technically this is already configured by default.
val test by getting(JvmTestSuite::class) {
useJUnitJupiter() // already the default.
testType.set(TestSuiteType.UNIT_TEST) // already the default.
}

// testIntegration test sources: registers a second JVM test suite named "testIntegration".
val testIntegration by registering(JvmTestSuite::class) {
// Keep a reference to the suite being registered so the nested configuration blocks
// below can look things up by its name (`self.name`).
val self = this
testType.set(TestSuiteType.INTEGRATION_TEST)

// Add the compiled output of both the "main" and the "test" source sets to this suite's
// compile and runtime classpaths, so integration tests can use production and unit-test classes.
sourceSets {
named(self.name) {
compileClasspath += sourceSets.main.get().output + sourceSets.test.get().output
runtimeClasspath += sourceSets.main.get().output + sourceSets.test.get().output
}
}

// Inherit implementation, runtime and test dependencies (adds them to the compile classpath)
configurations.named("${self.name}Implementation") {
extendsFrom(configurations.testImplementation.get())
extendsFrom(configurations.runtimeOnly.get())
extendsFrom(configurations.implementation.get())
}

// Make sure the integration test is executed as part of the "check" task.
tasks.named<Task>("check") {
dependsOn(named<JvmTestSuite>(self.name))
}

// Ordering only: when both are scheduled, run this suite's task after `test`
// (the unit test suite declared above). This does not add a dependency on it.
tasks.named<Task>(self.name) {
mustRunAfter(test)
}

}
}


}

dependencies {
api(platform("software.amazon.awssdk:bom:2.29.9"))
api("software.amazon.awssdk:s3") {
Expand All @@ -49,6 +93,9 @@ dependencies {
exclude("org.slf4j", "slf4j-api")
}
api("com.google.code.findbugs:jsr305:3.0.2")
api("com.github.ben-manes.caffeine:caffeine:2.9.3") {
because("Last version to support JDK 8.")
}

testImplementation("ch.qos.logback:logback-classic:1.5.12")
testImplementation("org.junit.jupiter:junit-jupiter:5.11.3")
Expand Down Expand Up @@ -140,6 +187,10 @@ tasks {
}
}

named<Task>("check") {
dependsOn(named<Task>("testIntegration"))
}

named<Task>("jacocoTestReport") {
group = "jacoco"
dependsOn(named("test")) // tests are required to run before generating the report
Expand All @@ -162,32 +213,10 @@ tasks {
group = "sonar"
}

named<Test>("test") {
description = "Run unit tests"
outputs.upToDateWhen { false }
useJUnitPlatform {
filter {
excludeTestsMatching("*IT")
}
}
}

withType<Test> {
defaultCharacterEncoding = "UTF-8"
}

create<Test>("it-s3") {
group = "verification"
description = "Run integration tests using S3"
useJUnitPlatform {
filter {
includeTestsMatching("*IT")
includeTags("it-s3")
}
}
mustRunAfter(named("test"))
}

// TODO: There are some problems with using minio that overcomplicate the setup.
// For the time being we'll be disabling it until we figure out the best path forward.
// create<Test>("it-minio") {
Expand All @@ -201,10 +230,6 @@ tasks {
// }
// }

named<Task>("check") {
dependsOn(named("it-s3"))
}

withType<Sign> {
onlyIf {
(project.hasProperty("withSignature") && project.findProperty("withSignature") == "true") ||
Expand Down
8 changes: 5 additions & 3 deletions docs/content/contributing/developer-guide/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Before you start writing code, please read:
## System requirements

1. Gradle 8.1, or higher
2. `JDK8`, `JDK11` or `JDK17`
2. `JDK8`, `JDK11`, `JDK17` or `JDK21`

## Finding issues to work on

Expand Down Expand Up @@ -85,7 +85,7 @@ s3fs.proxy.url=https://my.local.domain/path/to/repository
### Build
Builds the entire code and runs unit and integration tests.
It is assumed you already have the `amazon-test.properties` configuration in place.
It is assumed you already have the `amazon-test.properties` configuration in place under `src/test/resources` or `src/testIntegration/resources`.
```
./gradlew build
Expand All @@ -100,9 +100,11 @@ It is assumed you already have the `amazon-test.properties` configuration in pla
### Run only integration tests
```
./gradlew it-s3
./gradlew testIntegration
```
You can also use `./gradlew build -x testIntegration` to skip the integration tests.
### Run all tests
```
Expand Down
52 changes: 27 additions & 25 deletions docs/content/reference/configuration-options.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,30 @@

A complete list of environment variables which can be set to configure the client.

| Key | Default | Description |
|-------------------------------------------|---------|-------------------------------------------------------------------------------------------------------------------------|
| s3fs.access.key | none | <small>AWS access key, used to identify the user interacting with AWS</small> |
| s3fs.secret.key | none | <small>AWS secret access key, used to authenticate the user interacting with AWS</small> |
| s3fs.request.metric.collector.class | TODO | <small>Fully-qualified class name to instantiate an AWS SDK request/response metric collector</small> |
| s3fs.connection.timeout | TODO | <small>Timeout (in milliseconds) for establishing a connection to a remote service</small> |
| s3fs.max.connections | TODO | <small>Maximum number of connections allowed in a connection pool</small> |
| s3fs.max.retry.error | TODO | <small>Maximum number of times that a single request should be retried, assuming it fails for a retryable error</small> |
| s3fs.protocol | TODO | <small>Protocol (HTTP or HTTPS) to use when connecting to AWS</small> |
| s3fs.proxy.domain | none | <small>For NTLM proxies: The Windows domain name to use when authenticating with the proxy</small> |
| s3fs.proxy.protocol | none | <small>Proxy connection protocol.</small> |
| s3fs.proxy.host | none | <small>Proxy host name either from the configured endpoint or from the "http.proxyHost" system property</small> |
| s3fs.proxy.password | none | <small>The password to use when connecting through a proxy</small> |
| s3fs.proxy.port | none | <small>Proxy port either from the configured endpoint or from the "http.proxyPort" system property</small> |
| s3fs.proxy.username | none | <small>The username to use when connecting through a proxy</small> |
| s3fs.proxy.workstation | none | <small>For NTLM proxies: The Windows workstation name to use when authenticating with the proxy</small> |
| s3fs.region | none | <small>The AWS Region to configure the client</small> |
| s3fs.socket.send.buffer.size.hint | TODO | <small>The size hint (in bytes) for the low level TCP send buffer</small> |
| s3fs.socket.receive.buffer.size.hint | TODO | <small>The size hint (in bytes) for the low level TCP receive buffer</small> |
| s3fs.socket.timeout | TODO | <small>Timeout (in milliseconds) for each read to the underlying socket</small> |
| s3fs.user.agent.prefix | TODO | <small>Prefix of the user agent that is sent with each request to AWS</small> |
| s3fs.amazon.s3.factory.class | TODO | <small>Fully-qualified class name to instantiate a S3 factory base class which creates a S3 client instance</small> |
| s3fs.signer.override | TODO | <small>Fully-qualified class name to define the signer that should be used when authenticating with AWS</small> |
| s3fs.path.style.access | TODO | <small>Boolean that indicates whether the client uses path-style access for all requests</small> |
| s3fs.request.header.cache-control | blank | <small>Configures the `cacheControl` on request builders (i.e. `CopyObjectRequest`, `PutObjectRequest`, etc) |
| Key | Default | Description |
|-------------------------------------|---------|-------------------------------------------------------------------------------------------------------------------------|
| s3fs.access.key | none | <small>AWS access key, used to identify the user interacting with AWS</small> |
| s3fs.secret.key | none | <small>AWS secret access key, used to authenticate the user interacting with AWS</small> |
| s3fs.request.metric.collector.class | TODO | <small>Fully-qualified class name to instantiate an AWS SDK request/response metric collector</small> |
| s3fs.cache.attributes.ttl | `60000` | <small>TTL for the cached file attributes (in millis)</small> |
| s3fs.cache.attributes.size | `5000` | <small>Total size of cached file attributes</small> |
| s3fs.connection.timeout | TODO | <small>Timeout (in milliseconds) for establishing a connection to a remote service</small> |
| s3fs.max.connections | TODO | <small>Maximum number of connections allowed in a connection pool</small> |
| s3fs.max.retry.error | TODO | <small>Maximum number of times that a single request should be retried, assuming it fails for a retryable error</small> |
| s3fs.protocol | TODO | <small>Protocol (HTTP or HTTPS) to use when connecting to AWS</small> |
| s3fs.proxy.domain | none | <small>For NTLM proxies: The Windows domain name to use when authenticating with the proxy</small> |
| s3fs.proxy.protocol | none | <small>Proxy connection protocol.</small> |
| s3fs.proxy.host | none | <small>Proxy host name either from the configured endpoint or from the "http.proxyHost" system property</small> |
| s3fs.proxy.password | none | <small>The password to use when connecting through a proxy</small> |
| s3fs.proxy.port | none | <small>Proxy port either from the configured endpoint or from the "http.proxyPort" system property</small> |
| s3fs.proxy.username | none | <small>The username to use when connecting through a proxy</small> |
| s3fs.proxy.workstation | none | <small>For NTLM proxies: The Windows workstation name to use when authenticating with the proxy</small> |
| s3fs.region | none | <small>The AWS Region to configure the client</small> |
| s3fs.socket.send.buffer.size.hint | TODO | <small>The size hint (in bytes) for the low level TCP send buffer</small> |
| s3fs.socket.receive.buffer.size.hint | TODO | <small>The size hint (in bytes) for the low level TCP receive buffer</small> |
| s3fs.socket.timeout | TODO | <small>Timeout (in milliseconds) for each read to the underlying socket</small> |
| s3fs.user.agent.prefix | TODO | <small>Prefix of the user agent that is sent with each request to AWS</small> |
| s3fs.amazon.s3.factory.class | TODO | <small>Fully-qualified class name to instantiate a S3 factory base class which creates a S3 client instance</small> |
| s3fs.signer.override | TODO | <small>Fully-qualified class name to define the signer that should be used when authenticating with AWS</small> |
| s3fs.path.style.access | TODO | <small>Boolean that indicates whether the client uses path-style access for all requests</small> |
| s3fs.request.header.cache-control | blank | <small>Configures the `cacheControl` on request builders (e.g. `CopyObjectRequest`, `PutObjectRequest`, etc.)</small> |
14 changes: 14 additions & 0 deletions src/main/java/org/carlspring/cloud/storage/s3fs/S3Factory.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.time.Duration;
import java.util.Properties;

import org.carlspring.cloud.storage.s3fs.attribute.S3BasicFileAttributes;
import org.carlspring.cloud.storage.s3fs.attribute.S3PosixFileAttributes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
Expand Down Expand Up @@ -41,6 +43,18 @@ public abstract class S3Factory

public static final String SECRET_KEY = "s3fs.secret.key";

/**
* Maximum TTL in millis to cache {@link S3BasicFileAttributes} and {@link S3PosixFileAttributes}.
*/
public static final String CACHE_ATTRIBUTES_TTL = "s3fs.cache.attributes.ttl";
public static final int CACHE_ATTRIBUTES_TTL_DEFAULT = 60000;

/**
* Total size of {@link S3BasicFileAttributes} and {@link S3PosixFileAttributes} cache.
* When the property is not set, {@link #CACHE_ATTRIBUTES_SIZE_DEFAULT} is used.
*/
public static final String CACHE_ATTRIBUTES_SIZE = "s3fs.cache.attributes.size";
// Kept in sync with the default documented in docs/content/reference/configuration-options.md (5000).
public static final int CACHE_ATTRIBUTES_SIZE_DEFAULT = 5000;

public static final String REQUEST_METRIC_COLLECTOR_CLASS = "s3fs.request.metric.collector.class";

public static final String CONNECTION_TIMEOUT = "s3fs.connection.timeout";
Expand Down
34 changes: 25 additions & 9 deletions src/main/java/org/carlspring/cloud/storage/s3fs/S3FileSystem.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
package org.carlspring.cloud.storage.s3fs;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.carlspring.cloud.storage.s3fs.cache.S3FileAttributesCache;
import org.carlspring.cloud.storage.s3fs.util.S3Utils;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.Bucket;

import java.io.IOException;
import java.nio.file.FileStore;
import java.nio.file.FileSystem;
Expand All @@ -10,10 +17,6 @@
import java.util.Properties;
import java.util.Set;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.Bucket;
import static org.carlspring.cloud.storage.s3fs.S3Path.PATH_SEPARATOR;

/**
Expand All @@ -34,7 +37,7 @@ public class S3FileSystem

private final String endpoint;

private final int cache;
private S3FileAttributesCache fileAttributesCache;

private final Properties properties;

Expand All @@ -48,8 +51,12 @@ public S3FileSystem(final S3FileSystemProvider provider,
this.key = key;
this.client = client;
this.endpoint = endpoint;
this.cache = 60000; // 1 minute cache for the s3Path
this.properties = properties;

int cacheTTL = Integer.parseInt(String.valueOf(properties.getOrDefault(S3Factory.CACHE_ATTRIBUTES_TTL, S3Factory.CACHE_ATTRIBUTES_TTL_DEFAULT)));
int cacheSize = Integer.parseInt(String.valueOf(properties.getOrDefault(S3Factory.CACHE_ATTRIBUTES_SIZE, S3Factory.CACHE_ATTRIBUTES_SIZE_DEFAULT)));

this.fileAttributesCache = new S3FileAttributesCache(cacheTTL, cacheSize);
}

public S3FileSystem(final S3FileSystemProvider provider,
Expand All @@ -75,6 +82,7 @@ public String getKey()
public void close()
throws IOException
{
// Drop every cached file attribute entry first, so nothing stale outlives this file system.
fileAttributesCache.invalidateAll();
// Then hand the actual shutdown over to the owning provider.
provider.close(this);
}

Expand Down Expand Up @@ -171,14 +179,22 @@ public String getEndpoint()
return endpoint;
}

/**
* @deprecated Use {@link org.carlspring.cloud.storage.s3fs.util.S3Utils#key2Parts(String)} instead. To be removed in one of the next major versions.
* @param keyParts
* @return String[]
*/
public String[] key2Parts(String keyParts)
{
return keyParts.split(PATH_SEPARATOR);
return S3Utils.key2Parts(keyParts);
}

public int getCache()
/**
* @return The {@link S3FileAttributesCache} instance holding the path attributes cache for this file provider.
*/
public S3FileAttributesCache getFileAttributesCache()
{
return cache;
return fileAttributesCache;
}

/**
Expand Down
Loading

0 comments on commit 6b58457

Please sign in to comment.