Skip to content

Commit

Permalink
feat(backend): allow organisms without sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
fengelniederhammer committed Jan 16, 2025
1 parent cf191c0 commit 7ebd450
Show file tree
Hide file tree
Showing 14 changed files with 384 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ data class Schema(
val metadata: List<Metadata>,
val externalMetadata: List<ExternalMetadata> = emptyList(),
val earliestReleaseDate: EarliestReleaseDate = EarliestReleaseDate(false, emptyList()),
val allowSubmissionOfConsensusSequences: Boolean
val allowSubmissionOfConsensusSequences: Boolean = true,
)

// The JSON property names need to be kept in sync with the website config enum `metadataPossibleTypes` in `config.ts`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ open class SubmissionController(
@HiddenParam authenticatedUser: AuthenticatedUser,
@Parameter(description = GROUP_ID_DESCRIPTION) @RequestParam groupId: Int,
@Parameter(description = METADATA_FILE_DESCRIPTION) @RequestParam metadataFile: MultipartFile,
@Parameter(description = SEQUENCE_FILE_DESCRIPTION) @RequestParam sequenceFile: MultipartFile,
@Parameter(description = SEQUENCE_FILE_DESCRIPTION) @RequestParam sequenceFile: MultipartFile?,
@Parameter(description = "Data Use terms under which data is released.") @RequestParam dataUseTermsType:
DataUseTermsType,
@Parameter(
Expand Down Expand Up @@ -118,7 +118,7 @@ open class SubmissionController(
) @RequestParam metadataFile: MultipartFile,
@Parameter(
description = SEQUENCE_FILE_DESCRIPTION,
) @RequestParam sequenceFile: MultipartFile,
) @RequestParam sequenceFile: MultipartFile?,
): List<SubmissionIdMapping> {
val params = SubmissionParams.RevisionSubmissionParams(
organism,
Expand Down Expand Up @@ -172,7 +172,9 @@ open class SubmissionController(
}

val lastDatabaseWriteETag = releasedDataModel.getLastDatabaseWriteETag()
if (ifNoneMatch == lastDatabaseWriteETag) return ResponseEntity.status(HttpStatus.NOT_MODIFIED).build()
if (ifNoneMatch == lastDatabaseWriteETag) {
return ResponseEntity.status(HttpStatus.NOT_MODIFIED).build()
}

val headers = HttpHeaders()
headers.contentType = MediaType.parseMediaType(MediaType.APPLICATION_NDJSON_VALUE)
Expand Down
57 changes: 40 additions & 17 deletions backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import org.loculus.backend.api.DataUseTerms
import org.loculus.backend.api.Organism
import org.loculus.backend.api.SubmissionIdMapping
import org.loculus.backend.auth.AuthenticatedUser
import org.loculus.backend.config.BackendConfig
import org.loculus.backend.controller.BadRequestException
import org.loculus.backend.controller.DuplicateKeyException
import org.loculus.backend.controller.UnprocessableEntityException
Expand Down Expand Up @@ -41,14 +42,14 @@ interface SubmissionParams {
val organism: Organism
val authenticatedUser: AuthenticatedUser
val metadataFile: MultipartFile
val sequenceFile: MultipartFile
val sequenceFile: MultipartFile?
val uploadType: UploadType

data class OriginalSubmissionParams(
override val organism: Organism,
override val authenticatedUser: AuthenticatedUser,
override val metadataFile: MultipartFile,
override val sequenceFile: MultipartFile,
override val sequenceFile: MultipartFile?,
val groupId: Int,
val dataUseTerms: DataUseTerms,
) : SubmissionParams {
Expand All @@ -59,7 +60,7 @@ interface SubmissionParams {
override val organism: Organism,
override val authenticatedUser: AuthenticatedUser,
override val metadataFile: MultipartFile,
override val sequenceFile: MultipartFile,
override val sequenceFile: MultipartFile?,
) : SubmissionParams {
override val uploadType: UploadType = UploadType.REVISION
}
Expand All @@ -76,6 +77,7 @@ class SubmitModel(
private val groupManagementPreconditionValidator: GroupManagementPreconditionValidator,
private val dataUseTermsPreconditionValidator: DataUseTermsPreconditionValidator,
private val dateProvider: DateProvider,
private val backendConfig: BackendConfig,
) {

companion object AcceptedFileTypes {
Expand Down Expand Up @@ -106,9 +108,11 @@ class SubmitModel(
batchSize,
)

log.debug { "Validating submission with uploadId $uploadId" }
val (metadataSubmissionIds, sequencesSubmissionIds) = uploadDatabaseService.getUploadSubmissionIds(uploadId)
validateSubmissionIdSets(metadataSubmissionIds.toSet(), sequencesSubmissionIds.toSet())
if (requiresSequenceFile(submissionParams.organism)) {
log.debug { "Validating submission with uploadId $uploadId" }
val (metadataSubmissionIds, sequencesSubmissionIds) = uploadDatabaseService.getUploadSubmissionIds(uploadId)
validateSubmissionIdSets(metadataSubmissionIds.toSet(), sequencesSubmissionIds.toSet())
}

if (submissionParams is SubmissionParams.RevisionSubmissionParams) {
log.info { "Associating uploaded sequence data with existing sequence entries with uploadId $uploadId" }
Expand Down Expand Up @@ -150,17 +154,32 @@ class SubmitModel(
metadataTempFileToDelete.delete()
}

val sequenceTempFileToDelete = MaybeFile()
try {
val sequenceStream = getStreamFromFile(
submissionParams.sequenceFile,
uploadId,
sequenceFileTypes,
sequenceTempFileToDelete,
)
uploadSequences(uploadId, sequenceStream, batchSize, submissionParams.organism)
} finally {
sequenceTempFileToDelete.delete()
val sequenceFile = submissionParams.sequenceFile
if (sequenceFile == null) {
if (requiresSequenceFile(submissionParams.organism)) {
throw BadRequestException(
"Submissions for organism ${submissionParams.organism.name} require a sequence file.",
)
}
} else {
if (!requiresSequenceFile(submissionParams.organism)) {
throw BadRequestException(
"Sequence uploads are not allowed for organism ${submissionParams.organism.name}.",
)
}

val sequenceTempFileToDelete = MaybeFile()
try {
val sequenceStream = getStreamFromFile(
sequenceFile,
uploadId,
sequenceFileTypes,
sequenceTempFileToDelete,
)
uploadSequences(uploadId, sequenceStream, batchSize, submissionParams.organism)
} finally {
sequenceTempFileToDelete.delete()
}
}
}

Expand Down Expand Up @@ -324,4 +343,8 @@ class SubmitModel(
SequenceUploadAuxTable.select(SequenceUploadAuxTable.sequenceSubmissionIdColumn).count() > 0
return metadataInAuxTable || sequencesInAuxTable
}

/**
 * Looks up the instance config of [organism] and returns the
 * `allowSubmissionOfConsensusSequences` flag of its schema.
 */
private fun requiresSequenceFile(organism: Organism): Boolean {
    val schema = backendConfig.getInstanceConfig(organism).schema
    return schema.allowSubmissionOfConsensusSequences
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,17 @@ class UploadDatabaseService(
jsonb_build_object(
'metadata', metadata_upload_aux_table.metadata,
'unalignedNucleotideSequences',
jsonb_object_agg(
sequence_upload_aux_table.segment_name,
sequence_upload_aux_table.compressed_sequence_data::jsonb
COALESCE(
jsonb_object_agg(
sequence_upload_aux_table.segment_name,
sequence_upload_aux_table.compressed_sequence_data::jsonb
) FILTER (WHERE sequence_upload_aux_table.segment_name IS NOT NULL),
'{}'::jsonb
)
)
FROM
metadata_upload_aux_table
JOIN
LEFT JOIN
sequence_upload_aux_table
ON metadata_upload_aux_table.upload_id = sequence_upload_aux_table.upload_id
AND metadata_upload_aux_table.submission_id = sequence_upload_aux_table.submission_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.testcontainers.shaded.org.awaitility.Awaitility.await

const val DEFAULT_ORGANISM = "dummyOrganism"
const val OTHER_ORGANISM = "otherOrganism"
const val ORGANISM_WITHOUT_SEQUENCES = "dummyOrganismWithoutSequences"
const val DEFAULT_PIPELINE_VERSION = 1L
const val DEFAULT_EXTERNAL_METADATA_UPDATER = "ena"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,24 @@ import org.hamcrest.CoreMatchers.hasItem
import org.hamcrest.CoreMatchers.`is`
import org.hamcrest.MatcherAssert.assertThat
import org.hamcrest.Matchers.allOf
import org.hamcrest.Matchers.anEmptyMap
import org.hamcrest.Matchers.containsInAnyOrder
import org.hamcrest.Matchers.empty
import org.hamcrest.Matchers.greaterThan
import org.hamcrest.Matchers.hasProperty
import org.hamcrest.Matchers.hasSize
import org.hamcrest.Matchers.matchesRegex
import org.junit.jupiter.api.Test
import org.loculus.backend.api.GeneticSequence
import org.loculus.backend.api.OriginalData
import org.loculus.backend.api.Status.IN_PROCESSING
import org.loculus.backend.api.Status.RECEIVED
import org.loculus.backend.api.UnprocessedData
import org.loculus.backend.config.BackendSpringProperty
import org.loculus.backend.controller.DEFAULT_ORGANISM
import org.loculus.backend.controller.DEFAULT_USER_NAME
import org.loculus.backend.controller.EndpointTest
import org.loculus.backend.controller.ORGANISM_WITHOUT_SEQUENCES
import org.loculus.backend.controller.OTHER_ORGANISM
import org.loculus.backend.controller.assertStatusIs
import org.loculus.backend.controller.expectForbiddenResponse
Expand All @@ -27,7 +31,6 @@ import org.loculus.backend.controller.expectUnauthorizedResponse
import org.loculus.backend.controller.getAccessionVersions
import org.loculus.backend.controller.jwtForDefaultUser
import org.loculus.backend.controller.submission.SubmitFiles.DefaultFiles
import org.loculus.backend.controller.submission.SubmitFiles.DefaultFiles.NUMBER_OF_SEQUENCES
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.http.HttpHeaders.ETAG
import org.springframework.test.web.servlet.result.MockMvcResultMatchers.header
Expand Down Expand Up @@ -181,4 +184,37 @@ class ExtractUnprocessedDataEndpointTest(
`is`(empty()),
)
}

/**
 * Submits the default files for [ORGANISM_WITHOUT_SEQUENCES], extracts the unprocessed data for
 * that organism and checks that the entry belonging to the first submitted accession carries the
 * default original metadata together with an empty `unalignedNucleotideSequences` map.
 */
@Test
fun `GIVEN sequences for organism without sequences THEN only returns metadata`() {
val submissionResult = convenienceClient.submitDefaultFiles(organism = ORGANISM_WITHOUT_SEQUENCES)
val accessionVersions = submissionResult.submissionIdMappings

// Ask for DefaultFiles.NUMBER_OF_SEQUENCES entries and expect exactly that many back.
val result = client.extractUnprocessedData(
numberOfSequenceEntries = DefaultFiles.NUMBER_OF_SEQUENCES,
organism = ORGANISM_WITHOUT_SEQUENCES,
)
val responseBody = result.expectNdjsonAndGetContent<UnprocessedData>()
assertThat(responseBody, hasSize(DefaultFiles.NUMBER_OF_SEQUENCES))
// Only the entry for the first accession is inspected in detail.
assertThat(
responseBody,
hasItem(
allOf(
hasProperty<UnprocessedData>("accession", `is`(accessionVersions[0].accession)),
hasProperty("version", `is`(1L)),
hasProperty(
"data",
allOf(
hasProperty<OriginalData<GeneticSequence>>("metadata", `is`(defaultOriginalData.metadata)),
// The sequence map is expected to be present but empty.
hasProperty("unalignedNucleotideSequences", `is`(anEmptyMap<String, GeneticSequence>())),
),
),
hasProperty("submissionId", matchesRegex("custom[0-9]")),
hasProperty("submitter", `is`(DEFAULT_USER_NAME)),
hasProperty("groupId", `is`(submissionResult.groupId)),
hasProperty("submittedAt", greaterThan(1_700_000_000L)),
),
),
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.fasterxml.jackson.databind.node.IntNode
import com.fasterxml.jackson.databind.node.NullNode
import com.fasterxml.jackson.databind.node.TextNode
import org.loculus.backend.api.GeneName
import org.loculus.backend.api.GeneticSequence
import org.loculus.backend.api.Insertion
import org.loculus.backend.api.PreprocessingAnnotation
import org.loculus.backend.api.PreprocessingAnnotationSource
Expand Down Expand Up @@ -99,6 +100,21 @@ val defaultProcessedDataMultiSegmented = ProcessedData(
),
)

/**
 * Processed-data fixture that carries metadata only: all sequence and insertion maps are empty.
 */
val defaultProcessedDataWithoutSequences = ProcessedData<GeneticSequence>(
    metadata = mapOf(
        "date" to TextNode("2002-12-15"),
        "host" to TextNode("google.com"),
        "region" to TextNode("Europe"),
        "country" to TextNode("Spain"),
        "division" to NullNode.instance,
    ),
    // No sequence-related content of any kind.
    unalignedNucleotideSequences = mapOf(),
    alignedNucleotideSequences = mapOf(),
    nucleotideInsertions = mapOf(),
    alignedAminoAcidSequences = mapOf(),
    aminoAcidInsertions = mapOf(),
)

private val defaultSuccessfulSubmittedData = SubmittedProcessedData(
accession = "If a test result shows this, processed data was not prepared correctly.",
version = 1,
Expand Down
Loading

0 comments on commit 7ebd450

Please sign in to comment.