Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

3388 allow organisms without consensus sequences #3537

Merged
merged 15 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ data class Schema(
val metadata: List<Metadata>,
val externalMetadata: List<ExternalMetadata> = emptyList(),
val earliestReleaseDate: EarliestReleaseDate = EarliestReleaseDate(false, emptyList()),
val allowSubmissionOfConsensusSequences: Boolean = true,
fengelniederhammer marked this conversation as resolved.
Show resolved Hide resolved
)

// The JSON property names must be kept in sync with the website config enum `metadataPossibleTypes` in `config.ts`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ open class SubmissionController(
@HiddenParam authenticatedUser: AuthenticatedUser,
@Parameter(description = GROUP_ID_DESCRIPTION) @RequestParam groupId: Int,
@Parameter(description = METADATA_FILE_DESCRIPTION) @RequestParam metadataFile: MultipartFile,
@Parameter(description = SEQUENCE_FILE_DESCRIPTION) @RequestParam sequenceFile: MultipartFile,
@Parameter(description = SEQUENCE_FILE_DESCRIPTION) @RequestParam sequenceFile: MultipartFile?,
@Parameter(description = "Data Use terms under which data is released.") @RequestParam dataUseTermsType:
DataUseTermsType,
@Parameter(
Expand Down Expand Up @@ -118,7 +118,7 @@ open class SubmissionController(
) @RequestParam metadataFile: MultipartFile,
@Parameter(
description = SEQUENCE_FILE_DESCRIPTION,
) @RequestParam sequenceFile: MultipartFile,
) @RequestParam sequenceFile: MultipartFile?,
): List<SubmissionIdMapping> {
val params = SubmissionParams.RevisionSubmissionParams(
organism,
Expand Down Expand Up @@ -172,7 +172,9 @@ open class SubmissionController(
}

val lastDatabaseWriteETag = releasedDataModel.getLastDatabaseWriteETag()
if (ifNoneMatch == lastDatabaseWriteETag) return ResponseEntity.status(HttpStatus.NOT_MODIFIED).build()
if (ifNoneMatch == lastDatabaseWriteETag) {
return ResponseEntity.status(HttpStatus.NOT_MODIFIED).build()
}

val headers = HttpHeaders()
headers.contentType = MediaType.parseMediaType(MediaType.APPLICATION_NDJSON_VALUE)
Expand Down
57 changes: 40 additions & 17 deletions backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import org.loculus.backend.api.DataUseTerms
import org.loculus.backend.api.Organism
import org.loculus.backend.api.SubmissionIdMapping
import org.loculus.backend.auth.AuthenticatedUser
import org.loculus.backend.config.BackendConfig
import org.loculus.backend.controller.BadRequestException
import org.loculus.backend.controller.DuplicateKeyException
import org.loculus.backend.controller.UnprocessableEntityException
Expand Down Expand Up @@ -41,14 +42,14 @@ interface SubmissionParams {
val organism: Organism
val authenticatedUser: AuthenticatedUser
val metadataFile: MultipartFile
val sequenceFile: MultipartFile
val sequenceFile: MultipartFile?
val uploadType: UploadType

data class OriginalSubmissionParams(
override val organism: Organism,
override val authenticatedUser: AuthenticatedUser,
override val metadataFile: MultipartFile,
override val sequenceFile: MultipartFile,
override val sequenceFile: MultipartFile?,
val groupId: Int,
val dataUseTerms: DataUseTerms,
) : SubmissionParams {
Expand All @@ -59,7 +60,7 @@ interface SubmissionParams {
override val organism: Organism,
override val authenticatedUser: AuthenticatedUser,
override val metadataFile: MultipartFile,
override val sequenceFile: MultipartFile,
override val sequenceFile: MultipartFile?,
) : SubmissionParams {
override val uploadType: UploadType = UploadType.REVISION
}
Expand All @@ -76,6 +77,7 @@ class SubmitModel(
private val groupManagementPreconditionValidator: GroupManagementPreconditionValidator,
private val dataUseTermsPreconditionValidator: DataUseTermsPreconditionValidator,
private val dateProvider: DateProvider,
private val backendConfig: BackendConfig,
) {

companion object AcceptedFileTypes {
Expand Down Expand Up @@ -106,9 +108,11 @@ class SubmitModel(
batchSize,
)

log.debug { "Validating submission with uploadId $uploadId" }
val (metadataSubmissionIds, sequencesSubmissionIds) = uploadDatabaseService.getUploadSubmissionIds(uploadId)
validateSubmissionIdSets(metadataSubmissionIds.toSet(), sequencesSubmissionIds.toSet())
if (requiresSequenceFile(submissionParams.organism)) {
log.debug { "Validating submission with uploadId $uploadId" }
val (metadataSubmissionIds, sequencesSubmissionIds) = uploadDatabaseService.getUploadSubmissionIds(uploadId)
validateSubmissionIdSets(metadataSubmissionIds.toSet(), sequencesSubmissionIds.toSet())
}

if (submissionParams is SubmissionParams.RevisionSubmissionParams) {
log.info { "Associating uploaded sequence data with existing sequence entries with uploadId $uploadId" }
Expand Down Expand Up @@ -150,17 +154,32 @@ class SubmitModel(
metadataTempFileToDelete.delete()
}

val sequenceTempFileToDelete = MaybeFile()
try {
val sequenceStream = getStreamFromFile(
submissionParams.sequenceFile,
uploadId,
sequenceFileTypes,
sequenceTempFileToDelete,
)
uploadSequences(uploadId, sequenceStream, batchSize, submissionParams.organism)
} finally {
sequenceTempFileToDelete.delete()
val sequenceFile = submissionParams.sequenceFile
if (sequenceFile == null) {
if (requiresSequenceFile(submissionParams.organism)) {
throw BadRequestException(
"Submissions for organism ${submissionParams.organism.name} require a sequence file.",
)
}
} else {
if (!requiresSequenceFile(submissionParams.organism)) {
throw BadRequestException(
"Sequence uploads are not allowed for organism ${submissionParams.organism.name}.",
)
}

val sequenceTempFileToDelete = MaybeFile()
try {
val sequenceStream = getStreamFromFile(
sequenceFile,
uploadId,
sequenceFileTypes,
sequenceTempFileToDelete,
)
uploadSequences(uploadId, sequenceStream, batchSize, submissionParams.organism)
} finally {
sequenceTempFileToDelete.delete()
}
}
}

Expand Down Expand Up @@ -324,4 +343,8 @@ class SubmitModel(
SequenceUploadAuxTable.select(SequenceUploadAuxTable.sequenceSubmissionIdColumn).count() > 0
return metadataInAuxTable || sequencesInAuxTable
}

/**
 * Tells whether a submission for [organism] has to come with a sequence file.
 *
 * The answer is the `allowSubmissionOfConsensusSequences` flag found on the schema of the
 * instance config that [backendConfig] returns for the organism.
 *
 * @param organism the organism whose instance config is looked up
 * @return the value of the organism's `allowSubmissionOfConsensusSequences` schema flag
 */
private fun requiresSequenceFile(organism: Organism): Boolean = backendConfig
    .getInstanceConfig(organism)
    .schema
    .allowSubmissionOfConsensusSequences
}
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,17 @@ class UploadDatabaseService(
jsonb_build_object(
'metadata', metadata_upload_aux_table.metadata,
'unalignedNucleotideSequences',
jsonb_object_agg(
sequence_upload_aux_table.segment_name,
sequence_upload_aux_table.compressed_sequence_data::jsonb
COALESCE(
jsonb_object_agg(
sequence_upload_aux_table.segment_name,
sequence_upload_aux_table.compressed_sequence_data::jsonb
) FILTER (WHERE sequence_upload_aux_table.segment_name IS NOT NULL),
'{}'::jsonb
)
)
FROM
metadata_upload_aux_table
JOIN
LEFT JOIN
sequence_upload_aux_table
ON metadata_upload_aux_table.upload_id = sequence_upload_aux_table.upload_id
AND metadata_upload_aux_table.submission_id = sequence_upload_aux_table.submission_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.testcontainers.shaded.org.awaitility.Awaitility.await

// Shared constants for the controller endpoint tests.
// NOTE(review): the organism names presumably have to match organisms defined in the test backend
// config - confirm against that config before renaming any of them.
const val DEFAULT_ORGANISM = "dummyOrganism"
const val OTHER_ORGANISM = "otherOrganism"
// Organism name used by tests that cover submissions without consensus sequences.
const val ORGANISM_WITHOUT_CONSENSUS_SEQUENCES = "dummyOrganismWithoutConsensusSequences"
const val DEFAULT_PIPELINE_VERSION = 1L
const val DEFAULT_EXTERNAL_METADATA_UPDATER = "ena"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,24 @@ import org.hamcrest.CoreMatchers.hasItem
import org.hamcrest.CoreMatchers.`is`
import org.hamcrest.MatcherAssert.assertThat
import org.hamcrest.Matchers.allOf
import org.hamcrest.Matchers.anEmptyMap
import org.hamcrest.Matchers.containsInAnyOrder
import org.hamcrest.Matchers.empty
import org.hamcrest.Matchers.greaterThan
import org.hamcrest.Matchers.hasProperty
import org.hamcrest.Matchers.hasSize
import org.hamcrest.Matchers.matchesRegex
import org.junit.jupiter.api.Test
import org.loculus.backend.api.GeneticSequence
import org.loculus.backend.api.OriginalData
import org.loculus.backend.api.Status.IN_PROCESSING
import org.loculus.backend.api.Status.RECEIVED
import org.loculus.backend.api.UnprocessedData
import org.loculus.backend.config.BackendSpringProperty
import org.loculus.backend.controller.DEFAULT_ORGANISM
import org.loculus.backend.controller.DEFAULT_USER_NAME
import org.loculus.backend.controller.EndpointTest
import org.loculus.backend.controller.ORGANISM_WITHOUT_CONSENSUS_SEQUENCES
import org.loculus.backend.controller.OTHER_ORGANISM
import org.loculus.backend.controller.assertStatusIs
import org.loculus.backend.controller.expectForbiddenResponse
Expand All @@ -27,7 +31,6 @@ import org.loculus.backend.controller.expectUnauthorizedResponse
import org.loculus.backend.controller.getAccessionVersions
import org.loculus.backend.controller.jwtForDefaultUser
import org.loculus.backend.controller.submission.SubmitFiles.DefaultFiles
import org.loculus.backend.controller.submission.SubmitFiles.DefaultFiles.NUMBER_OF_SEQUENCES
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.http.HttpHeaders.ETAG
import org.springframework.test.web.servlet.result.MockMvcResultMatchers.header
Expand Down Expand Up @@ -181,4 +184,37 @@ class ExtractUnprocessedDataEndpointTest(
`is`(empty()),
)
}

@Test
fun `GIVEN entries for organism without consensus sequences THEN only returns metadata`() {
    // Submit the default files under ORGANISM_WITHOUT_CONSENSUS_SEQUENCES and remember the id mappings.
    val submission = convenienceClient.submitDefaultFiles(organism = ORGANISM_WITHOUT_CONSENSUS_SEQUENCES)
    val idMappings = submission.submissionIdMappings
    val expectedEntryCount = DefaultFiles.NUMBER_OF_SEQUENCES

    // Fetch the unprocessed entries for that organism and decode the NDJSON response.
    val unprocessedEntries = client
        .extractUnprocessedData(
            numberOfSequenceEntries = expectedEntryCount,
            organism = ORGANISM_WITHOUT_CONSENSUS_SEQUENCES,
        )
        .expectNdjsonAndGetContent<UnprocessedData>()

    assertThat(unprocessedEntries, hasSize(expectedEntryCount))

    // The original data must carry the default metadata together with an empty sequence map.
    val metadataOnlyData = allOf(
        hasProperty<OriginalData<GeneticSequence>>("metadata", `is`(defaultOriginalData.metadata)),
        hasProperty("unalignedNucleotideSequences", `is`(anEmptyMap<String, GeneticSequence>())),
    )

    // At least one returned entry has to match the first submitted accession in every checked property.
    assertThat(
        unprocessedEntries,
        hasItem(
            allOf(
                hasProperty<UnprocessedData>("accession", `is`(idMappings[0].accession)),
                hasProperty("version", `is`(1L)),
                hasProperty("data", metadataOnlyData),
                hasProperty("submissionId", matchesRegex("custom[0-9]")),
                hasProperty("submitter", `is`(DEFAULT_USER_NAME)),
                hasProperty("groupId", `is`(submission.groupId)),
                hasProperty("submittedAt", greaterThan(1_700_000_000L)),
            ),
        ),
    )
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.fasterxml.jackson.databind.node.IntNode
import com.fasterxml.jackson.databind.node.NullNode
import com.fasterxml.jackson.databind.node.TextNode
import org.loculus.backend.api.GeneName
import org.loculus.backend.api.GeneticSequence
import org.loculus.backend.api.Insertion
import org.loculus.backend.api.PreprocessingAnnotation
import org.loculus.backend.api.PreprocessingAnnotationSource
Expand Down Expand Up @@ -99,6 +100,21 @@ val defaultProcessedDataMultiSegmented = ProcessedData(
),
)

/**
 * Processed-data fixture that carries metadata only.
 *
 * All sequence and insertion maps (unaligned and aligned nucleotide sequences, nucleotide
 * insertions, aligned amino acid sequences, amino acid insertions) are empty.
 */
val defaultProcessedDataWithoutSequences = ProcessedData<GeneticSequence>(
    metadata = mapOf(
        "date" to TextNode("2002-12-15"),
        "host" to TextNode("google.com"),
        "region" to TextNode("Europe"),
        "country" to TextNode("Spain"),
        // Explicit JSON null rather than leaving the key out.
        "division" to NullNode.instance,
    ),
    unalignedNucleotideSequences = emptyMap(),
    alignedNucleotideSequences = emptyMap(),
    nucleotideInsertions = emptyMap(),
    alignedAminoAcidSequences = emptyMap(),
    aminoAcidInsertions = emptyMap(),
)

private val defaultSuccessfulSubmittedData = SubmittedProcessedData(
accession = "If a test result shows this, processed data was not prepared correctly.",
version = 1,
Expand Down
Loading
Loading