Skip to content

Commit

Permalink
support exonNumber and strand in gencode genes
Browse files Browse the repository at this point in the history
  • Loading branch information
ssadedin committed Jul 15, 2024
1 parent 6c398ee commit b0d3fde
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 7 deletions.
5 changes: 3 additions & 2 deletions src/main/groovy/gngs/gencode/Gencode.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class Gencode implements GeneAnnotationSource {
final String regionId = attributes['ID']
if(type == 'gene') {
region.properties.remove('attributes')
Gene gene = new Gene(region, regionId, (String)attributes.gene_name)
Gene gene = new Gene(region, regionId, (String)attributes.gene_name, region['strand'] as char)
if(attributes.hgnc_id)
gene.hgnc_id = ((String)attributes.hgnc_id).split(':')[-1]
gene.type = (String)attributes.gene_type
Expand All @@ -133,7 +133,8 @@ class Gencode implements GeneAnnotationSource {
// coding exons appear both with the exon and CDS designation
// UNLESS they are part of the UTR
if(type == 'CDS' || type == 'three_prime_UTR' || type == 'five_prime_UTR') {
feature = new Exon(region, regionId)
int exonNumber = Integer.parseInt((String)attributes.exon_number)
feature = new Exon(region, regionId, exonNumber)
if(type == 'CDS')
feature.coding = true
}
Expand Down
7 changes: 5 additions & 2 deletions src/main/groovy/gngs/gencode/Gene.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,13 @@ class Gene extends Feature<Transcript> {
String symbol
String hgnc_id
String type
char strand

// Transcript transcript
public Gene(IRegion region, String id, String symbol) {
public Gene(IRegion region, String id, String symbol, char strand) {
super(region, id, null);
this.symbol = symbol
this.strand = strand
}
}

Expand All @@ -67,7 +69,8 @@ class Exon extends Feature {
int exonNumber
Gene gene
boolean coding
public Exon(IRegion region, String id) {
public Exon(IRegion region, String id, int exonNumber) {
super(region, id, null);
this.exonNumber = exonNumber
}
}
13 changes: 10 additions & 3 deletions src/test/groovy/gngs/GencodeTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,17 @@ class GencodeTest {
println "Exons of the main transcript: "

dvl1.children[0].children.each {
println "Exon $it"
println "Exon $it ($it.exonNumber, coding=$it.coding)"
}

assert dvl1.strand == '-'
assert dvl1.children[0].children.size() == 17
assert dvl1.children[1].children.size() == 17

// DVL1 is a case where the coding sequence ends within an exon,
// hence exon 15 is represented twice: once with coding sequence,
// once without
assert dvl1.children[0].children[0].exonNumber == 15
}

@Test
Expand All @@ -70,6 +76,7 @@ class GencodeTest {
// dvl_regions.save("dvl_regions_${i}.bed", extra: { tx.id })
// }

assert dvl1.strand == '-'
assert dvl1.children[0].children.size() == 17
assert dvl1.children[1].children.size() == 17
}
Expand Down Expand Up @@ -101,9 +108,9 @@ class GencodeTest {

println "CDS for DVL1 is $cds"

def expected = [TAS1R3:0, DVL1:2103, MIR6808:0]
def expected = [TAS1R3:0, DVL1:2073, MIR6808:0]
assert cds.size() == 3
assert cds.DVL1 == 2073
assert cds.DVL1 == expected.DVL1
assert cds.TAS1R3 == 0
assert cds.MIR6808 == 0

Expand Down

0 comments on commit b0d3fde

Please sign in to comment.