Skip to content

Commit

Permalink
Change -eval_labels to -eval_seqs
Browse files Browse the repository at this point in the history
  • Loading branch information
rcedgar committed Aug 13, 2024
1 parent 503370f commit 6dd83f7
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 26 deletions.
24 changes: 9 additions & 15 deletions src/msta_scores.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ void cmd_msta_scores()

const bool DoCore = opt_core;

string LabelDir;
if (optset_labeldir)
string SeqsDir;
if (optset_seqsdir)
{
LabelDir = string(opt_labeldir);
Dirize(LabelDir);
SeqsDir = string(opt_seqsdir);
Dirize(SeqsDir);
}

const uint N = SIZE(Accs);
Expand All @@ -45,18 +45,12 @@ void cmd_msta_scores()
}

SeqDB MSA;
if (optset_labeldir)
if (optset_seqsdir)
{
set<string> EvalLabels;
vector<string> EvalLabelVec;
string LabelFN = LabelDir + Acc;
ReadLinesFromFile(LabelFN, EvalLabelVec);
uint N = SIZE(EvalLabelVec);
if (N == 0)
Die("Empty -labels file");
for (uint i = 0; i < N; ++i)
EvalLabels.insert(EvalLabelVec[i]);
MSA.FromFasta_Labels(FN, EvalLabels, true);
string SeqsFN = SeqsDir + Acc;
SeqDB EvalSeqs;
EvalSeqs.FromFasta(SeqsFN, false);
MSA.FromFasta_Seqs(FN, EvalSeqs, true);
}
else
MSA.FromFasta(FN, true);
Expand Down
2 changes: 1 addition & 1 deletion src/myopts.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ STR_OPT(scoredist)
STR_OPT(ref)
STR_OPT(label)
STR_OPT(labels)
STR_OPT(labeldir)
STR_OPT(seqsdir)
STR_OPT(fasta)
STR_OPT(feature_fasta)
STR_OPT(alnout)
Expand Down
31 changes: 22 additions & 9 deletions src/seqdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,36 +163,49 @@ void SeqDB::SetIsNucleo()
m_IsNucleoSet = true;
}

// NOTE(review): this hunk appears to be a diff rendered without +/- markers:
// both the FromFasta_Labels and FromFasta_Seqs signatures are present, `Label`
// is declared twice in the loop, and the trailing check exists in two forms.
// Read the Labels/LabelCount lines as the label-based variant and the
// EvalSeqs/EvalSeqSet/EvalSeqCount lines as the sequence-based variant.
//
// Sequence-based variant: reads FASTA records from FileName through SFasta and
// adds to this SeqDB only those records whose sequence, after dropping gap
// characters and upper-casing, is found in the set of sequences taken from
// EvalSeqs. The sequence passed to AddSeq is the record as read (gaps and case
// untouched), stored under the record's own label.
//   FileName   FASTA file to filter.
//   EvalSeqs   database whose sequences select which records are kept.
//   AllowGaps  copied to SF.m_AllowGaps before reading.
void SeqDB::FromFasta_Labels(const string &FileName,
const set<string> &Labels, bool AllowGaps)
void SeqDB::FromFasta_Seqs(const string &FileName,
const SeqDB &EvalSeqs, bool AllowGaps)
{
SFasta SF;
SF.Open(FileName);
SF.m_AllowGaps = AllowGaps;
uint FoundCount = 0;
m_IsAligned = false;
// Build the lookup set from the evaluation sequences exactly as GetSeq
// returns them.
// NOTE(review): no gap-stripping or upper-casing is applied on this side,
// while the candidate key below is normalized; a match therefore presumably
// requires EvalSeqs to hold ungapped, upper-case sequences already -- confirm.
set<string> EvalSeqSet;
const uint EvalSeqCount = EvalSeqs.GetSeqCount();
for (uint i = 0; i < EvalSeqCount; ++i)
EvalSeqSet.insert(EvalSeqs.GetSeq(i));

// Loop until GetNextSeq returns 0 (no further record).
for (;;)
{
const char* Seq = SF.GetNextSeq();
if (Seq == 0)
break;
// Label-based variant: keep the record only if its label is in Labels.
const string Label = SF.GetLabel();
if (Labels.find(Label) == Labels.end())
continue;
++FoundCount;
// Zero-length records are skipped.
const unsigned L = SF.GetSeqLength();
if (L == 0)
continue;
const string Label = SF.GetLabel();

// s2 is the comparison key: the record with gap characters removed and the
// remaining characters upper-cased.
// NOTE(review): if toupper here is the <cctype> function, it takes an int and
// a negative char value is out of its documented domain -- confirm the input
// alphabet or cast through unsigned char.
string s2;
for (uint i = 0; i < L; ++i)
{
char c = Seq[i];
if (!isgap(c))
s2 += toupper(c);
}
// Sequence-based variant: keep the record only if its key is in EvalSeqSet.
if (EvalSeqSet.find(s2) == EvalSeqSet.end())
continue;

++FoundCount;
// Copy the first L characters of the record unchanged and store them with
// the record's label.
string s;
for (unsigned i = 0; i < L; ++i)
s.push_back(Seq[i]);
AddSeq(Label, s);
}
const uint LabelCount = SIZE(Labels);
// NOTE(review): both messages below still say "labels" although the
// sequence-based variant matches on sequence content.
if (FoundCount == 0)
Die("No labels found");
if (FoundCount < LabelCount)
Warning("%u / %u labels not found", LabelCount - FoundCount, LabelCount);
// NOTE(review): FoundCount counts accepted records from FileName, whereas
// EvalSeqCount counts records in EvalSeqs (not distinct set entries), so
// repeated sequences on either side can make the reported shortfall inexact.
if (FoundCount < EvalSeqCount)
Warning("%u / %u labels not found", EvalSeqCount - FoundCount, EvalSeqCount);
}

void SeqDB::FromFasta(const string& FileName, bool AllowGaps)
Expand Down
2 changes: 1 addition & 1 deletion src/seqdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class SeqDB
bool GetIsNucleo();
unsigned GetSeqCount() const { return SIZE(m_Seqs); }
void FromFasta(const string &FileName, bool AllowGaps = false);
void FromFasta_Labels(const string &FileName, const set<string> &Labels, bool AllowGaps = false);
void FromFasta_Seqs(const string &FileName, const SeqDB &Seqs, bool AllowGaps = false);
void WritePretty(FILE *f) const;
void LogMe() const;

Expand Down

0 comments on commit 6dd83f7

Please sign in to comment.