forked from edmundmiller/Comet-Galaxy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpromoter.pl
127 lines (100 loc) · 3.97 KB
/
promoter.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include ('list_input.pl').
% need new rules
prokaryote(G) :- not eukaryote(G).
eukaryote(G) :- not prokaryote(G).
promoter(G, IndexStartSite, IndexPribnow, IndexSecond) :-
% tss - transcription start site
tss(G, S, IndexStartSite),
subsequence(P, G, IndexPribnow),pribnowbox(P),
% P appears approximately 10 nucleotides before S
IndexStartSite - IndexPribnow .=. 10,
subsequence(X, G, IndexSecond),
% and X is similar to the consensus sequence “TTGACA”
similar(X, ['t','t','g','a','c','a']),
% and X appears approximately 35 nucleotides before S
IndexStartSite - IndexSecond .=. 35,
prokaryote(G).
pribnowbox(P) :-
list_length(P, 0, L1),
L1 .=. 6,
similar(P, ['t','a','t','a','a','t']).
promoter(G, IndexStartSite, IndexTata, -1) :-
tss(G, S, IndexStartSite),
subsequence(P, G, IndexTata),tatabox(P),
IndexStartSite - IndexTata .=. 25,
eukaryote(G).
tatabox(P) :- similar(P, ['t','a','t','a','a','a','a']).
tatabox(P) :- similar(P, ['t','a','t','a','a','a','t']).
tatabox(P) :- similar(P, ['t','a','t','a','t','a','a']).
tatabox(P) :- similar(P, ['t','a','t','a','t','a','t']).
stop_codon(['t', 'a', 'g']).
stop_codon(['t', 'a', 'a']).
stop_codon(['t', 'g', 'a']).
reversed_stop_codon(P) :-
stop_codon(Q),
reverse_list(Q, P).
% Generate all O(N^2) sublists of the input list
generate_sublists([Head | Tail], Result, 0) :-
generate_prefixlists([Head | Tail], Result).
generate_sublists([_ | Tail], Result, Index) :-
generate_sublists(Tail, Result, Index2),
Index is Index2 + 1.
% Generate prefixes of the given list
generate_prefixlists([Head | _], [Head | []]).
generate_prefixlists([Head | Tail], [Head | Result]) :-
generate_prefixlists(Tail, Result).
% Finds all transcription sections in the Genome.
% Transcription sections should start with a start codon and end with a stop codon.
% However, there should not be any stop codons in the 'body' of the section.
tss(Genome, Candidate, Index) :-
generate_sublists(Genome, Candidate, Index),
reverse_list(Candidate, ReverseCandidate),
prefix_match(['a', 't', 'g'], Candidate),
reversed_stop_codon(Stop2),
prefix_match(Stop2, ReverseCandidate).
reverse_list(InputList, OutputList) :- reverse_list(InputList, [], OutputList).
reverse_list([], Acc, Acc).
reverse_list([Head | Tail], Acc, Reversed) :- reverse_list(Tail, [Head | Acc], Reversed).
list_length( [] , L , L ) .
list_length( [_|Xs] , T , L ) :-
T1 is T+1 ,
list_length(Xs,T1,L).
% have the same base pair in at least 4 positions
similar(X, C) :-
num_match(X, C, NumMatches),
NumMatches >= 4.
% Returns the number of positions in the two lists that match
num_match(_, [], 0).
num_match([], _, 0).
num_match([Head | Tail1], [Head | Tail2], Result) :-
num_match(Tail1, Tail2, Result2),
Result is Result2 + 1.
num_match([Head1 | Tail1], [Head2 | Tail2], Result) :-
Head1 \= Head2,
num_match(Tail1, Tail2, Result).
prefix_match([], _).
prefix_match([H | T1], [H | T2]) :- prefix_match(T1, T2).
subsequence(Pattern, Sequence, 0) :- prefix_match(Pattern, Sequence).
subsequence(Pattern, [_ | Tail], Index) :-
subsequence(Pattern, Tail, Index2),
Index is Index2 + 1.
% Searches for `Pattern` in `Sequence` but ignores the subsequence
% if it begins at index 0
subsequence_ignore0(Pattern, Sequence) :-
subsequence(Pattern, Sequence, Index),
Index > 0.
% ?- prefix_match([a, b, c], [a, b, c, d, e]).
% ?- prefix_match([a, e, c], [a, b, c, d, e]).
% ?- subsequence([a, b], [b, c, a, b, e, a, b], I).
% ?- subsequence([a, b], [b, c, a, d, e], I).
% ?- sequence(DNA), promoter(DNA).
% ?- similar([a, b, d, e], [a, b, d, e]).
% ?- generate_sublists([a, b, c, d], X).
% ?- reverse_list([a, b, c, d], R).
% ?- subsequence_ignore0([a, b], [a, b, c, b], X).
% ?- subsequence_ignore0([a, b], [a, b, a, b], X).
% ?- tts(['a', 't', 'g', 't', 'a', 'g'], X).
% ?- tts(['a', 't', 'g', 'c', 'c', 't', 'a', 'a', 't', 'a', 'g'], X).
% ?- tts(['a', 't', 'c', 'c', 't', 'a', 'a', 't', 'a', 'g'], X).
% ?- sequence(X), tss(X, S).
?- sequence(X), promoter(X, A, B, C).