Skip to content

Commit

Permalink
Minor improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
Jumitti committed Apr 26, 2024
1 parent 507bf45 commit db91039
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 164 deletions.
118 changes: 75 additions & 43 deletions navigation/IMF.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import datetime
import io
import re
import smtplib
from email import encoders
from email.mime.base import MIMEBase
Expand Down Expand Up @@ -120,8 +121,8 @@ def result_table_output(df):
y=alt.Y('Rel Score:Q', axis=alt.Axis(title='Relative Score'),
scale=alt.Scale(domain=[ystart, ystop])),
color=alt.condition(gene_region_selection, color_scale, alt.value('lightgray')),
tooltip=['Position'] + (['Rel Position'] if "Rel Position" in source else []) + ['Rel Score'] + (
['p-value'] if 'p-value' in source else []) + ['Sequence', 'Gene', 'Species', 'Region'],
tooltip=['Sequence', 'Position'] + (['Rel Position'] if "Rel Position" in source else []) + ['Rel Score'] + (
['p-value'] if 'p-value' in source else []) + ['Gene', 'Species', 'Region'],
opacity=alt.condition(gene_region_selection, alt.value(0.8), alt.value(0.2))
).transform_calculate(x=f'datum[{xcol_param.name}]').properties(width=600,
height=400).interactive().add_params(
Expand Down Expand Up @@ -171,7 +172,12 @@ def BSF_page():
'Danio rerio']
promoter_name = line[1:]
words = promoter_name.lstrip('>').split()
name = words[0]
pattern = r">(\w+)\s+(\w+)\s+\|"
match = re.search(pattern, line)
if match:
name = words[0] + ' ' + words[1]
else:
name = words[0]
for species in species_prom:
if species.lower() in promoter_name.lower():
found_species = species
Expand Down Expand Up @@ -209,20 +215,27 @@ def BSF_page():
if jaspar == 'JASPAR_ID':
with REcol1:
st.markdown("🔹 :blue[**Step 2.3**] JASPAR ID:")
jaspar_id = st.text_input("🔹 :blue[**Step 2.3**] JASPAR ID:", value="MA0106.1",
jaspar_id = st.text_input("🔹 :blue[**Step 2.3**] JASPAR ID:",
value="MA0106.1" if 'JASPAR_ID_save' not in st.session_state
else st.session_state['JASPAR_ID_save'],
label_visibility='collapsed')

TF_name, TF_species, matrix, weblogo = IMO.matrix_extraction(jaspar_id)
if TF_name != 'not found':
st.success(f"{TF_species} transcription factor {TF_name}")
with REcol2:
st.image(weblogo)
button = False
error_input_im = True
st.session_state['JASPAR_ID_save'] = jaspar_id
if jaspar_id:
TF_name, TF_species, matrix, weblogo = IMO.matrix_extraction(jaspar_id)
if TF_name != 'not found':
st.success(f"{TF_species} transcription factor {TF_name}")
with REcol2:
st.image(weblogo)
button = False
error_input_im = True
else:
button = True
error_input_im = False
st.error('Wrong JASPAR_ID')
else:
button = True
error_input_im = False
st.error('Wrong JASPAR_ID')
st.warning('Please enter a JASPAR_ID')

elif jaspar == 'PWM':
with REcol1:
Expand All @@ -235,30 +248,46 @@ def BSF_page():
st.markdown("🔹 :blue[**Step 2.3**] Matrix:",
help="Only PWM generated with our tools are allowed")
matrix_str = st.text_area("🔹 :blue[**Step 2.3**] Matrix:",
value="A [ 20.0 0.0 0.0 0.0 0.0 0.0 0.0 100.0 0.0 60.0 20.0 ]\nT [ 60.0 20.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ]\nG [ 0.0 20.0 100.0 0.0 0.0 100.0 100.0 0.0 100.0 40.0 0.0 ]\nC [ 20.0 60.0 0.0 100.0 100.0 0.0 0.0 0.0 0.0 0.0 80.0 ]",
value="A [ 20.0 0.0 0.0 0.0 0.0 0.0 0.0 100.0 0.0 60.0 20.0 ]\nT [ 60.0 20.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ]\nG [ 0.0 20.0 100.0 0.0 0.0 100.0 100.0 0.0 100.0 40.0 0.0 ]\nC [ 20.0 60.0 0.0 100.0 100.0 0.0 0.0 0.0 0.0 0.0 80.0 ]"
if 'MATRIX_STR_save' not in st.session_state else st.session_state['MATRIX_STR_save'],
label_visibility='collapsed', height=125)
st.session_state['MATRIX_STR_save'] = matrix_str

lines = matrix_str.split("\n")
matrix = {}
for line in lines:
parts = line.split("[")
base = parts[0].strip()
values = [float(val.strip()) for val in parts[1][:-1].split()]
matrix[base] = values
if len(lines) > 1:
for line in lines:
parts = line.split("[")
base = parts[0].strip()
values = [float(val.strip()) for val in parts[1][:-1].split()]
matrix[base] = values

try:
IMO.has_uniform_column_length(matrix_str)
error_input_im = True
except Exception as e:
try:
IMO.has_uniform_column_length(matrix_str)

weblogo = IMO.PWM_to_weblogo(matrix_str)
st.pyplot(weblogo.fig)
logo = io.BytesIO()
weblogo.fig.savefig(logo, format='png')
logo.seek(0)
st.session_state['weblogo'] = logo

error_input_im = True
except Exception as e:
error_input_im = False
REcol2.error(e)
else:
error_input_im = False
st.error(e)
REcol2.warning("Please input your PWM :)")
else:
with REcol1:
st.markdown("🔹 :blue[**Step 2.3**] Sequences:",
help='Put FASTA sequences. Same sequence length required ⚠')
individual_motif = st.text_area("🔹 :blue[**Step 2.3**] Sequences:",
value=">seq1\nCTGCCGGAGGA\n>seq2\nAGGCCGGAGGC\n>seq3\nTCGCCGGAGAC\n>seq4\nCCGCCGGAGCG\n>seq5\nAGGCCGGATCG",
value=">seq1\nCTGCCGGAGGA\n>seq2\nAGGCCGGAGGC\n>seq3\nTCGCCGGAGAC\n>seq4\nCCGCCGGAGCG\n>seq5\nAGGCCGGATCG"
if 'individual_motif_save' not in st.session_state else st.session_state['individual_motif_save'],
label_visibility='collapsed')
st.session_state['individual_motif_save'] = individual_motif
individual_motif = individual_motif.upper()
isUIPAC = True

Expand All @@ -280,7 +309,7 @@ def BSF_page():
error_input_im = True
except Exception as e:
error_input_im = False
st.error(e)
REcol1.error(e)

else:
with REcol1:
Expand All @@ -290,6 +319,7 @@ def BSF_page():
st.session_state[
'IUPAC_seq'],
label_visibility='collapsed')
st.session_state['IUPAC_seq'] = IUPAC
IUPAC = IUPAC.upper()

IUPAC_code = ['A', 'T', 'G', 'C', 'R', 'Y', 'M', 'K', 'W', 'S', 'B', 'D', 'H', 'V', 'N', '-', '.']
Expand Down Expand Up @@ -322,7 +352,7 @@ def BSF_page():
error_input_im = True
except Exception as e:
error_input_im = False
st.error(e)
REcol1.error(e)
else:
st.error(sequences)
isUIPAC = False
Expand All @@ -348,7 +378,7 @@ def BSF_page():

with BSFcol2:
st.markdown("🔹 :blue[**Step 2.5**] Relative Score threshold")
auto_thre = st.checkbox("Automatic threshold", value=True)
auto_thre = st.toggle("Automatic threshold", value=True)
if auto_thre:
threshold_entry = 0
else:
Expand All @@ -357,7 +387,7 @@ def BSF_page():
label_visibility="collapsed")
with BSFcol3:
st.markdown("🔹 :blue[**_Experimental_**] Calcul _p-value_", help='Experimental, take more times.')
pvalue = st.checkbox('_p-value_')
pvalue = st.toggle('_p-value_')
if pvalue:
if total_sequences > 10:
st.markdown(
Expand Down Expand Up @@ -459,19 +489,21 @@ def BSF_page():
mime="application/vnd.ms-excel", key='download-excel')
st.download_button(label="💾 Download table (.csv)", data=csv_file,
file_name=f"Results_TFinder_{current_date_time}.csv", mime="text/csv")
email_receiver = st.text_input('Send results by email ✉',
value='', placeholder='Send results by email ✉',
label_visibility="collapsed")
if st.button("Send ✉"):
if jaspar == 'PWM':
if matrix_type == 'With PWM':
body = f"Hello 🧬\n\nResults obtained with TFinder.\n\nPosition Weight Matrix:\n{matrix_text}\n\nThis email also includes the sequences used in FASTA format and an Excel table of results.\n\nFor all requests/information, please refer to the 'Contact' tab on the TFinder website. We would be happy to answer all your questions.\n\nBest regards\nTFinder Team 🔎🧬"
if matrix_type == 'With FASTA sequences':
body = f"Hello 🧬\n\nResults obtained with TFinder.\n\nResponsive Elements:\n{individual_motif}\n\nPosition Weight Matrix:\n{matrix_text}\n\nThis email also includes the sequences used in FASTA format and an Excel table of results.\n\nFor all requests/information, please refer to the 'Contact' tab on the TFinder website. We would be happy to answer all your questions.\n\nBest regards\nTFinder Team 🔎🧬"
elif jaspar == 'JASPAR_ID':
body = f"Hello 🧬\n\nResults obtained with TFinder.\n\nJASPAR_ID: {jaspar_id} | Transcription Factor name: {TF_name}\n\nThis email also includes the sequences used in FASTA format and an Excel table of results.\n\nFor all requests/information, please refer to the 'Contact' tab on the TFinder website. We would be happy to answer all your questions.\n\nBest regards\nTFinder Team 🔎🧬"
else:
body = f"Hello 🧬\n\nResults obtained with TFinder.\n\nResponsive Elements:\n{IUPAC}\n\nPosition Weight Matrix:\n{matrix_text}\n\nThis email also includes the sequences used in FASTA format and an Excel table of results.\n\nFor all requests/information, please refer to the 'Contact' tab on the TFinder website. We would be happy to answer all your questions.\n\nBest regards\nTFinder Team 🔎🧬"
email(excel_file, csv_file, txt_output, email_receiver, body, jaspar)

if st.session_state["LOCAL"] == "False":
email_receiver = st.text_input('Send results by email ✉',
value='', placeholder='Send results by email ✉',
label_visibility="collapsed")
if st.button("Send ✉"):
if jaspar == 'PWM':
if matrix_type == 'With PWM':
body = f"Hello 🧬\n\nResults obtained with TFinder.\n\nPosition Weight Matrix:\n{matrix_text}\n\nThis email also includes the sequences used in FASTA format and an Excel table of results.\n\nFor all requests/information, please refer to the 'Contact' tab on the TFinder website. We would be happy to answer all your questions.\n\nBest regards\nTFinder Team 🔎🧬"
if matrix_type == 'With FASTA sequences':
body = f"Hello 🧬\n\nResults obtained with TFinder.\n\nResponsive Elements:\n{individual_motif}\n\nPosition Weight Matrix:\n{matrix_text}\n\nThis email also includes the sequences used in FASTA format and an Excel table of results.\n\nFor all requests/information, please refer to the 'Contact' tab on the TFinder website. We would be happy to answer all your questions.\n\nBest regards\nTFinder Team 🔎🧬"
elif jaspar == 'JASPAR_ID':
body = f"Hello 🧬\n\nResults obtained with TFinder.\n\nJASPAR_ID: {jaspar_id} | Transcription Factor name: {TF_name}\n\nThis email also includes the sequences used in FASTA format and an Excel table of results.\n\nFor all requests/information, please refer to the 'Contact' tab on the TFinder website. We would be happy to answer all your questions.\n\nBest regards\nTFinder Team 🔎🧬"
else:
body = f"Hello 🧬\n\nResults obtained with TFinder.\n\nResponsive Elements:\n{IUPAC}\n\nPosition Weight Matrix:\n{matrix_text}\n\nThis email also includes the sequences used in FASTA format and an Excel table of results.\n\nFor all requests/information, please refer to the 'Contact' tab on the TFinder website. We would be happy to answer all your questions.\n\nBest regards\nTFinder Team 🔎🧬"
email(excel_file, csv_file, txt_output, email_receiver, body, jaspar)
else:
st.error(f"No consensus sequence found with the specified threshold")
129 changes: 29 additions & 100 deletions navigation/pwm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,107 +18,36 @@
# OUT OF OR IN CONNECTION WITH TFINDER OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import streamlit as st
import json
import numpy as np
import logomaker


def pwm_page():
def calculate_pwm(sequences):
    """Build a percentage position matrix from equal-length sequences.

    The matrix has one row per base, in the fixed order A, T, G, C, and one
    column per sequence position. Each cell holds the percentage (0-100) of
    sequences carrying that base at that position. Characters other than
    A/T/G/C are ignored, so a column may sum to less than 100.

    The number of sequences and the motif length are derived from
    ``sequences`` itself rather than read from the enclosing scope, so the
    function can be called on its own.

    Args:
        sequences: list of upper-case nucleotide strings of identical length.

    Returns:
        A ``numpy`` array of shape ``(4, len(sequences[0]))``; shape
        ``(4, 0)`` when ``sequences`` is empty.

    Raises:
        ValueError: if the sequences do not all have the same length.
    """
    num_sequences = len(sequences)
    if num_sequences == 0:
        # Nothing to count; avoid dividing by zero below.
        return np.zeros((4, 0))

    sequence_length = len(sequences[0])
    if any(len(sequence) != sequence_length for sequence in sequences):
        raise ValueError("Sequence lengths are not consistent.")

    # Row index of each counted base; must match the A, T, G, C row order.
    row_of = {'A': 0, 'T': 1, 'G': 2, 'C': 3}
    pwm = np.zeros((4, sequence_length))
    for sequence in sequences:
        for position, nucleotide in enumerate(sequence):
            row = row_of.get(nucleotide)
            if row is not None:
                pwm[row, position] += 1

    # Same arithmetic order as before: count / total, then scaled to percent.
    return pwm / num_sequences * 100

def parse_fasta(fasta_text):
    """Extract the sequences from FASTA-formatted text.

    Every line starting with ``>`` (after surrounding whitespace is removed)
    is a record header and closes the record before it. All other non-blank
    lines are concatenated, in order, into the current record's sequence.
    Header text itself is discarded, and records with no sequence lines are
    skipped.

    Lines are stripped before use so that trailing spaces or indentation in
    pasted text do not end up inside the sequences.

    Args:
        fasta_text: the raw FASTA text (may be empty).

    Returns:
        A list of sequence strings, one per non-empty record, in input order.
    """
    sequences = []
    chunks = []  # pieces of the record currently being read

    for raw_line in fasta_text.splitlines():
        line = raw_line.strip()
        if line.startswith(">"):
            # A new header ends the previous record, if it had any content.
            if chunks:
                sequences.append("".join(chunks))
                chunks = []
        elif line:
            chunks.append(line)

    # Flush the last record, which has no following header to close it.
    if chunks:
        sequences.append("".join(chunks))

    return sequences

def create_web_logo(sequences):
    """Return a logomaker ``Logo`` drawn from the given aligned sequences.

    The sequences are first converted with ``logomaker.alignment_to_matrix``
    and the resulting matrix is rendered with the 'classic' color scheme.
    """
    alignment_matrix = logomaker.alignment_to_matrix(sequences)
    return logomaker.Logo(alignment_matrix, color_scheme='classic')

st.subheader("🧮 PWM generator")
import io

fasta_text = st.text_area("Put FASTA sequences. Same sequence length required ⚠️", height=300)

if st.button('Generate PWM'):
sequences = parse_fasta(fasta_text)
sequences = [seq.upper() for seq in sequences]

if len(sequences) > 0:
sequence_length = len(sequences[0])
num_sequences = len(sequences)
inconsistent_lengths = False

for sequence in sequences[1:]:
if len(sequence) != sequence_length:
inconsistent_lengths = True
break

if inconsistent_lengths:
st.error("Sequence lengths are not consistent.")
else:
pwm = calculate_pwm(sequences)

st.subheader("PWM: ")
st.info("⬇️ Select and copy")
bases = ['A', 'T', 'G', 'C']
pwm_text = ""
for i in range(len(pwm)):
base_name = bases[i]
base_values = pwm[i]

base_str = base_name + " ["
for value in base_values:
base_str += "\t" + format(value) + "\t" if np.isfinite(value) else "\t" + "NA" + "\t"

base_str += "]\n"
pwm_text += base_str

st.text_area("PWM résultante", value=pwm_text)

sequences_text = fasta_text
sequences = []
current_sequence = ""
for line in sequences_text.splitlines():
line = line.strip()
if line.startswith(">"):
if current_sequence:
sequences.append(current_sequence)
current_sequence = ""
else:
current_sequence += line
import streamlit as st

sequences.append(current_sequence)
from tfinder import IMO

logo = create_web_logo(sequences)
st.pyplot(logo.fig)

else:
st.warning("You forgot FASTA sequences :)")
def pwm_page():
    """Render the Streamlit page that turns FASTA sequences into a PWM.

    The left column holds the FASTA input, pre-filled with the text saved in
    ``st.session_state['individual_motif_save']`` when present, otherwise with
    a built-in example. The right column shows the resulting matrix as
    read-only text. The web logo is drawn below, and a PNG copy of it is
    stored in ``st.session_state['weblogo']``. Any exception raised while
    building or rendering the matrix is shown with ``st.error``.
    """
    input_col, output_col = st.columns(2)

    # Restore the previous input if the user already typed something.
    if 'individual_motif_save' in st.session_state:
        initial_text = st.session_state['individual_motif_save']
    else:
        initial_text = ">seq1\nCTGCCGGAGGA\n>seq2\nAGGCCGGAGGC\n>seq3\nTCGCCGGAGAC\n>seq4\nCCGCCGGAGCG\n>seq5\nAGGCCGGATCG"

    individual_motif = input_col.text_area(
        "🔹 :blue[**Step 2.3**] Sequences:",
        value=initial_text,
        height=125,
        help='Put FASTA sequences. Same sequence length required ⚠')
    # Save the text as typed; only the working copy is upper-cased.
    st.session_state['individual_motif_save'] = individual_motif
    individual_motif = individual_motif.upper()

    try:
        matrix, weblogo = IMO.individual_motif_pwm(individual_motif)

        # One "<base> [ v1 v2 ... ]" line per base, values with 4 decimals.
        rows = []
        for base, values in matrix.items():
            formatted_values = " ".join([f"{val:.4f}" for val in values])
            rows.append(f"{base} [ {formatted_values} ]\n")
        matrix_str = "".join(rows)

        output_col.text_area('PWM', value=matrix_str, height=125,
                             help='Copy to use later. Not editable.',
                             disabled=True)

        st.pyplot(weblogo.fig)

        # Keep an in-memory PNG of the logo in the session state.
        png_buffer = io.BytesIO()
        weblogo.fig.savefig(png_buffer, format='png')
        png_buffer.seek(0)
        st.session_state['weblogo'] = png_buffer
    except Exception as e:
        st.error(e)
Loading

0 comments on commit db91039

Please sign in to comment.