-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlog_conversion.py
2154 lines (1917 loc) · 87.9 KB
/
log_conversion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 18 14:57:35 2023
@author: ZI\stepan.tikhomirov
"""
import pandas as pd
from scipy.io import loadmat
from io import StringIO
import functools
def read_cleanreversal(file_dir, task_parameters):
    '''
    Read a reversal-task MATLAB workspace and convert it to BIDS-style events.

    Parameters
    ----------
    file_dir : string (path is independent from system)
        Path to the reversal task .mat file
    task_parameters : dict
        Dictionary of task parameters. ``task_parameters["version_A"]["block_nr"]``
        or ``task_parameters["version_B"]["block_nr"]`` is used as the per-trial
        block number, depending on the "Task_Version" stored in the file.

    Returns
    -------
    df_new : pandas.core.frame.DataFrame or None
        Final dataframe (stimulus rows only) which will be converted to tsv
    df_timing_events : pandas.core.frame.DataFrame or None
        Stimulus and response rows, which will be used for plotting.
        Both values are None when the file has no "choice_onset" timing vector.
    '''
    df_no_pulses = loadmat(file_dir)

    # Get number of trials ("nt" is the number of the last trial).
    # loadmat returns a 2-D array; pick the element explicitly because calling
    # int() on a non-0-d array is deprecated in recent NumPy versions.
    nr_trials = int(np.ravel(df_no_pulses['nt'])[0])

    # --- Timing of stimulus, response, and feedback ---------------------------
    # A MATLAB structure is read by scipy as an array whose dtype names are the
    # structure fields.
    timings_mat = df_no_pulses['T']
    timings_dict = {n: timings_mat[n][0, 0] for n in timings_mat.dtype.names}
    # Keep the timing vectors (length equal to the number of trials), except
    # onset_trialend, which is not informative.
    timings_keys = [n for n, v in timings_dict.items()
                    if v.size == nr_trials and n != "onset_trialend"]
    if "choice_onset" not in timings_keys:
        return None, None

    # Data frame with trial numbers as index, one column per timing vector
    df_timing = pd.DataFrame(
        np.concatenate([timings_dict[c].reshape((nr_trials, 1))
                        for c in timings_keys], axis=1),
        index=list(range(0, nr_trials)),
        columns=timings_keys)
    # Change column names to be consistent with BIDS format
    df_timing = df_timing.rename(
        columns={'trial_onset': 'stim', 'onset_fb': 'feedback', 'choice_onset': 'response'})
    df_timing = df_timing.astype(
        {"stim": np.float64, "feedback": np.float64, "response": np.float64})
    # A response timing of 0 is treated as "no response"
    df_timing.loc[df_timing["response"] == 0, "response"] = np.nan
    # Express all timings relative to the first volume.
    # No unit conversion: the data already appear to be in seconds.
    time_first_vol = timings_dict['time_begin'].flatten()[0]
    df_timing = df_timing - time_first_vol

    # Wide to long format: one row per (event type, trial)
    df_timing_long = df_timing.unstack().reset_index()
    df_timing_long = df_timing_long.rename(
        columns={'level_0': 'event_type', 'level_1': 'trial_nr', 0: 'onset'})
    # Make sure trial numbers start at 1
    df_timing_long["trial_nr"] += 1

    # --- Per-trial event information ------------------------------------------
    sel_vars = ["A", "C", "R", "S", "rt", "p_u", "random_lr"]
    df_events = pd.DataFrame(
        np.concatenate([df_no_pulses[c].reshape((1, nr_trials))
                        for c in sel_vars], axis=0).transpose(),
        index=list(range(0, nr_trials)),
        columns=sel_vars)
    df_events = df_events.rename(
        columns={'A': 'response', 'C': 'correct', 'R': 'reward', 'S': 'correct_resp',
                 'rt': 'response_time', 'p_u': 'prob_events',
                 'random_lr': 'random_leftright'})

    # Add block number. loadmat returns the version as an array, so extract the
    # scalar before comparing.
    version_raw = np.ravel(df_no_pulses["Task_Version"])
    version = str(version_raw[0]) if version_raw.size else ""
    if version == "A":
        df_events["block_nr"] = task_parameters["version_A"]["block_nr"]
    elif version == "B":
        df_events["block_nr"] = task_parameters["version_B"]["block_nr"]

    # Change response and state from numeric to string (1 -> Card1; 2 -> Card2)
    df_events["response"] = df_events["response"].replace(
        [1, 2], ["Card1", "Card2"])
    df_events["correct_resp"] = df_events["correct_resp"].replace(
        [1, 2], ["Card1", "Card2"])
    # random_leftright indicates on which side Card 1 is drawn (see rev_trial.m)
    df_events["random_leftright"] = df_events["random_leftright"].replace(
        [1, 2], ["left", "right"])

    # Trial type.
    # NOTE(review): for incorrect responses, prob_events == 0 is labelled
    # "misleading reward" and prob_events == 1 "informative punishment"; given
    # the labels used for correct responses these two look swapped - confirm
    # against the task code before changing.
    answered_correctly = df_events["correct_resp"] == df_events["response"]
    df_events["trial_type"] = np.select(
        [
            answered_correctly & (df_events["prob_events"] == 0),
            answered_correctly & (df_events["prob_events"] == 1),
            ~answered_correctly & (df_events["prob_events"] == 0),
            ~answered_correctly & (df_events["prob_events"] == 1),
        ],
        [
            "informative reward", "misleading punishment",
            "misleading reward", "informative punishment",
        ],
        # An explicit string default is required: NumPy 2 refuses to combine
        # string choices with the implicit integer default 0.
        default="n/a")

    # Create trial_nr column out of index
    df_events["trial_nr"] = df_events.index + 1

    # Merge events with timing dataframe
    df_timing_events = pd.merge(df_timing_long, df_events, on=["trial_nr"])

    # reversal_display holds the shown stimulus (location of both cards) on
    # stimulus rows and the chosen card on response rows. It is created with
    # object dtype so that strings can be stored without upcasting.
    df_timing_events["reversal_display"] = pd.Series(
        np.nan, index=df_timing_events.index, dtype=object)
    stim = np.select(
        [
            df_events["random_leftright"] == "left",
            df_events["random_leftright"] == "right",
        ],
        ["Card2_right_Card1_left", "Card1_right_Card2_left"],
        default="n/a")
    is_stim = df_timing_events["event_type"] == "stim"
    is_response = df_timing_events["event_type"] == "response"
    df_timing_events.loc[is_stim, "reversal_display"] = stim
    # Take the response from the merged frame itself so that rows line up by
    # index (df_events has a different index than the merged frame).
    df_timing_events.loc[is_response, "reversal_display"] = \
        df_timing_events.loc[is_response, "response"]

    # Compute duration as the difference to the next row's onset
    # (drop the leading NaN of diff() and append NaN for the last row)
    df_timing_events["duration"] = list(
        df_timing_events["onset"].diff().values[1:]) + [np.nan]

    # Reorder columns
    df_timing_events = df_timing_events[
        ["onset", "duration", "trial_nr", "trial_type", "event_type",
         "reversal_display", "response", "correct", "response_time", "correct_resp"]]

    # Custom sorting: within each trial, order events stim -> response -> feedback
    cat_event_type_order = pd.CategoricalDtype(
        ["stim", "response", "feedback"],
        ordered=True
    )
    df_timing_events['event_type'] = df_timing_events['event_type'].astype(
        cat_event_type_order)
    df_timing_events = df_timing_events.sort_values(
        by=['trial_nr', 'event_type'])
    # Change categorical back to string (necessary to fill n/a values later)
    df_timing_events['event_type'] = df_timing_events['event_type'].astype(
        'string')
    df_timing_events = df_timing_events[df_timing_events["event_type"] != "feedback"]

    # The tsv frame keeps only the stimulus rows
    df_new = df_timing_events.copy()
    df_new = df_new[~df_new['event_type'].isin(['response'])]
    df_new = df_new.drop(columns=["trial_nr"])
    df_new = df_new.reset_index(drop=True)
    df_new = df_new.fillna("n/a")
    return df_new, df_timing_events
def read_clean_other_tasks(file_dir):
    '''
    Read and clean faces, nback, reward task files

    Parameters
    ----------
    file_dir : string (path is independent from system)
        Path to the log file

    Returns
    -------
    bids_df : pandas.core.frame.DataFrame
        Frame with the columns onset, duration (both in seconds) and
        event_type, which is processed further into the final tsv
    log_df : pandas.core.frame.DataFrame
        The cleaned log table (all original columns plus onset, duration and
        event_type), with pulse, quit and non-task rows removed

    Raises
    ------
    IndexError
        If the file contains no "Trial" header or no "Pulse" event.
    '''
    # Same decoding strategy as read_clean_selfref: UTF-8 first, ISO-8859-1 as
    # fallback, so that the result does not depend on the locale encoding.
    try:
        with open(file_dir, 'r', encoding='utf-8') as file:
            raw_text = file.read()
    except UnicodeDecodeError:
        with open(file_dir, 'r', encoding='iso-8859-1') as file:
            raw_text = file.read()

    # The raw table starts at the first "Trial". If "Event Type" occurs more
    # than once, an already reduced table follows the raw one and the raw
    # table ends where the second "Event Type" starts.
    idx_start_Trial = [m.start() for m in re.finditer('Trial', raw_text)]
    idx_start_EventType = [m.start()
                           for m in re.finditer('Event Type', raw_text)]
    table_end = idx_start_EventType[1] if len(idx_start_EventType) > 1 else None
    log_df = pd.read_csv(
        StringIO(raw_text[idx_start_Trial[0]:table_end]), sep='\t')

    # Express time relative to the first scanner pulse
    time_first_vol = log_df.loc[log_df["Event Type"] == "Pulse", "Time"].values[0]
    log_df["Time"] -= time_first_vol

    # Clean the data: remove pulse, quit, instruction rows etc.
    # .copy() gives an independent frame, so adding columns below does not
    # write into a slice of the parsed table.
    keep = (~log_df["Event Type"].isin(["Quit", "Pulse"])
            & ~log_df["Code"].isin(
                ["instruct", "ende", "black screen", "fixation", "perf"]))
    log_df = log_df[keep].copy()

    # Convert time into seconds (the log values are divided by 10000)
    log_df['onset'] = log_df['Time'].astype(float) / 10000
    log_df['duration'] = log_df['Duration'].astype(float) / 10000
    # The event code is used as event type
    log_df['event_type'] = log_df['Code']

    # Create a new DataFrame with the desired columns; missing values become 0.
    # event_type is cast to object first so that 0 can be filled in regardless
    # of the string dtype pandas inferred.
    bids_df = log_df[['onset', 'duration', 'event_type']].astype(
        {'event_type': object}).fillna(0).reset_index(drop=True)

    # NOTE(review): this makes onsets relative to the first remaining event
    # rather than to the first pulse - confirm that this is intended.
    start_time = bids_df["onset"].min()
    bids_df["onset"] = bids_df["onset"] - start_time
    return bids_df, log_df
def read_clean_selfref(file_dir):
    '''
    Read and clean selfref task log files

    Parameters
    ----------
    file_dir : string (path is independent from system)
        Path to the log file

    Returns
    -------
    bids_df : pandas.core.frame.DataFrame
        Frame with the columns onset, duration (both in seconds) and
        event_type, which is processed further into the final tsv
    log_df : pandas.core.frame.DataFrame
        The cleaned log table (all original columns plus onset, duration and
        event_type), with pulse, quit and non-task rows removed

    Raises
    ------
    IndexError
        If the file contains no "Trial" header or no "Pulse" event.
    '''
    try:
        # Attempt to read the file using UTF-8 encoding.
        with open(file_dir, 'r', encoding='utf-8') as file:
            raw_text = file.read()
    except UnicodeDecodeError:
        # If UTF-8 decoding fails, try ISO-8859-1 encoding.
        with open(file_dir, 'r', encoding='iso-8859-1') as file:
            raw_text = file.read()

    # Replace German umlauts by their two-letter transcription
    raw_text = raw_text.translate(str.maketrans({
        'Ü': 'UE', 'ü': 'ue',
        'Ä': 'AE', 'ä': 'ae',
        'Ö': 'OE', 'ö': 'oe',
    }))

    # The raw table starts at the first "Trial". If "Event Type" occurs more
    # than once, an already reduced table follows the raw one and the raw
    # table ends where the second "Event Type" starts.
    idx_start_Trial = [m.start() for m in re.finditer('Trial', raw_text)]
    idx_start_EventType = [m.start()
                           for m in re.finditer('Event Type', raw_text)]
    table_end = idx_start_EventType[1] if len(idx_start_EventType) > 1 else None
    log_df = pd.read_csv(
        StringIO(raw_text[idx_start_Trial[0]:table_end]), sep='\t')

    # Express time relative to the first scanner pulse
    time_first_vol = log_df.loc[log_df["Event Type"] == "Pulse", "Time"].values[0]
    log_df["Time"] -= time_first_vol

    # Clean the data: remove pulse, quit, instruction rows etc.
    # .copy() gives an independent frame, so adding columns below does not
    # write into a slice of the parsed table.
    keep = (~log_df["Event Type"].isin(["Quit", "Pulse"])
            & ~log_df["Code"].isin(
                ["instruct", "ende", "black screen", "fixation"]))
    log_df = log_df[keep].copy()

    # Convert the time into seconds (the log values are divided by 10000)
    log_df['onset'] = log_df['Time'].astype(float) / 10000
    log_df['duration'] = log_df['Duration'].astype(float) / 10000
    # The event code is used as event type
    log_df['event_type'] = log_df['Code']

    # Create a new DataFrame with the desired columns; missing values become 0.
    # event_type is cast to object first so that 0 can be filled in regardless
    # of the string dtype pandas inferred.
    bids_df = log_df[['onset', 'duration', 'event_type']].astype(
        {'event_type': object}).fillna(0).reset_index(drop=True)

    # Remove the first two rows (they contain number 2 in the event_type,
    # though there is no stimulus). Positional slicing does not fail when the
    # log has fewer than two rows.
    bids_df = bids_df.iloc[2:].reset_index(drop=True)

    # NOTE(review): this makes onsets relative to the first remaining event
    # rather than to the first pulse - confirm that this is intended.
    start_time = bids_df["onset"].min()
    bids_df["onset"] = bids_df["onset"] - start_time
    return bids_df, log_df
# Tasks that come with several log files: suffix of the file to convert.
_LOG_SUFFIX_BY_TASK = {"reversal": "WS.mat", "nback": ".xls", "reward": ".log"}


def _select_log_path(task, log_paths):
    '''Return the log file to convert for `task`, or "" when none fits.'''
    suffix = _LOG_SUFFIX_BY_TASK.get(task)
    if suffix is None:
        # No special rule for this task: take the first log file
        return log_paths[0] if log_paths else ""
    return next((path for path in log_paths if path.endswith(suffix)), "")


def _convert_task_log(task, log_path):
    '''
    Read and process the log of one task.

    Returns (events_df, plot_df, plot_function, sidecar_function). events_df is
    None when the task is unknown or its log could not be converted;
    sidecar_function is None when no json sidecar is written for the task.
    '''
    if task == "reversal":
        events_df, plot_df = read_cleanreversal(log_path, dict_reversal_answers)
        if events_df is None or plot_df is None:
            return None, None, None, None
        return events_df, plot_df, plotting_reversal, None

    if task == "selfref":
        cleaned, _ = read_clean_selfref(log_path)
        if cleaned is None:
            return None, None, None, None
        cleaned = calculate_response_times(cleaned)
        events_df, plot_df = process_selfref(cleaned, answer_file_df)
        return events_df, plot_df, plotting_selfref, json_selfref

    if task in ("reward", "faces", "nback"):
        cleaned, _ = read_clean_other_tasks(log_path)
        if cleaned is None:
            return None, None, None, None
        cleaned = calculate_response_times(cleaned)
        if task == "reward":
            # The reward plot is drawn from the final dataframe
            events_df = process_reward(cleaned)
            return events_df, events_df, plotting_reward, json_reward
        if task == "faces":
            events_df, plot_df = process_faces(cleaned, dict_faces_answers)
            return events_df, plot_df, plotting_faces, json_faces
        events_df, plot_df = process_nback(cleaned)
        return events_df, plot_df, plotting_nback, json_nback

    return None, None, None, None


def clean_read_data(subject, session, task, table, plotFlag=True):
    '''
    Combine reading, cleaning, processing and plotting functions

    The events tsv (and, for all tasks except reversal, a json sidecar) is
    written to the "func" folder of the subject/session below rawdata_dir; the
    optional plot is written below derivatives_dir/plots.

    Parameters
    ----------
    subject : string
        Subject number
    session : string
        Session number
    task : string
        Task name
    table: pandas.core.frame.DataFrame
        Table which contains paths of each available event file (columns
        subject, session, task, func and event)
    plotFlag : bool
        Object which is True by default and responsible for plotting

    Returns
    -------
    bids_df_new : pandas.core.frame.DataFrame or None
        Final dataframe which is converted to tsv file.
    output_file_name : string or None
        Name of the BOLD file without the trailing 'bold.nii.gz'; the output
        files are named output_file_name + "events.<ext>".
    log_path : string or None
        Log file that was selected for conversion.

    (None, None, None) is returned when the table has no matching row or no
    usable log file; (None, None, log_path) when a log file was selected but
    nothing was written.
    '''
    # Filter the table based on subject, session, and task
    matches = table[(table['subject'] == subject)
                    & (table['session'] == session)
                    & (table['task'] == task)]
    if matches.empty:
        return None, None, None

    first_match = matches.iloc[0]
    bold_paths = first_match['func']
    log_paths = first_match['event']
    if log_paths is None or log_paths == []:
        return None, None, None

    log_path = _select_log_path(task, log_paths)
    if not log_path:
        # None of the listed files has the suffix required for this task
        return None, None, None

    # Read, clean and process the log
    bids_df_new, plot_df, make_figure, make_sidecar = _convert_task_log(
        task, log_path)
    if bids_df_new is None:
        return None, None, log_path

    if not bold_paths:
        # The output name is derived from the BOLD file, so nothing can be
        # written without it.
        import warnings
        warnings.warn(
            "No BOLD file listed for sub-" + str(subject) + " ses-" + str(session)
            + " task " + str(task) + "; events were not written.")
        return None, None, log_path

    # Take the correct file name using the bold file
    output_file_name = os.path.split(bold_paths[0])[-1].replace('bold.nii.gz', '')
    func_dir = os.path.join(
        rawdata_dir, "sub-" + subject, 'ses-' + session, "func")

    bids_df_new.to_csv(
        os.path.join(func_dir, output_file_name + "events.tsv"),
        sep='\t', index=False)

    if plotFlag:
        fig = make_figure(plot_df)
        pio.write_html(
            fig,
            os.path.join(derivatives_dir, "plots", "sub-" + subject,
                         output_file_name + "events.html"),
            auto_open=False)

    if make_sidecar is not None:
        # Build the dictionary first so that no empty file is left behind if
        # that fails
        sidecar = make_sidecar()
        with open(os.path.join(func_dir, output_file_name + "events.json"),
                  'w') as json_data:
            json.dump(sidecar, json_data)

    return bids_df_new, output_file_name, log_path
def calculate_response_times(bids_df):
    '''
    Calculate response time (response time - stimulus time) for faces, selfref, nback and reward tasks

    Before the response times are computed, repeated "1" responses and digit
    rows that directly follow a reward cue are removed. The response time is
    stored in the row directly before the response. The column
    'response_time' only exists in the result if at least one response time
    could be computed.

    Parameters
    ----------
    bids_df : pandas.core.frame.DataFrame
        Cleaned dataframe (not final dataframe) with the columns 'onset' and
        'event_type'. It is not modified.

    Returns
    -------
    bids_df : pandas.core.frame.DataFrame
        A cleaned copy of the dataframe with the response time column.
    '''
    response_codes = ["1", "2", "3", "4"]
    cue_codes = ["vCSp", "lCSp", "wCSp"]

    def remove_second_consecutive_1(df):
        '''
        Remove every "1" response that directly follows another "1" response
        (as we need the first response)

        Parameters
        ----------
        df : pandas.core.frame.DataFrame
            Cleaned dataframe (not final dataframe).

        Returns
        -------
        df : pandas.core.frame.DataFrame
            Cleaned dataframe.
        '''
        event_types = df['event_type']
        repeated = event_types.isin(["1"]) & event_types.shift(1).isin(["1"])
        return df[~repeated].reset_index(drop=True)

    def drop_digit_after(df, preceding_codes):
        '''Remove each all-digit row that directly follows one of preceding_codes.'''
        event_types = df['event_type']
        follows_code = event_types.shift(1).isin(preceding_codes)
        is_digit = event_types.map(
            lambda value: str(value).isdigit()).astype(bool)
        return df[~(follows_code & is_digit)].reset_index(drop=True)

    bids_df = remove_second_consecutive_1(bids_df)
    bids_df = drop_digit_after(bids_df, cue_codes + ["no UCS", "CSm"])
    # The clean-up for the three cue codes used to be repeated inside the
    # response loop below, where it dropped rows while the loop was still
    # using the old row numbers. It is now completed up front.
    while True:
        cleaned = drop_digit_after(bids_df, cue_codes)
        if len(cleaned) == len(bids_df):
            break
        bids_df = cleaned

    # For every response, the reference stimulus is the preceding non-response
    # row with the smallest time difference, i.e. the one with the largest
    # onset seen so far.
    latest_stimulus_onset = None
    rows = zip(bids_df['event_type'].tolist(), bids_df['onset'].tolist())
    for row_nr, (event_type, onset) in enumerate(rows):
        if event_type in response_codes:
            if latest_stimulus_onset is not None:
                # Stored in the row directly before the response
                bids_df.at[row_nr - 1, 'response_time'] = onset - latest_stimulus_onset
        elif onset == onset and (latest_stimulus_onset is None
                                 or onset > latest_stimulus_onset):
            # onset == onset skips NaN onsets
            latest_stimulus_onset = onset
    return bids_df
def process_faces(df, correct_responses):
    '''
    Process dataframes for faces task

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        Cleaned dataframe with response time column. It is not modified.
    correct_responses : dict
        Correct answers for faces task (stimulus name -> correct response
        as integer)

    Returns
    -------
    df_new : pandas.core.frame.DataFrame
        Final dataframe which will be converted to tsv (stimulus rows only;
        'correct' is "1", "0" or "n/a")
    df : pandas.core.frame.DataFrame
        Dataframe which will be used for plotting (stimulus and response rows).
    '''
    block_markers = ['MatchForms', 'MatchFaces']
    response_codes = ['1', '2']

    # Every row belongs to the block announced by the most recent marker row
    conditions = []
    current_condition = None
    for event_type in df['event_type']:
        if event_type in block_markers:
            current_condition = event_type
        conditions.append(current_condition)
    # assign() returns a new frame, so the caller's frame stays untouched
    df = df.assign(trial_type=conditions)
    df = df[~df['event_type'].isin(block_markers)].reset_index(drop=True)

    df['correct'] = None
    event_types = df['event_type'].tolist()
    correctness = []
    for row_nr, event_type in enumerate(event_types):
        if event_type not in response_codes:
            continue
        response = int(event_type)
        if row_nr == 0:
            # A response in the very first row has no stimulus before it;
            # keep a placeholder so that the list stays aligned.
            correctness.append(None)
            continue
        # The stimulus is the row directly before the response
        stimulus = event_types[row_nr - 1]
        if stimulus in correct_responses and correct_responses[stimulus] == response:
            correctness.append("Correct")
        else:
            correctness.append("Incorrect")
        # Set the response in the 'responses' column of the stimulus row
        df.at[row_nr - 1, 'responses'] = response

    if correctness:
        df.loc[df['event_type'].isin(response_codes), 'correct'] = correctness

    # Move the correctness from the response row up to its stimulus row,
    # then remove the response rows
    df_new = df.copy()
    df_new['correct'] = df_new['correct'].shift(-1)
    df_new = df_new[~df_new['event_type'].isin(response_codes)]
    df_new = df_new.rename(columns={'event_type': 'faces_display'})
    codes = {"Correct": "1", "Incorrect": "0"}
    df_new['correct'] = df_new['correct'].map(
        lambda value: codes.get(value, value))
    df_new = df_new.fillna("n/a")
    return df_new, df
def process_nback(df):
    '''
    Process dataframes for nback task

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        Cleaned dataframe with response time column. It is not modified.

    Returns
    -------
    df_new : pandas.core.frame.DataFrame
        Final dataframe which will be converted to tsv (response rows removed;
        'correct' is "1", "0" or "n/a")
    df : pandas.core.frame.DataFrame
        Dataframe which will be used for plotting (stimulus and response rows).
    '''
    block_markers = ['0back', '2back']
    stimulus_codes = ['num1', 'num2', 'num3', 'num4']
    response_codes = ['1', '2', '3', '4']

    # Creating condition (trial_type) column: every row belongs to the block
    # announced by the most recent marker row
    conditions = []
    current_condition = None
    for event_type in df['event_type']:
        if event_type in block_markers:
            current_condition = event_type
        conditions.append(current_condition)
    # assign() returns a new frame, so the caller's frame stays untouched
    df = df.assign(trial_type=conditions)
    df = df[~df['event_type'].isin(block_markers)].reset_index(drop=True)

    # The three most recent stimuli, newest first (needed for the 2back
    # condition). They are not reset at block boundaries.
    recent_stimuli = [None, None, None]
    correctness = []
    rows = zip(df['event_type'].tolist(), df['trial_type'].tolist())
    for row_nr, (event_type, trial_type) in enumerate(rows):
        response = None
        if event_type in stimulus_codes:
            recent_stimuli = [event_type] + recent_stimuli[:2]
        elif event_type in response_codes:
            response = int(event_type)
            if row_nr > 0:
                # Store the response in the row directly before it. A response
                # in the very first row has no such row and is skipped.
                df.at[row_nr - 1, 'responses'] = response

        # Stimulus the response has to match
        if trial_type == '0back':
            target = recent_stimuli[0]
        elif trial_type == '2back' and all(s is not None for s in recent_stimuli):
            target = recent_stimuli[2]
        else:
            target = None

        if response is None or target is None:
            correctness.append(None)
        # Taking the number from the stimulus title (num1 -> 1)
        elif response == int(target.replace('num', '')):
            correctness.append('Correct')
        else:
            correctness.append('Incorrect')

    df['correct'] = correctness

    # Move the correctness from the response row up to the row before it
    # (the last row gets a missing value), then remove the response rows
    df_new = df.copy()
    df_new['correct'] = df_new['correct'].shift(-1)
    df_new = df_new.rename(columns={'event_type': 'nback_display'})
    df_new = df_new[~df_new['nback_display'].isin(response_codes)]
    codes = {"Correct": "1", "Incorrect": "0"}
    df_new['correct'] = df_new['correct'].map(
        lambda value: codes.get(value, value))
    df_new = df_new.fillna("n/a")
    return df_new, df
def process_selfref(df, answer_file_df):
    '''
    Process dataframes for selfref task

    Correctness is only evaluated for the syllables condition. Rows without an
    evaluation carry the string "None" in the 'correct' column.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        Cleaned dataframe with response time column. It is not modified.
    answer_file_df : pandas.core.frame.DataFrame
        Correct answers for selfref task (columns 'word' and 'Correct')

    Returns
    -------
    df_new : pandas.core.frame.DataFrame
        Final dataframe which will be converted to tsv (response rows removed;
        'correct' is "1", "0" or "None")
    df : pandas.core.frame.DataFrame
        Dataframe which will be used for plotting (stimulus and response rows).
    '''
    condition_by_marker = {'Selbst': 'Self', 'Merkel': 'Merkel', 'Silben': 'Syllables'}
    response_codes = ['1', '2']

    # Every row belongs to the block announced by the most recent marker row
    conditions = []
    current_condition = None
    for event_type in df['event_type']:
        if event_type in condition_by_marker:
            current_condition = condition_by_marker[event_type]
        conditions.append(current_condition)
    # assign() returns a new frame, so the caller's frame stays untouched
    df = df.assign(trial_type=conditions)
    df = df[~df['event_type'].isin(list(condition_by_marker))].reset_index(drop=True)

    # Correct responses as dict (lowercased word -> correct response)
    stimuli_to_responses = dict(
        zip(answer_file_df['word'].str.lower(), answer_file_df['Correct']))

    event_types = df['event_type'].tolist()
    trial_types = df['trial_type'].tolist()
    correctness = []
    # Checking the correctness only for syllables condition. Exactly one entry
    # is appended per row so that the list always matches the frame length.
    for row_nr, (event_type, trial_type) in enumerate(zip(event_types, trial_types)):
        verdict = "None"
        if trial_type == "Syllables" and event_type in response_codes:
            response = int(event_type)
            if row_nr > 0:
                previous_event = event_types[row_nr - 1]
                if (trial_types[row_nr - 1] == "Syllables"
                        and isinstance(previous_event, str)
                        and "Silben" in previous_event):
                    # Extract and lowercase the word after the hyphen
                    parts = previous_event.split(' - ')
                    stimulus = parts[1].lower() if len(parts) > 1 else None
                    if stimulus in stimuli_to_responses:
                        if response == stimuli_to_responses[stimulus]:
                            verdict = "Correct"
                        else:
                            verdict = "Incorrect"
                # NOTE(review): the response is only stored for the syllables
                # condition; responses of the other conditions are not copied
                # to their stimulus row - confirm that this is intended.
                df.at[row_nr - 1, 'responses'] = response
        correctness.append(verdict)

    df['correct'] = correctness
    # Drop rows that precede the first condition marker. (A comparison with
    # `!= None` is True for every row and therefore filters nothing.)
    df = df[df['trial_type'].notna()]

    # Move the correctness from the response row up to the row before it; the
    # last row is filled with "None"
    df_new = df.copy()
    df_new['correct'] = df_new['correct'].shift(-1, fill_value="None")
    df_new = df_new.rename(columns={'event_type': 'selfref_display'})
    df_new = df_new[~df_new['selfref_display'].isin(response_codes)]
    # NOTE(review): the placeholder "None" is kept as it is; the other tasks
    # write "n/a" for a missing correctness value.
    codes = {"Correct": "1", "Incorrect": "0"}
    df_new['correct'] = df_new['correct'].map(
        lambda value: codes.get(value, value))
    df_new = df_new.fillna("n/a")
    return df_new, df
def process_reward(df1):
    '''
    Process dataframes for reward task

    Labels every row with the cue of the trial it belongs to, classifies
    each response as fast or slow against an adaptive time window, books
    the running balance on the feedback rows and moves the flash marker
    (UCS / no UCS) and the response time onto the preceding row before the
    helper rows are dropped.

    Parameters
    ----------
    df1 : pandas.core.frame.DataFrame
        Cleaned dataframe with response time column. It is modified in
        place: ``trial_type`` and ``correct`` are added, ``balance`` and
        ``flash`` are added when feedback / flash rows exist, and
        ``response_time`` is shifted up by one row. Rows are addressed by
        label, so the frame is expected to carry the default 0..n-1 index.

    Returns
    -------
    df_new : pandas.core.frame.DataFrame
        Final dataframe which will be converted into tsv.
    '''
    cue_events = ("wCSp", "vCSp", "CSm", "lCSp")
    helper_events = ['1', "Balance", "UCS", "no UCS"]

    # Per the original note these values mirror the task code:
    # the response window shrinks after a fast response and grows otherwise.
    time_window_beg_trial = 0.3
    on_time_ratio = 0.95
    slow_ratio = 1.05

    # Carry the most recent cue forward; rows before the first cue stay None.
    conditions = []
    current_condition = None
    for event_type in df1['event_type']:
        if event_type in cue_events:
            current_condition = event_type
        conditions.append(current_condition)
    df1['trial_type'] = conditions
    df1["correct"] = None

    results = []
    balance = 0
    # A response only counts for the trial it was given in, so the flag
    # starts out False and is cleared again after every feedback row.
    on_time = False
    n_rows = len(df1)
    for i in range(n_rows):
        event_type = df1.at[i, "event_type"]
        if event_type == "1":
            # The response time is stored on the row preceding the response.
            response_time = df1.at[i - 1, "response_time"] if i > 0 else None
            on_time = bool(
                pd.notna(response_time)
                and response_time <= time_window_beg_trial
            )
            results.append("fast" if on_time else "slow")
        elif event_type == "UCS" and (
                i + 1 >= n_rows or df1.at[i + 1, "event_type"] != "1"):
            # Flash without a following response: nothing to classify.
            results.append("n/a")
        elif event_type == "Feedback":
            if results:
                # Anything but a fast response widens the window.
                time_window_beg_trial *= (
                    on_time_ratio if results[-1] == "fast" else slow_ratio
                )
            # Loss cue: lose 2 unless the response was on time.
            # Win cue: gain 2 only if the response was on time.
            trial_type = df1.at[i, "trial_type"]
            if trial_type == "lCSp" and not on_time:
                balance -= 2
            elif trial_type == "wCSp" and on_time:
                balance += 2
            df1.at[i, "balance"] = balance
            # Do not let this trial's response leak into the next one.
            on_time = False

    # Move the flash marker onto the row in front of it; the flash rows
    # themselves are removed below. A flash on the very first row has no
    # predecessor and is skipped instead of creating a bogus row.
    for index, event_type in df1['event_type'].items():
        if event_type in ("UCS", "no UCS") and index > 0:
            df1.at[index - 1, 'flash'] = event_type

    # Same idea for the response time: pull it up by one row.
    df1['response_time'] = df1['response_time'].shift(-1)

    # Add the results to the DataFrame.
    # NOTE: this expects exactly one entry in results per Feedback row.
    df1.loc[df1["event_type"] == "Feedback", "correct"] = results

    # Remove unnecessary rows
    df_new = df1[~df1['event_type'].isin(helper_events)]
    df_new = df_new.rename(columns={"event_type": "reward_display"})
    df_new = df_new.fillna("n/a")
    return df_new
# correct answers for faces task
faces_answer_string = """
file ecode itime resp ;
"Form_1.jpg" Form_1 5000 1 ;
"Form_2.jpg" Form_2 5000 2 ;
"Form_3.jpg" Form_3 5000 2 ;
"Form_4.jpg" Form_4 5000 1 ;
"Form_5.jpg" Form_5 5000 2 ;
"Form_6.jpg" Form_6 5000 1 ;
"boy4a.jpg" boy4a 5000 2 ;
"girl2a.jpg" girl2a 5000 1 ;
"boy2a.jpg" boy2a 5000 2 ;
"girl4a.jpg" girl4a 5000 1 ;
"boy6a.jpg" boy6a 5000 1 ;
"girl3a.jpg" girl3a 5000 2 ;
"Form_6.jpg" Form_6 5000 1 ;
"Form_5.jpg" Form_5 5000 2 ;
"Form_4.jpg" Form_4 5000 1 ;
"Form_3.jpg" Form_3 5000 2 ;
"Form_2.jpg" Form_2 5000 2 ;
"Form_1.jpg" Form_1 5000 1 ;
"girl1a.jpg" girl1a 5000 2 ;
"boy5a.jpg" boy5a 5000 2 ;
"girl5a.jpg" girl5a 5000 2 ;
"boy3a.jpg" boy3a 5000 1 ;
"girl6a.jpg" girl6a 5000 1 ;
"boy1a.jpg" boy1a 5000 1 ;
"Form_1.jpg" Form_1 5000 1 ;
"Form_2.jpg" Form_2 5000 2 ;
"Form_3.jpg" Form_3 5000 2 ;
"Form_4.jpg" Form_4 5000 1 ;
"Form_5.jpg" Form_5 5000 2 ;
"Form_6.jpg" Form_6 5000 1 ;
"boy4a.jpg" boy4a 5000 2 ;
"girl2a.jpg" girl2a 5000 1 ;
"boy2a.jpg" boy2a 5000 2 ;
"girl4a.jpg" girl4a 5000 1 ;
"boy6a.jpg" boy6a 5000 1 ;
"girl3a.jpg" girl3a 5000 2 ;
"Form_6.jpg" Form_6 5000 1 ;
"Form_5.jpg" Form_5 5000 2 ;
"Form_4.jpg" Form_4 5000 1 ;
"Form_3.jpg" Form_3 5000 2 ;
"Form_2.jpg" Form_2 5000 2 ;
"Form_1.jpg" Form_1 5000 1 ;
"girl1a.jpg" girl1a 5000 2 ;
"boy5a.jpg" boy5a 5000 2 ;
"girl5a.jpg" girl5a 5000 2 ;
"boy3a.jpg" boy3a 5000 1 ;
"girl6a.jpg" girl6a 5000 1 ;
"boy1a.jpg" boy1a 5000 1 ;
"""
# Turn the answer table into clean tab-separated text: runs of three or two
# tabs become a single tab and the row-terminating semicolons are dropped.
faces_answer_string = (
    faces_answer_string
    .replace("\t\t\t", "\t")
    .replace("\t\t", "\t")
    .replace(";", "")
)
df_faces_answers = pd.read_csv(StringIO(faces_answer_string), sep='\t')
# Header cells can carry stray white space; strip it from every column name.
df_faces_answers.columns = list(map(str.strip, df_faces_answers.columns))
# Stimuli repeat across blocks, so keep one (ecode, resp) pair per stimulus.
df_faces_answers = df_faces_answers[["ecode", "resp"]].drop_duplicates()
# Lookup table: stimulus code -> expected response.
dict_faces_answers = {
    ecode.strip(): resp
    for ecode, resp in zip(df_faces_answers['ecode'],
                           df_faces_answers['resp'])
}
# reversal correct answers
dict_reversal_answers = {}
# First do version A
dict_reversal_answers["version_A"] = {}
dict_reversal_answers["version_A"]["S"] = list([
[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9,
0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9,
0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.3, 0.3,