-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearch.py
101 lines (80 loc) · 3.5 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
import re
import xlsxwriter
from datetime import datetime
import dateutil
from dateutil.relativedelta import *
# ---------------------------------------------------------------------------
# Extract pathology cases from a plain-text report dump ("ALLDATA.txt") and
# write one spreadsheet row per case to "cases_new.xlsx".
#
# For every report header line the script reads, in order: the sample ID,
# the patient line (name, gender, sample date), the birth-date line, and then
# the "Diagnostic :" and "Renseignements cliniques :" paragraphs.  Records
# lacking either paragraph are reported on stdout; their identity columns are
# still left in the sheet.
# ---------------------------------------------------------------------------

# Patterns are raw strings (so \s, \d, \( are regex escapes rather than
# invalid Python string escapes) and are compiled once, outside the loop.
HEADER_RE = re.compile(r"Rapport anatomo-pathologique\s+Examen N°\s+(?P<sampleID>H\d{7})")
PATIENT_RE = re.compile(
    r"Patient\s+(?P<name>[A-Z ]+,[A-Z ]+)\s+\((?P<gender>[FM])\)\s+"
    r"Date de prélèvement :\s+(?P<sampleDate>\d{1,2}\.\d{1,2}\.\d{4})"
)
BIRTH_RE = re.compile(r"né\(e\)\s+le\s+(?P<birthDate>\d{1,2}\.\d{1,2}\.\d{4})")
# A line starting with this text ends the search for the two paragraphs
# (presumably a footer with a phone number -- TODO confirm against the data).
STOP_RE = re.compile(r"tél: 0")
DIAGNOSTIC_RE = re.compile(r"Diagnostic :")
CLINICAL_RE = re.compile(r"Renseignements cliniques :")
BLANK_RE = re.compile(r"\s*\n")
DATE_FORMAT = '%d.%m.%Y'

COLUMN_TITLES = (
    "IPP",  # NOTE: only the title is written; no data row fills column 0.
    "sampleID",
    "name",
    "gender",
    "birthdate",
    "sample date",
    "age at sampling",
    "clinical info",
    "diagnostic",
)
LAST_COLUMN = len(COLUMN_TITLES) - 1


def _find_match(lines, pattern):
    """Consume *lines* until one matches *pattern* at its start.

    Returns the match object, or None when the input is exhausted first.
    Returning None (instead of letting ``next()`` raise StopIteration) lets
    the caller finish cleanly and still save the workbook.
    """
    for text in lines:
        found = pattern.match(text)
        if found:
            return found
    return None


def _read_paragraph(lines):
    """Return the lines up to the next blank line, concatenated.

    The terminating blank line is consumed but not included.  Reaching the
    end of the input also terminates the paragraph.
    """
    parts = []
    for text in lines:
        if BLANK_RE.match(text):
            break
        parts.append(text)
    return "".join(parts)


workbook = xlsxwriter.Workbook('cases_new.xlsx')
worksheet = workbook.add_worksheet()
outputRow = 0

formatDate = workbook.add_format()
formatDate.set_num_format('dd/mm/yyyy')
formatTitle = workbook.add_format({'bg_color':'#FFA0A0'})

# Title row, kept visible while scrolling.
for column, title in enumerate(COLUMN_TITLES):
    worksheet.write_string(outputRow, column, title, formatTitle)
worksheet.freeze_panes(1, 0)

# NOTE(review): the file is opened with the platform default encoding while
# the patterns above contain accented characters -- confirm the encoding of
# ALLDATA.txt before pinning one explicitly.
with open("ALLDATA.txt") as target:
    # The file object is its own iterator, so the helpers and next() below
    # advance the same position as this outer loop.
    for line in target:
        header = HEADER_RE.match(line)
        if not header:
            continue
        sampleID = header.group('sampleID')

        patient = _find_match(target, PATIENT_RE)
        if patient is None:
            print("Broken record for sample: %s (end of file before patient line)" % sampleID)
            break
        name = patient.group('name')
        gender = patient.group('gender')
        sampleDate = datetime.strptime(patient.group('sampleDate'), DATE_FORMAT)

        birth = _find_match(target, BIRTH_RE)
        if birth is None:
            print("Broken record for name: %s (end of file before birth date)" % name)
            break
        birthDate = datetime.strptime(birth.group('birthDate'), DATE_FORMAT)
        # Whole years elapsed between birth and sampling.
        ageAtSampling = dateutil.relativedelta.relativedelta(sampleDate, birthDate).years

        # The identity columns are written before the paragraphs are looked
        # for, so a broken record still occupies a (partial) row.
        outputRow += 1
        worksheet.write_string(outputRow, 1, sampleID)
        worksheet.write_string(outputRow, 2, name)
        worksheet.write_string(outputRow, 3, gender)
        worksheet.write_datetime(outputRow, 4, birthDate, formatDate)
        worksheet.write_datetime(outputRow, 5, sampleDate, formatDate)
        worksheet.write_number(outputRow, 6, ageAtSampling)

        # False means "not found yet"; an empty paragraph is also falsy and
        # therefore counts as missing.
        diagnostic = False
        clinical = False
        while not (diagnostic and clinical):
            line = next(target, None)
            if line is None or STOP_RE.match(line):
                break
            if DIAGNOSTIC_RE.match(line):
                diagnostic = _read_paragraph(target)
            elif CLINICAL_RE.match(line):
                clinical = _read_paragraph(target)

        if not (diagnostic and clinical):
            print("Broken record for name: %s" % name)
            print("<<<<%s>>>>" % clinical)
            print("<<<<%s>>>>" % diagnostic)
            continue
        worksheet.write_string(outputRow, 7, clinical)
        worksheet.write_string(outputRow, 8, diagnostic)

worksheet.autofilter(0, 0, outputRow, LAST_COLUMN)
workbook.close()