-
Notifications
You must be signed in to change notification settings - Fork 84
/
Copy pathSplitZWR.py
executable file
·101 lines (92 loc) · 3.45 KB
/
SplitZWR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python
# Split .zwr files into pieces of a maximum size:
#
# python SplitZWR.py --size <MiB> *.zwr
#
# or
#
# ls *.zwr | python SplitZWR.py --size <MiB> --stdin
#
#---------------------------------------------------------------------------
# Copyright 2011 The Open Source Electronic Health Record Agent
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#---------------------------------------------------------------------------
from builtins import object
import argparse
import codecs
import os
import sys
class SplitZWR(object):
    """Split one .zwr file into numbered pieces bounded by ``maxSize``.

    The first two lines of the input are treated as headers and are written
    at the top of every piece.  Pieces are created in the input's directory
    and named ``<num>-<index>+<name>``, where ``<num>+<name>`` is the input's
    base name (``<num>`` is 0 when the base name contains no ``+``).

    Sizes are counted in characters of the decoded text.  Every piece holds
    at least one data line, so a single line longer than the limit still
    produces a (larger) piece of its own.
    """

    def __init__(self, filepath, maxSize):
        """Open ``filepath`` for reading and consume its two header lines.

        filepath -- path of the .zwr file to split
        maxSize  -- size limit for each output piece
        """
        self.maxSize = maxSize
        self.dir = os.path.dirname(filepath)
        nameSplit = os.path.basename(filepath).split('+', 1)
        if len(nameSplit) > 1:
            self.num, self.name = nameSplit
        else:
            self.num = 0
            self.name = nameSplit[0]
        self.input = codecs.open(filepath, 'r', encoding='ISO-8859-1', errors='ignore')
        self.headers = [self.input.readline() for _ in range(2)]
        self.hdrSize = sum(len(hdr) for hdr in self.headers)
        # Start out "full" so that the first data line opens the first piece.
        self.outSize = self.maxSize
        self.outFile = None
        self.index = 0

    def new_file(self):
        """Finish the current piece, if any, and start the next one.

        The new piece begins with the header lines and its path is reported
        on standard output.
        """
        # Close the previous piece so its data is flushed and the handle
        # is released before the next one is opened.
        if self.outFile is not None:
            self.outFile.close()
        self.index += 1
        outName = '%s-%d+%s' % (self.num, self.index, self.name)
        outPath = os.path.join(self.dir, outName)
        self.outFile = codecs.open(outPath, 'w', encoding="ISO-8859-1", errors='ignore')
        self.outFile.writelines(self.headers)
        self.outSize = self.hdrSize
        sys.stdout.write(' %s\n' % outPath)

    def do_line(self, line):
        """Append ``line`` to the current piece, rolling over when it would not fit."""
        if self.outSize + len(line) > self.maxSize:
            self.new_file()
        self.outSize += len(line)
        self.outFile.write(line)

    def close(self):
        """Close the input and the current piece; safe to call more than once."""
        if self.outFile is not None:
            self.outFile.close()
        self.input.close()

    def run(self):
        """Distribute all remaining input lines over the pieces.

        Every file handle is closed before returning, even on error, so the
        caller can safely delete or reopen the input afterwards.  The
        splitter cannot be run again after this call.
        """
        try:
            for line in self.input:
                self.do_line(line)
        finally:
            self.close()
def splitZWR(f, maxSize):
    """Split the file at path ``f`` into pieces and delete the original.

    f       -- path of the .zwr file to split
    maxSize -- size limit handed to the splitter for each piece
    """
    sys.stdout.write('Splitting "%s":\n' % f)
    splitter = SplitZWR(f, maxSize)
    splitter.run()
    # The pieces take the place of the original file.
    os.remove(f)
def main(argv=None):
    """Parse the command line and split every oversized .zwr file named on it.

    argv -- argument list to parse; defaults to ``sys.argv[1:]`` when None

    File names come from the positional arguments and, with ``--stdin``,
    from the lines of standard input.  Names containing "DD.zwr" are skipped
    silently, names not ending in ".zwr" are skipped with a message on
    standard error, and the remaining files are split only when they are
    larger than the ``--size`` limit.  Exits through ``parser.error`` when
    ``--size`` is not positive.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--size', dest='size', action='store',
                        type=int, required=True,
                        metavar='<MiB>', help='max output file size in MiB')
    parser.add_argument('--stdin', dest='stdin', action='store_true',
                        help='read files to split from standard input lines')
    parser.add_argument('files', nargs='*', metavar='<files>',
                        help='files to split')
    config = parser.parse_args(argv)

    # A zero or negative limit would put every single line in its own piece
    # and then delete the source, so reject it up front.
    if config.size <= 0:
        parser.error('--size must be a positive number of MiB')
    maxSize = config.size << 20  # MiB -> bytes

    files = list(config.files)
    if config.stdin:
        # Ignore blank lines instead of reporting them as non-.zwr files.
        stripped = (line.rstrip() for line in sys.stdin)
        files.extend(name for name in stripped if name)

    for f in files:
        if "DD.zwr" in f:
            continue
        if not f.lower().endswith('.zwr'):
            sys.stderr.write('Skipping non-.zwr file: %s\n' % f)
            continue
        if os.stat(f).st_size > maxSize:
            splitZWR(f, maxSize)


if __name__ == '__main__':
    main()