# utils.py
import math
import os
from contextlib import contextmanager
from time import time

import deepspeed
import numpy as np
import torch as th
import torch.nn.functional as F
from tqdm import tqdm

# Detect whether the module is being run from IPython/Jupyter
try:
    __IPYTHON__
    run_from_ipython = True
except NameError:
    run_from_ipython = False

def randexclude(rng: np.random.RandomState, n: int, exclude: int) -> int:
    """Sample a random integer in [0, n) that is not equal to `exclude`."""
    while True:
        x = rng.randint(n)
        if x != exclude:
            return x
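
# Usage sketch (illustrative, not part of the original module): draw a
# distractor index that is guaranteed to differ from the true one.
#   rng = np.random.RandomState(seed=0)
#   distractor = randexclude(rng, n=100, exclude=42)  # in [0, 100), never 42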

def tohuman(n: int) -> str:
    """Format a count as a human-readable string with a K/M/B suffix."""
    if n >= 1e9:
        return f'{n / 1e9:.1f}B'
    elif n >= 1e6:
        return f'{n / 1e6:.1f}M'
    elif n >= 1e3:
        return f'{n / 1e3:.1f}K'
    return str(n)
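
# For example: tohuman(1_500) == '1.5K', tohuman(2_000_000) == '2.0M',
# tohuman(3_100_000_000) == '3.1B', tohuman(999) == '999'.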

def logvars(name, logs, xs):
    """Add summary statistics (mean/std/min/max) of `xs` to the `logs` dict."""
    xs = th.vstack(xs)
    logs.update({f'{name}-mean': xs.mean(),
                 f'{name}-std': xs.std(),
                 f'{name}-min': xs.min(),
                 f'{name}-max': xs.max()})
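
# Usage sketch (illustrative): collect per-batch tensors during a loop, then
# log their summary statistics in one call.
#   logs = {}
#   logvars('advantages', logs, [th.randn(4, 8) for _ in range(3)])
#   # logs now has 'advantages-mean', '-std', '-min' and '-max' entries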

def batch_map(fn, xs, bsize: int, desc=None):
    """Apply `fn` to `xs` in chunks of size `bsize` and concatenate the results."""
    out = []
    for ind in tqdm(range(math.ceil(len(xs) / bsize)), desc=desc, disable=not desc):
        batch = xs[ind * bsize:(ind + 1) * bsize]
        out.extend(fn(batch))
    return out
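
# Usage sketch (illustrative; `texts` is assumed to be a list of strings):
#   lengths = batch_map(lambda chunk: [len(x) for x in chunk],
#                       texts, bsize=32, desc='measuring')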

def load_tensors(name, texts, reward_model, tokenizer, max_length=64, use_cache=True):
    """Tokenize `texts`, score them with `reward_model` and attach whitened
    per-token rewards; the result is cached on disk under `cache/`."""
    cache_path = f'cache/{name}_{max_length=}_tokenizer={tokenizer.name_or_path.split("/")[-1]}.pt'
    if use_cache and os.path.exists(cache_path):
        tensors = th.load(cache_path)
    else:
        tensors = tokenizer(
            [tokenizer.bos_token + x for x in texts],
            max_length=max_length,
            truncation=True,
            padding=True,
            return_tensors='pt'
        )
        trimmed_texts = tokenizer.batch_decode(tensors['input_ids'], skip_special_tokens=True)
        # Whiten the rewards and broadcast them over the token dimension,
        # zeroing out padding positions
        rewards = th.as_tensor(reward_model(trimmed_texts))
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-30)
        rewards = rewards.view(-1, 1).repeat(1, tensors['input_ids'].shape[1])
        rewards[tensors['attention_mask'].eq(0)] = 0
        tensors['rewards'] = rewards
        # Append a terminal <eos> token, masked out of attention
        tensors['attention_mask'] = F.pad(tensors['attention_mask'], (0, 1), value=0)
        tensors['input_ids'] = F.pad(tensors['input_ids'], (0, 1), value=tokenizer.eos_token_id)
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
        th.save(tensors, cache_path)
    print(f"{tohuman(np.prod(tensors['input_ids'].shape))} tokens")
    return tensors
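
# Usage sketch (illustrative; `reward_fn` and `texts` are assumptions, and the
# tokenizer setup follows the usual Hugging Face pattern):
#   from transformers import AutoTokenizer
#   tokenizer = AutoTokenizer.from_pretrained('gpt2')
#   tokenizer.pad_token = tokenizer.eos_token
#   tensors = load_tensors('reviews', texts, reward_fn, tokenizer, max_length=64)
#   # tensors holds 'input_ids', 'attention_mask' and per-token 'rewards'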

def isdelim(c: str):
    """Return True if `c` is a sentence-ending delimiter."""
    return c in '?!.;'

def pprint(s):
    """Print `s` one sentence per line, breaking at the whitespace that
    follows a sentence delimiter."""
    trig = False
    si = 0
    l = len(s) - 1
    for i in range(len(s)):
        if i == l:
            # Flush whatever remains at the end of the string
            print(s[si:].strip())
        elif trig or isdelim(s[i]):
            trig = True
            if s[i].isspace():
                print(s[si:i + 1].strip())
                si = i + 1
                trig = False
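
# For example, pprint('Hello there. How are you?') prints:
#   Hello there.
#   How are you?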

@contextmanager
def timeit(desc='something important'):
    """Context manager that reports the wall-clock time spent in its body."""
    print(f'{desc}...')
    stime = time()
    try:
        yield None
    finally:
        print(f'done with {desc.lower()} in {time() - stime:.1f}s')
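
# Usage sketch (the checkpoint path is hypothetical):
#   with timeit('loading data'):
#       data = th.load('checkpoint.pt')
#   # prints 'loading data...' then 'done with loading data in <elapsed>s'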

def check_weights(param):
    """Checksum the weights of `param[0]` by summing them. Under DeepSpeed
    ZeRO stage 3 the weights are partitioned across ranks, so they are
    gathered first and only rank 0 returns a value (other ranks get None)."""
    if os.environ.get('DEEPSPEED_ZERO_STAGE', '0') == '3':
        with deepspeed.zero.GatheredParameters(param[0].weight, modifier_rank=0):
            if deepspeed.comm.get_rank() == 0:
                return param[0].weight.sum()
    else:
        return param[0].weight.sum()
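
# Usage sketch (illustrative): checksum a layer's weights before and after an
# optimizer step to verify they actually changed.
#   layer = th.nn.Linear(8, 8)
#   before = check_weights([layer])
#   # ... one training step ...
#   assert check_weights([layer]) != before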