Commit 80a7fa8
Update for iarg.
zzsfornlp committed Apr 15, 2020 · 1 parent 379020c
Showing 60 changed files with 9,664 additions and 149 deletions.
4 changes: 4 additions & 0 deletions README.md
@@ -12,6 +12,10 @@ How to configure, generally: [here](docs/conf.md)

Related works:

"A Two-Step Approach for Implicit Event Argument Detection": [todo]()

Some other parsers for interested readers: [todo]()

"An Empirical Investigation of Structured Output Modeling for Graph-based Neural Dependency Parsing": [details](docs/emp_graph.md)

"On Difficulties of Cross-Lingual Transfer with Order Differences: A Case Study on Dependency Parsing": [details](docs/cl0.md)
19 changes: 15 additions & 4 deletions msp/__init__.py
@@ -1,17 +1,18 @@
#

# The Mingled Structured Prediction (v0plus) package
# by zzs (from 2018.02 - now)
# author: zzs
# time: 2018.02 - now

# dependencies: pytorch, numpy, scipy, gensim, cython, pybind11
# conda install pytorch numpy scipy gensim cython pybind11
# dependencies: pytorch, numpy, scipy, gensim, cython, pybind11, pandas
# conda install pytorch numpy scipy gensim cython pybind11 pandas

VERSION_MAJOR = 0
VERSION_MINOR = 1
VERSION_PATCH = 1
VERSION_STATUS = "dev"

# TODO(!)
# specific todos
# nn optimizer / param groups?
# check nn module (for simplification?)
# new model/training/testing scheme -> make it more modularized
@@ -20,6 +21,16 @@
# easy-to-use calculations result-representing tools for analysis
# various tools for python as the replacement of direct bash shell
# gru and cnn have problems?
# ----
# -- Next Version principles and goals:
# nlp data types
# use type hint
# checking and reporting
# use eval for Conf
# io and serialization
# summarize more common patterns, including those in scripts
# everything has (more flexible) conf
# more flexible save/load for parts of the model; (better naming, and support for dynamically adding and deleting components!!)

def version():
return (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH, VERSION_STATUS)
13 changes: 9 additions & 4 deletions msp/data/streamer.py
@@ -116,14 +116,18 @@ class FileOrFdStreamer(Streamer):
def __init__(self, file_or_fd):
super().__init__()
self.file = file_or_fd
self.fd = None
self.input_is_fd = not isinstance(file_or_fd, str)
if self.input_is_fd:
self.fd = file_or_fd
else:
self.fd = None

def __del__(self):
if self.fd is not None:
if self.fd is not None and not self.input_is_fd:
self.fd.close()

def _restart(self):
if isinstance(self.file, str):
if not self.input_is_fd:
if self.fd is not None:
self.fd.close()
self.fd = zopen(self.file)
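The change above distinguishes streams the class opened itself from file descriptors handed in by the caller, so that cleanup only closes what the class owns. A minimal, self-contained sketch of that ownership rule (illustrative only, not the repo's exact class; plain open stands in for the repo's zopen helper):

```python
class FileOrFdSource:
    """Illustrative sketch: only close handles we opened ourselves."""
    def __init__(self, file_or_fd):
        self.input_is_fd = not isinstance(file_or_fd, str)
        self.file = file_or_fd
        # if the caller passed an open handle, reuse it but never close it
        self.fd = file_or_fd if self.input_is_fd else None

    def restart(self):
        if not self.input_is_fd:
            if self.fd is not None:
                self.fd.close()
            self.fd = open(self.file)   # the repo uses its own zopen helper here
        # for a caller-provided fd there is nothing to reopen

    def __del__(self):
        if self.fd is not None and not self.input_is_fd:
            self.fd.close()
```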
@@ -474,7 +478,8 @@ def _next(self):
while self.buffered_bsize_ < self.k:
one = self.base_streamer_.next()
if self.base_streamer_.is_eos(one):
break
# todo(+N): this actually does not ensure the end if base_streamer can re-produce things
break # should have an `active` check here; currently skipped, assuming it is the base_streamer's responsibility
# dump instances (like short or long instances)
dump_instance = any(f_(one) for f_ in self.dump_detectors)
if dump_instance:
29 changes: 19 additions & 10 deletions msp/data/vocab.py
@@ -3,12 +3,12 @@
from typing import Dict, Sequence

from msp.utils import zopen, zlog, zwarn, zcheck, StrHelper, FileHelper, Helper, JsonRW, PickleRW, printing, Random
from collections import Iterable, defaultdict
from collections import Iterable, defaultdict, OrderedDict
import numpy as np
import re

# for binary w2v loading
from gensim.models import KeyedVectors
# # for binary w2v loading
# from gensim.models import KeyedVectors

#

@@ -190,7 +190,7 @@ def filter_embed(self, wv: 'WordVectors', init_nohit=0., scale=1.0, assert_all_h
#
class VocabHelper:
# todo(0): I guess this will make them unique
SPECIAL_PATTERN = re.compile(r"\<z_([a-zA-Z]{3})_z\>")
SPECIAL_PATTERN = re.compile(r"\<z_([a-zA-Z]+)_z\>")

@staticmethod
def extract_name(w):
@@ -310,12 +310,15 @@ def filter_vals(word_vals, word_filter=(lambda ww, rank, val: True)):

# {word->vals} => {word->idx}, [filtered values]
@staticmethod
def ranking_vals(word_vals, pre_list, post_list, default_val, word_filter=(lambda ww, rank, val: True)):
ranked_list = Helper.rank_key(word_vals)
def ranking_vals(word_vals, pre_list, post_list, default_val, sort_vals, word_filter=(lambda ww, rank, val: True)):
if sort_vals:
valid_word_list = Helper.rank_key(word_vals)
else:
valid_word_list = word_vals.keys()
#
truncated_vals = [default_val] * len(pre_list)
v = dict(zip(pre_list, range(len(pre_list))))
for ii, ww in enumerate(ranked_list):
for ii, ww in enumerate(valid_word_list):
rank, val = ii+1, word_vals[ww]
if word_filter(ww, rank, val):
v[ww] = len(v)
@@ -337,9 +340,13 @@ def rf_filter(ww,rank,val): return val>=fthres and rank<=rthres
#
def finish(self, word_filter=(lambda ww, rank, val: True), sort_by_count=True, target_range=DEFAULT_TARGET_RANGE):
v = self.v
# sort by count-value, otherwise by adding order
tmp_vals = self.counts_ if sort_by_count else {k:-i for i,k in enumerate(self.keys_) if k in self.counts_}
v.v, v.final_vals = VocabBuilder.ranking_vals(tmp_vals, v.pre_list, v.post_list, self.default_val_, word_filter=word_filter)
if sort_by_count:
v.v, v.final_vals = VocabBuilder.ranking_vals(
self.counts_, v.pre_list, v.post_list, self.default_val_, True, word_filter=word_filter)
else:
tmp_counts_ = OrderedDict([(k, self.counts_[k]) for k in self.keys_])
v.v, v.final_vals = VocabBuilder.ranking_vals(
tmp_counts_, v.pre_list, v.post_list, self.default_val_, False, word_filter=word_filter)
v.final_words = Helper.reverse_idx(v.v)
printing("Build Vocab %s ok, from %d to %d, as %s." % (v.name, len(self.counts_), len(v), str(v)))
#
@@ -512,6 +519,8 @@ def _load_bin(fname):
printing("Going to load pre-trained (binary) w2v from %s ..." % fname)
one = WordVectors()
#
from gensim.models import KeyedVectors
#
kv = KeyedVectors.load_word2vec_format(fname, binary=True)
# KeyedVectors.save_word2vec_format()
one.num_words, one.embed_size = len(kv.vectors), len(kv.vectors[0])
58 changes: 46 additions & 12 deletions msp/nn/backends/bktr.py
@@ -11,8 +11,10 @@
from .common import COMMON_CONFIG, get_unique_name, _my_get_params_init

Expr = torch.Tensor
Module = torch.nn.Module
CPU_DEVICE = torch.device("cpu")
DEFAULT_DEVICE = CPU_DEVICE
T_INIT = torch.nn.init

# types
float32 = torch.float32
@@ -52,19 +54,28 @@ def is_expr(v):
is_tensor = is_expr

# parameter init from BK (similar to common.get_params_init)
# return a tensor here
def get_params_init(shape, init, lookup):
if COMMON_CONFIG.use_my_init:
return _my_get_params_init(shape, init, lookup)
# return a tensor here; (out_p4i means shape[0] actually stacks that many output pieces, so each piece can get a more reasonable init in some cases)
def get_params_init(shape, init, lookup, out_p4i, scale):
# if COMMON_CONFIG.use_my_init:
# return _my_get_params_init(shape, init, lookup)
assert not COMMON_CONFIG.use_my_init, "now use ones from pytorch for param init"
x = torch.empty(*shape, dtype=torch.float32, device=DEFAULT_DEVICE)
if len(shape) == 1:
nn.init.zeros_(x)
else:
if lookup:
scale = np.sqrt(3.0 / shape[-1])
nn.init.uniform_(x, -scale, scale)
_iscale = np.sqrt(3.0 / shape[-1])
nn.init.uniform_(x, -_iscale, _iscale)
# todo(+N): again back to previous init method
# nn.init.normal_(x)
x *= scale
elif init == "default" or init == "glorot":
nn.init.xavier_uniform_(x)
out_size = shape[0]
assert out_size % out_p4i == 0, "Bad output shape pieces for init value!"
s0 = out_size//out_p4i
for i in range(out_p4i):
nn.init.xavier_uniform_(x[i*s0:(i+1)*s0])
x *= scale
elif init == "ortho":
# todo(note): assume squared matrices
assert len(shape)==2 and (shape[0]%shape[1]==0 or shape[1]%shape[0]==0), "Invalid shape for ortho init"
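The new out_p4i argument lets a parameter whose first dimension stacks several output blocks (for example the gates of an LSTM weight) be initialized block by block, so each slice gets the fan-in/fan-out xavier would give it on its own. A minimal sketch of that idea in plain PyTorch (generic illustration, not the repo's get_params_init):

```python
import torch
import torch.nn as nn

def piecewise_xavier(shape, out_pieces=1, scale=1.0):
    # shape[0] is assumed to stack out_pieces output blocks of equal size
    x = torch.empty(*shape)
    assert shape[0] % out_pieces == 0
    block = shape[0] // out_pieces
    for i in range(out_pieces):
        nn.init.xavier_uniform_(x[i * block:(i + 1) * block])  # init each block on its own
    return x * scale

# e.g. an LSTM-style weight stacking 4 gates of size 100 over a 200-dim input:
w = piecewise_xavier((4 * 100, 200), out_pieces=4)
print(w.shape)  # torch.Size([400, 200])
```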
@@ -154,10 +165,20 @@ def nnc_name(self, name, check_stack=True):
def get_unique_name(self, name):
return get_unique_name(self.name_dict, name)

# add a torch.nn.Module's parameters
def param_add_external(self, name, mod: nn.Module):
ret_pairs = []
for one_subname, one_param in mod.named_parameters():
one_subname = "_".join(one_subname.split(".")) # cannot include "."
self.model_.register_parameter(name+"/"+one_subname, one_param)
ret_pairs.append((one_subname, one_param))
return ret_pairs

# register param
def param_new(self, name, shape, init_weights, lookup=False):
# almost all params are float
p = Parameter(torch.as_tensor(init_weights, dtype=torch.float32, device=DEFAULT_DEVICE))
assert name not in self.model_._parameters # no modules in this pc
self.model_.register_parameter(name, p)
return p
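param_add_external pulls the parameters of an external torch.nn.Module into this parameter collection; the sub-names are rewritten because nn.Module.register_parameter rejects names containing ".". A small illustration of that constraint and the rename, outside the repo's classes (holder and the module below are hypothetical stand-ins):

```python
import torch.nn as nn

holder = nn.Module()                                           # stands in for the collection's self.model_
external = nn.Sequential(nn.Linear(8, 16), nn.Linear(16, 4))   # has dotted names like "0.weight"

for sub_name, param in external.named_parameters():
    safe_name = "_".join(sub_name.split("."))    # register_parameter forbids '.' in names
    holder.register_parameter("ext/" + safe_name, param)

print([n for n, _ in holder.named_parameters()])
# ['ext/0_weight', 'ext/0_bias', 'ext/1_weight', 'ext/1_bias']
```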

@@ -178,9 +199,10 @@ def param_set_trainable(self, p, trainable):
def optimizer_set(self, optim_type, lrf_sv, oconf, params: List = None, check_repeat=True, check_full=False):
if params is None:
params = self.model_.parameters()
optim = Optim(optim_type, lrf_sv, oconf, params)
cur_optid = len(self.optims_)
self.optims_.append(optim)
if len(params) > 0:
optim = Optim(optim_type, lrf_sv, oconf, params)
cur_optid = len(self.optims_)
self.optims_.append(optim)
# track all params
for p in params:
paramid = id(p)
@@ -344,11 +366,14 @@ def select(t, idxes, dim=0):
elu = F.elu
exp = torch.exp
expand = lambda x, *args: x.expand(*args)
gelu = getattr(F, "gelu", None) # todo(warn): on older versions, this does not exist
log = torch.log
logsigmoid = F.logsigmoid
logsumexp = torch.logsumexp
max = torch.max # todo(warn): with dim, return tuple
max_elem = torch.max # todo(warn): max_elem(a, b)
min = torch.min
min_elem = torch.min
masked_select = torch.masked_select
matmul = torch.matmul
pad = F.pad
@@ -536,8 +561,9 @@ def conv(self, input_expr):
def mask2idx(mask_t, padding_idx=0):
mask_shape = get_shape(mask_t) # [*, L]
counts = mask_t.sum(-1).long() # [*]
max_count = counts.max().item() # int, the max expanding
padding_counts = max_count - counts # [*]
max_count_t = counts.max(-1, keepdim=True)[0]
max_count = max_count_t.item() # int, the max expanding
padding_counts = max_count_t - counts # [*]
max_padding_count = padding_counts.max().item() # int, the max count of padding
pad_t = (arange_idx(max_padding_count) < padding_counts.unsqueeze(-1)).float() # [*, max_pad]
concat_t = concat([mask_t, pad_t], -1) # [*, L+max_pad]
@@ -548,3 +574,11 @@ def mask2idx(mask_t, padding_idx=0):
valid_mask = (ret_idxes < slen).float()
ret_idxes[ret_idxes >= slen] = padding_idx
return ret_idxes, valid_mask
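mask2idx turns a 0/1 mask into the indices of its active positions, right-padded (with padding_idx) to the largest count in the batch, plus a mask over those indices. Since part of its body is elided above, here is a small, loop-based re-implementation of that contract for illustration only (the repo's version is vectorized):

```python
import torch

def mask_to_indices(mask, padding_idx=0):
    # mask: [batch, L] of 0/1 floats; returns indices of 1-positions, right-padded
    # to the max count in the batch, plus a validity mask over those slots.
    counts = mask.sum(-1).long()                      # [batch]
    max_count = int(counts.max().item())
    idxes = torch.full(mask.shape[:-1] + (max_count,), padding_idx, dtype=torch.long)
    valid = torch.zeros(mask.shape[:-1] + (max_count,))
    for b in range(mask.shape[0]):
        pos = mask[b].nonzero(as_tuple=True)[0]
        idxes[b, :len(pos)] = pos
        valid[b, :len(pos)] = 1.
    return idxes, valid

m = torch.tensor([[1., 1., 0., 1.],
                  [0., 1., 0., 0.]])
print(mask_to_indices(m))
# indices [[0, 1, 3], [1, 0, 0]] with validity [[1, 1, 1], [1, 0, 0]]
```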

# maxpool 1d at last dim
def max_pool1d(input, kernel):
orig_shape = get_shape(input)
# make it 3d
tmp_res = F.max_pool1d(input.view([-1]+orig_shape[-2:]), kernel)
real_res = tmp_res.view(orig_shape[:-1] + [-1])
return real_res
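A quick usage sketch for the helper above, assuming max_pool1d is in scope (it reshapes the input to 3-D for F.max_pool1d and restores the leading dims afterwards):

```python
import torch

x = torch.randn(2, 3, 8)   # e.g. [batch, seq, feat]
y = max_pool1d(x, 8)       # kernel spans the whole last dim -> max over features
print(y.shape)             # torch.Size([2, 3, 1])
```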
8 changes: 4 additions & 4 deletions msp/nn/layers/__init__.py
@@ -2,12 +2,12 @@

from .basic import BasicNode, RefreshOptions, ActivationHelper, Dropout, DropoutLastN
from .basic import NoDropRop, NoFixRop, FreezeRop
from .ff import Affine, LayerNorm, MatrixNode, Embedding, PosiEmbedding, RelPosiEmbedding
from .ff import Affine, LayerNorm, MatrixNode, Embedding, PosiEmbedding, RelPosiEmbedding, PosiEmbedding2
from .multi import Sequential, Summer, Concater, Joiner, \
NodeWrapper, AddNormWrapper, AddActWrapper, HighWayWrapper, get_mlp
from .enc import RnnNode, GruNode, LstmNode, RnnLayer, RnnLayerBatchFirstWrapper, CnnNode, CnnLayer, \
NodeWrapper, AddNormWrapper, AddActWrapper, HighWayWrapper, get_mlp, get_mlp2
from .enc import RnnNode, GruNode, LstmNode, LstmNode2, RnnLayer, RnnLayerBatchFirstWrapper, CnnNode, CnnLayer, \
TransformerEncoderLayer, TransformerEncoder, Transformer2EncoderLayer, Transformer2Encoder
from .att import AttentionNode, FfAttentionNode, MultiHeadAttention, \
MultiHeadRelationalAttention, MultiHeadSelfDistAttention, AttConf, AttDistHelper
from .dec import *
from .biaffine import BiAffineScorer
from .biaffine import BiAffineScorer, PairScorerConf, PairScorer
30 changes: 23 additions & 7 deletions msp/nn/layers/basic.py
@@ -2,6 +2,7 @@

from ..backends import BK
from ..backends.common import get_unique_name
import numpy as np

from msp.utils import extract_stack

@@ -40,7 +41,7 @@ def NoFixRop(): return RefreshOptions(fix_drop=False, fix_set=("fix_drop", ))

# helpers
class ActivationHelper(object):
ACTIVATIONS = {"tanh": BK.tanh, "softmax": BK.softmax, "relu": BK.relu, "elu": BK.elu,
ACTIVATIONS = {"tanh": BK.tanh, "softmax": BK.softmax, "relu": BK.relu, "elu": BK.elu, "gelu": BK.gelu,
"sigmoid": BK.sigmoid, "linear": lambda x:x}
# reduction for seq after conv
POOLINGS = {"max": lambda x: BK.max(x, -2)[0], "avg": lambda x: BK.avg(x, -2)}
@@ -109,11 +110,12 @@ def get_output_dims(self, *input_dims):
return input_dims

# create param from PC
def add_param(self, name, shape, init=None, lookup=False, check_stack=True):
def add_param(self, name, shape, init=None, lookup=False, check_stack=True, out_p4i=1, scale=1.):
if init is None:
w = BK.get_params_init(shape, "default", lookup)
elif isinstance(init, str):
w = BK.get_params_init(shape, init, lookup)
init = "default"
# -----
if isinstance(init, str):
w = BK.get_params_init(shape, init, lookup, out_p4i, scale)
else:
w = init
name = self.get_unique_name(name)
@@ -142,9 +144,17 @@ def get_parameters(self, recursively=True):
ret.extend(node.get_parameters(recursively))
return ret

# count number of parameters
def count_allsize_parameters(self, recursively=True):
count = 0
list_params = self.get_parameters(recursively)
for p in list_params:
count += np.prod(BK.get_shape(p))
return int(count)

# commonly used Nodes
class Dropout(BasicNode):
def __init__(self, pc, shape, which_drop="hdrop", name=None, init_rop=None):
def __init__(self, pc, shape, which_drop="hdrop", name=None, init_rop=None, fix_rate=None):
super().__init__(pc, name, init_rop)
self.f_ = None
self.shape = shape
@@ -155,12 +165,18 @@ def __init__(self, pc, shape, which_drop="hdrop", name=None, init_rop=None):
if which_drop == "gdrop":
self.rop.fix_drop = True
self.rop.add_to_fix_set("fix_drop")
assert fix_rate is None
#
self.fix_rate = fix_rate

def refresh(self, rop=None):
super().refresh(rop)
#
r = self.rop
drop = self.drop_getter_(r)
if self.fix_rate is not None:
drop = self.fix_rate
else:
drop = self.drop_getter_(r)
# todo(+3): another overall switch, not quite elegant!
if not r.training:
self.f_ = lambda x: x