From 32ce9e7f75bd97e38bc41b2033b688fc17094d75 Mon Sep 17 00:00:00 2001
From: lihangtian <936971274@qq.com>
Date: Thu, 28 Jul 2022 17:10:26 +0800
Subject: [PATCH 1/8] [ModelZoo] Support Co-Action Network

---
 modelzoo/CAN/prepare_data.sh           |    9 +
 modelzoo/CAN/script/Dice.py            |   35 +
 modelzoo/CAN/script/calc_ckpt.py       |   14 +
 modelzoo/CAN/script/data_iterator.py   |  228 ++++
 modelzoo/CAN/script/generate_voc.py    |   91 ++
 modelzoo/CAN/script/generate_voc.py.bk |   65 ++
 modelzoo/CAN/script/local_aggretor.py  |   46 +
 modelzoo/CAN/script/model.py           |  800 +++++++++++++
 modelzoo/CAN/script/model_avazu.py     |  973 ++++++++++++++++
 modelzoo/CAN/script/process_data.py    |  101 ++
 modelzoo/CAN/script/rnn.py             | 1454 ++++++++++++++++++++++++
 modelzoo/CAN/script/shuffle.py         |   42 +
 modelzoo/CAN/script/split_by_user.py   |   20 +
 modelzoo/CAN/script/test.py            |   10 +
 modelzoo/CAN/script/train.py           |  293 +++++
 modelzoo/CAN/script/utils.py           |  404 +++++++
 16 files changed, 4585 insertions(+)
 create mode 100644 modelzoo/CAN/prepare_data.sh
 create mode 100644 modelzoo/CAN/script/Dice.py
 create mode 100644 modelzoo/CAN/script/calc_ckpt.py
 create mode 100644 modelzoo/CAN/script/data_iterator.py
 create mode 100644 modelzoo/CAN/script/generate_voc.py
 create mode 100644 modelzoo/CAN/script/generate_voc.py.bk
 create mode 100644 modelzoo/CAN/script/local_aggretor.py
 create mode 100644 modelzoo/CAN/script/model.py
 create mode 100644 modelzoo/CAN/script/model_avazu.py
 create mode 100644 modelzoo/CAN/script/process_data.py
 create mode 100644 modelzoo/CAN/script/rnn.py
 create mode 100644 modelzoo/CAN/script/shuffle.py
 create mode 100644 modelzoo/CAN/script/split_by_user.py
 create mode 100644 modelzoo/CAN/script/test.py
 create mode 100644 modelzoo/CAN/script/train.py
 create mode 100644 modelzoo/CAN/script/utils.py

diff --git a/modelzoo/CAN/prepare_data.sh b/modelzoo/CAN/prepare_data.sh
new file mode 100644
index 00000000000..110b9559129
--- /dev/null
+++ b/modelzoo/CAN/prepare_data.sh
@@ -0,0 +1,9 @@
+export PATH="~/anaconda4/bin:$PATH"
+wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Books.json.gz
+wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz
+gunzip reviews_Books.json.gz
+gunzip meta_Books.json.gz
+python script/process_data.py meta_Books.json reviews_Books.json
+python script/local_aggretor.py
+python script/split_by_user.py
+python script/generate_voc.py
diff --git a/modelzoo/CAN/script/Dice.py b/modelzoo/CAN/script/Dice.py
new file mode 100644
index 00000000000..160fb3d909e
--- /dev/null
+++ b/modelzoo/CAN/script/Dice.py
@@ -0,0 +1,35 @@
+import tensorflow as tf
+
+def dice(_x, axis=-1, epsilon=0.000000001, name=''):
+    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
+        alphas = tf.get_variable('alpha'+name, _x.get_shape()[-1],
+                                 initializer=tf.constant_initializer(0.0),
+                                 dtype=tf.float32)
+        input_shape = list(_x.get_shape())
+
+        reduction_axes = list(range(len(input_shape)))
+        del reduction_axes[axis]
+        broadcast_shape = [1] * len(input_shape)
+        broadcast_shape[axis] = input_shape[axis]
+
+        # case: train mode (uses stats of the current batch)
+        mean = tf.reduce_mean(_x, axis=reduction_axes)
+        broadcast_mean = tf.reshape(mean, broadcast_shape)
+        std = tf.reduce_mean(tf.square(_x - broadcast_mean) + epsilon, axis=reduction_axes)
+        std = tf.sqrt(std)
+        broadcast_std = tf.reshape(std, broadcast_shape)
+        x_normed = (_x - broadcast_mean) / (broadcast_std + epsilon)
+        # x_normed = tf.layers.batch_normalization(_x, center=False, scale=False)
+        x_p = tf.sigmoid(x_normed)
+
+
+    return alphas * (1.0 - x_p) * _x + x_p * _x
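+
+# Usage sketch (illustrative): dice is meant for pre-activation dense outputs,
+# mirroring how build_fcn_net in script/model.py calls it:
+#   dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
+#   dnn1 = dice(dnn1, name='dice_1')   # learns one alpha per output unit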
+
+def parametric_relu(_x):
+    alphas = tf.get_variable('alpha', _x.get_shape()[-1],
+                             initializer=tf.constant_initializer(0.0),
+                             dtype=tf.float32)
+    pos = tf.nn.relu(_x)
+    neg = alphas * (_x - abs(_x)) * 0.5
+
+    return pos + neg
diff --git a/modelzoo/CAN/script/calc_ckpt.py b/modelzoo/CAN/script/calc_ckpt.py
new file mode 100644
index 00000000000..fa5d4bda035
--- /dev/null
+++ b/modelzoo/CAN/script/calc_ckpt.py
@@ -0,0 +1,14 @@
+import tensorflow as tf
+ckpt = tf.train.get_checkpoint_state("./ckpt_path/").model_checkpoint_path
+saver = tf.train.import_meta_graph(ckpt+'.meta')
+variables = tf.trainable_variables()
+total_parameters = 0
+for variable in variables:
+    shape = variable.get_shape()
+    variable_parameters = 1
+    for dim in shape:
+        # print(dim)
+        variable_parameters *= dim.value
+    # print(variable_parameters)
+    total_parameters += variable_parameters
+print(total_parameters)
diff --git a/modelzoo/CAN/script/data_iterator.py b/modelzoo/CAN/script/data_iterator.py
new file mode 100644
index 00000000000..b5eef5f9e57
--- /dev/null
+++ b/modelzoo/CAN/script/data_iterator.py
@@ -0,0 +1,228 @@
+import numpy
+import json
+#import cPickle as pkl
+import _pickle as cPickle
+import random
+
+import gzip
+
+import shuffle
+
+def unicode_to_utf8(d):
+    return dict((key.encode("UTF-8"), value) for (key,value) in d.items())
+def dict_unicode_to_utf8(d):
+    print('d={}'.format(d))
+    return dict(((key[0].encode("UTF-8"), key[1].encode("UTF-8")), value) for (key,value) in d.items())
+
+def load_dict(filename):
+    try:
+        with open(filename, 'rb') as f:
+            return unicode_to_utf8(json.load(f))
+    except:
+        try:
+            with open(filename, 'rb') as f:
+                return unicode_to_utf8(cPickle.load(f))
+        except:
+            with open(filename, 'rb') as f:
+                return dict_unicode_to_utf8(cPickle.load(f))
+
+
+def fopen(filename, mode='r'):
+    if filename.endswith('.gz'):
+        return gzip.open(filename, mode)
+    return open(filename, mode)
+
+
+class DataIterator:
+
+    def __init__(self, source,
+                 uid_voc,
+                 mid_voc,
+                 cat_voc,
+                 batch_size=128,
+                 maxlen=100,
+                 skip_empty=False,
+                 shuffle_each_epoch=False,
+                 sort_by_length=True,
+                 max_batch_size=20,
+                 minlen=None,
+                 label_type=1):
+        if shuffle_each_epoch:
+            self.source_orig = source
+            self.source = shuffle.main(self.source_orig, temporary=True)
+        else:
+            self.source = fopen(source, 'r')
+        self.source_dicts = []
+        #for source_dict in [uid_voc, mid_voc, cat_voc, cat_voc, cat_voc]:# 'item_carte_voc.pkl', 'cate_carte_voc.pkl']:
+        for source_dict in [uid_voc, mid_voc, cat_voc, '/home/test/modelzoo/CAN/data/item_carte_voc.pkl', '/home/test/modelzoo/CAN/data/cate_carte_voc.pkl']:
+            self.source_dicts.append(load_dict(source_dict))
+
+        f_meta = open("/home/test/modelzoo/CAN/data/item-info", "r")
+        meta_map = {}
+        for line in f_meta:
+            arr = line.strip().split("\t")
+            if arr[0] not in meta_map:
+                meta_map[arr[0]] = arr[1]
+        self.meta_id_map ={}
+        for key in meta_map:
+            val = meta_map[key]
+            if key in self.source_dicts[1]:
+                mid_idx = self.source_dicts[1][key]
+            else:
+                mid_idx = 0
+            if val in self.source_dicts[2]:
+                cat_idx = self.source_dicts[2][val]
+            else:
+                cat_idx = 0
+            self.meta_id_map[mid_idx] = cat_idx
+
+        f_review = open("/home/test/modelzoo/CAN/data/reviews-info", "r")
+        self.mid_list_for_random = []
+        for line in f_review:
+            arr = line.strip().split("\t")
+            tmp_idx = 0
+            if arr[1] in self.source_dicts[1]:
+                tmp_idx = self.source_dicts[1][arr[1]]
+            self.mid_list_for_random.append(tmp_idx)
+
+        self.batch_size = batch_size
+        self.maxlen = maxlen
+        self.minlen = minlen
+        self.skip_empty = skip_empty
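+        # vocabulary sizes: uid/mid/cat plus the two cartesian ("carte") vocabularies,
+        # read back by the training script through get_n() below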
+
+        self.n_uid = len(self.source_dicts[0])
+        self.n_mid = len(self.source_dicts[1])
+        self.n_cat = len(self.source_dicts[2])
+        self.n_carte = [len(self.source_dicts[3]), len(self.source_dicts[4])]
+        print("n_uid=%d, n_mid=%d, n_cat=%d" % (self.n_uid, self.n_mid, self.n_cat))
+
+        self.shuffle = shuffle_each_epoch
+        self.sort_by_length = sort_by_length
+
+        self.source_buffer = []
+        self.k = batch_size * max_batch_size
+
+        self.end_of_data = False
+        self.label_type = label_type
+
+    def get_n(self):
+        return self.n_uid, self.n_mid, self.n_cat, self.n_carte
+
+    def __iter__(self):
+        return self
+
+    def reset(self):
+        if self.shuffle:
+            self.source = shuffle.main(self.source_orig, temporary=True)
+        else:
+            self.source.seek(0)
+
+    def __next__(self):
+        if self.end_of_data:
+            self.end_of_data = False
+            self.reset()
+            raise StopIteration
+
+        source = []
+        target = []
+
+        if len(self.source_buffer) == 0:
+            for k_ in range(self.k):
+                ss = self.source.readline()
+                if ss == "":
+                    break
+                self.source_buffer.append(ss.strip("\n").split("\t"))
+
+            # sort by history behavior length
+            if self.sort_by_length:
+                # behavior fields are joined with the '\x02' separator by the
+                # preprocessing scripts (see local_aggretor.py)
+                his_length = numpy.array([len(s[4].split("\x02")) for s in self.source_buffer])
+                tidx = his_length.argsort()
+
+                _sbuf = [self.source_buffer[i] for i in tidx]
+                self.source_buffer = _sbuf
+            else:
+                self.source_buffer.reverse()
+
+        if len(self.source_buffer) == 0:
+            self.end_of_data = False
+            self.reset()
+            raise StopIteration
+
+        try:
+
+            # actual work here
+            while True:
+
+                # read from source file and map to word index
+                try:
+                    ss = self.source_buffer.pop()
+                except IndexError:
+                    break
+
+                uid = self.source_dicts[0][ss[1]] if ss[1] in self.source_dicts[0] else 0
+                mid = self.source_dicts[1][ss[2]] if ss[2] in self.source_dicts[1] else 0
+                cat = self.source_dicts[2][ss[3]] if ss[3] in self.source_dicts[2] else 0
+
+                tmp = []
+                item_carte = []
+                for fea in ss[4].split("\x02"):
+                    m = self.source_dicts[1][fea] if fea in self.source_dicts[1] else 0
+                    tmp.append(m)
+                    i_c = self.source_dicts[3][(ss[2], fea)] if (ss[2], fea) in self.source_dicts[3] else 0
+                    item_carte.append(i_c)
+                mid_list = tmp
+
+                tmp1 = []
+                cate_carte = []
+                for fea in ss[5].split("\x02"):
+                    c = self.source_dicts[2][fea] if fea in self.source_dicts[2] else 0
+                    tmp1.append(c)
+                    c_c = self.source_dicts[4][(ss[3], fea)] if (ss[3], fea) in self.source_dicts[4] else 0
+                    cate_carte.append(c_c)
+                cat_list = tmp1
+
+                # read from source file and map to word index
+
+                if self.minlen is not None:
+                    if len(mid_list) <= self.minlen:
+                        continue
+                if self.skip_empty and (not mid_list):
+                    continue
+
+                noclk_mid_list = []
+                noclk_cat_list = []
+                for pos_mid in mid_list:
+                    noclk_tmp_mid = []
+                    noclk_tmp_cat = []
+                    noclk_index = 0
+                    while True:
+                        noclk_mid_indx = random.randint(0, len(self.mid_list_for_random)-1)
+                        noclk_mid = self.mid_list_for_random[noclk_mid_indx]
+                        if noclk_mid == pos_mid:
+                            continue
+                        noclk_tmp_mid.append(noclk_mid)
+                        noclk_tmp_cat.append(self.meta_id_map[noclk_mid])
+                        noclk_index += 1
+                        if noclk_index >= 5:
+                            break
+                    noclk_mid_list.append(noclk_tmp_mid)
+                    noclk_cat_list.append(noclk_tmp_cat)
+                carte_list = [item_carte, cate_carte]
+                source.append([uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list, carte_list])
+                if self.label_type == 1:
+                    target.append([float(ss[0])])
+                else:
+                    target.append([float(ss[0]), 1-float(ss[0])])
+
+                if len(source) >= self.batch_size or len(target) >= self.batch_size:
+                    break
+        except IOError:
+            self.end_of_data = True
+
+        # all sentence pairs in maxibatch filtered out because of length
+        if len(source) == 0 or len(target) == 0:
+            source, target = self.__next__()
+
+        return source, target
+
+
diff --git a/modelzoo/CAN/script/generate_voc.py b/modelzoo/CAN/script/generate_voc.py
new file mode 100644
index 00000000000..03b6a662d97
--- /dev/null
+++ b/modelzoo/CAN/script/generate_voc.py
@@ -0,0 +1,91 @@
+import pickle as pk
+
+f_train = open("/home/test/modelzoo/DIEN/data/local_train_splitByUser", "r")
+uid_dict = {}
+mid_dict = {}
+cat_dict = {}
+item_carte_dict = {}
+cate_carte_dict = {}
+
+iddd = 0
+for line in f_train:
+    arr = line.strip("\n").split("\t")
+    clk = arr[0]
+    uid = arr[1]
+    mid = arr[2]
+    cat = arr[3]
+    mid_list = arr[4]
+    cat_list = arr[5]
+    if uid not in uid_dict:
+        uid_dict[uid] = 0
+    uid_dict[uid] += 1
+    if mid not in mid_dict:
+        mid_dict[mid] = 0
+    mid_dict[mid] += 1
+    if cat not in cat_dict:
+        cat_dict[cat] = 0
+    cat_dict[cat] += 1
+    if len(mid_list) == 0:
+        continue
+    for m in mid_list.split("\x02"):
+        if m not in mid_dict:
+            mid_dict[m] = 0
+        mid_dict[m] += 1
+        if (mid, m) not in item_carte_dict:
+            item_carte_dict[(mid, m)] = 0
+        item_carte_dict[(mid, m)] += 1
+    #print iddd
+    iddd+=1
+    for c in cat_list.split("\x02"):
+        if c not in cat_dict:
+            cat_dict[c] = 0
+        cat_dict[c] += 1
+        if (cat, c) not in cate_carte_dict:
+            cate_carte_dict[(cat, c)] = 0
+        cate_carte_dict[(cat, c)] += 1
+
+sorted_uid_dict = sorted(uid_dict.items(), key=lambda x:x[1], reverse=True)
+sorted_mid_dict = sorted(mid_dict.items(), key=lambda x:x[1], reverse=True)
+sorted_cat_dict = sorted(cat_dict.items(), key=lambda x:x[1], reverse=True)
+sorted_item_carte_dict = sorted(item_carte_dict.items(), key=lambda x:x[1], reverse=True)
+sorted_cate_carte_dict = sorted(cate_carte_dict.items(), key=lambda x:x[1], reverse=True)
+
+uid_voc = {}
+index = 0
+for key, value in sorted_uid_dict:
+    uid_voc[key] = index
+    index += 1
+
+mid_voc = {}
+mid_voc["default_mid"] = 0
+index = 1
+for key, value in sorted_mid_dict:
+    mid_voc[key] = index
+    index += 1
+
+cat_voc = {}
+cat_voc["default_cat"] = 0
+index = 1
+for key, value in sorted_cat_dict:
+    cat_voc[key] = index
+    index += 1
+
+item_carte_voc = {}
+item_carte_voc["default_item_carte"] = 0
+index = 1
+for key, value in sorted_item_carte_dict:
+    item_carte_voc[key] = index
+    index += 1
+
+cate_carte_voc = {}
+cate_carte_voc["default_cate_carte"] = 0
+index = 1
+for key, value in sorted_cate_carte_dict:
+    cate_carte_voc[key] = index
+    index += 1
+
+pk.dump(uid_voc, open("uid_voc.pkl", "wb"))
+pk.dump(mid_voc, open("mid_voc.pkl", "wb"))
+pk.dump(cat_voc, open("cat_voc.pkl", "wb"))
+pk.dump(item_carte_voc, open("item_carte_voc.pkl", "wb"))
+pk.dump(cate_carte_voc, open("cate_carte_voc.pkl", "wb"))
diff --git a/modelzoo/CAN/script/generate_voc.py.bk b/modelzoo/CAN/script/generate_voc.py.bk
new file mode 100644
index 00000000000..411708148aa
--- /dev/null
+++ b/modelzoo/CAN/script/generate_voc.py.bk
@@ -0,0 +1,65 @@
+import cPickle
+
+f_train = open("local_train_splitByUser", "r")
+uid_dict = {}
+mid_dict = {}
+cat_dict = {}
+
+iddd = 0
+for line in f_train:
+    arr = line.strip("\n").split("\t")
+    clk = arr[0]
+    uid = arr[1]
+    mid = arr[2]
+    cat = arr[3]
+    mid_list = arr[4]
+    cat_list = arr[5]
+    if uid not in uid_dict:
+        uid_dict[uid] = 0
+    uid_dict[uid] += 1
+    if mid not in mid_dict:
+        mid_dict[mid] = 0
+    mid_dict[mid] += 1
+    if cat not in cat_dict:
+        cat_dict[cat] = 0
+    cat_dict[cat] += 1
+    if len(mid_list) == 0:
+        continue
+    for m in mid_list.split("\x02"):
+        if m not in mid_dict:
+            mid_dict[m] = 0
+        mid_dict[m] += 1
+    #print iddd
+    iddd+=1
+    for c in cat_list.split("\x02"):
+        if c not in cat_dict:
+            cat_dict[c] = 0
+        cat_dict[c] += 1
+
+sorted_uid_dict = sorted(uid_dict.iteritems(), key=lambda x:x[1], reverse=True)
+sorted_mid_dict = sorted(mid_dict.iteritems(), key=lambda x:x[1], reverse=True)
+sorted_cat_dict = sorted(cat_dict.iteritems(), key=lambda x:x[1], reverse=True)
+
+uid_voc = {}
+index = 0
+for key, value in sorted_uid_dict:
+    uid_voc[key] = index
+    index += 1
+
+mid_voc = {}
+mid_voc["default_mid"] = 0
+index = 1
+for key, value in sorted_mid_dict:
+    mid_voc[key] = index
+    index += 1
+
+cat_voc = {}
+cat_voc["default_cat"] = 0
+index = 1
+for key, value in sorted_cat_dict:
+    cat_voc[key] = index
+    index += 1
+
+cPickle.dump(uid_voc, open("uid_voc.pkl", "w"))
+cPickle.dump(mid_voc, open("mid_voc.pkl", "w"))
+cPickle.dump(cat_voc, open("cat_voc.pkl", "w"))
diff --git a/modelzoo/CAN/script/local_aggretor.py b/modelzoo/CAN/script/local_aggretor.py
new file mode 100644
index 00000000000..e7e23190a1d
--- /dev/null
+++ b/modelzoo/CAN/script/local_aggretor.py
@@ -0,0 +1,46 @@
+import sys
+import hashlib
+import random
+
+fin = open("/home/test/modelzoo/DIEN/data/jointed-new-split-info", "r")
+ftrain = open("/home/test/modelzoo/DIEN/data/local_train", "w")
+ftest = open("/home/test/modelzoo/DIEN/data/local_test", "w")
+
+last_user = "0"
+common_fea = ""
+line_idx = 0
+for line in fin:
+    items = line.strip().split("\t")
+    ds = items[0]
+    clk = int(items[1])
+    user = items[2]
+    movie_id = items[3]
+    dt = items[5]
+    cat1 = items[6]
+
+    if ds=="20180118":
+        fo = ftrain
+    else:
+        fo = ftest
+    if user != last_user:
+        movie_id_list = []
+        cate1_list = []
+        #print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + "" + "\t" + ""
+    else:
+        history_clk_num = len(movie_id_list)
+        cat_str = ""
+        mid_str = ""
+        for c1 in cate1_list:
+            cat_str += c1 + "\x02"
+        for mid in movie_id_list:
+            mid_str += mid + "\x02"
+        if len(cat_str) > 0: cat_str = cat_str[:-1]
+        if len(mid_str) > 0: mid_str = mid_str[:-1]
+        if history_clk_num >= 1:  # keep samples with at least one prior behavior
+            print(items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + mid_str + "\t" + cat_str,
+                  file=fo)
+    last_user = user
+    if clk:
+        movie_id_list.append(movie_id)
+        cate1_list.append(cat1)
+    line_idx += 1
diff --git a/modelzoo/CAN/script/model.py b/modelzoo/CAN/script/model.py
new file mode 100644
index 00000000000..133ded83f09
--- /dev/null
+++ b/modelzoo/CAN/script/model.py
@@ -0,0 +1,800 @@
+#import tensorflow as tf
+import tensorflow.compat.v1 as tf
+from tensorflow.python.ops.rnn_cell import GRUCell
+from tensorflow.python.ops.rnn_cell import LSTMCell
+from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn
+#from tensorflow.python.ops.rnn import dynamic_rnn
+from rnn import dynamic_rnn
+from utils import *
+from Dice import dice
+
+#### CAN config #####
+# Each candidate item's "weight" embedding is sliced into a 16->8->4 micro-MLP,
+# i.e. WEIGHT_EMB_DIM = 16*8 + 8*4 = 160 floats per order (biases disabled here).
+weight_emb_w = [[16, 8], [8,4]]
+weight_emb_b = [0, 0]
+print(weight_emb_w, weight_emb_b)
+orders = 3
+order_indep = False # True
+WEIGHT_EMB_DIM = (sum([w[0]*w[1] for w in weight_emb_w]) + sum(weight_emb_b)) #* orders
+INDEP_NUM = 1
+if order_indep:
+    INDEP_NUM *= orders
+
+print("orders: ",orders)
+CALC_MODE = "can"
+device = '/gpu:2'
+#### CAN config #####
+
+def gen_coaction(ad, his_items, dim, mode="can", mask=None, keep_fake_carte_seq=False):
+    # Co-Action unit: slice the candidate embedding `ad` into the weight/bias
+    # tensors of a small MLP, feed every behavior embedding in `his_items`
+    # through that MLP (higher orders feed his_items**k), then sum-pool the
+    # masked outputs over the sequence.
+    weight, bias = [], []
+    idx = 0
+    weight_orders = []
+    bias_orders = []
+    for i in range(orders):
+        for w, b in zip(weight_emb_w, weight_emb_b):
+            weight.append(tf.reshape(ad[:, idx:idx+w[0]*w[1]], [-1, w[0], w[1]]))
+            idx += w[0] * w[1]
+            if b == 
0: + bias.append(None) + else: + bias.append(tf.reshape(ad[:, idx:idx+b], [-1, 1, b])) + idx += b + weight_orders.append(weight) + bias_orders.append(bias) + if not order_indep: + break + + if mode == "can": + out_seq = [] + hh = [] + for i in range(orders): + hh.append(his_items**(i+1)) + #hh = [sum(hh)] + for i, h in enumerate(hh): + if order_indep: + weight, bias = weight_orders[i], bias_orders[i] + else: + weight, bias = weight_orders[0], bias_orders[0] + for j, (w, b) in enumerate(zip(weight, bias)): + h = tf.matmul(h, w) + if b is not None: + h = h + b + if j != len(weight)-1: + h = tf.nn.tanh(h) + out_seq.append(h) + out_seq = tf.concat(out_seq, 2) + if mask is not None: + mask = tf.expand_dims(mask, axis=-1) + out_seq = out_seq * mask + out = tf.reduce_sum(out_seq, 1) + if keep_fake_carte_seq and mode=="emb": + return out, out_seq + return out, None + +class Model(object): + def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling = False, use_softmax=True, use_coaction=False, use_cartes=False): + with tf.name_scope('Inputs'): + self.mid_his_batch_ph = tf.placeholder(tf.int32, [None, None], name='mid_his_batch_ph') + self.cate_his_batch_ph = tf.placeholder(tf.int32, [None, None], name='cate_his_batch_ph') + self.uid_batch_ph = tf.placeholder(tf.int32, [None, ], name='uid_batch_ph') + self.mid_batch_ph = tf.placeholder(tf.int32, [None, ], name='mid_batch_ph') + self.cate_batch_ph = tf.placeholder(tf.int32, [None, ], name='cate_batch_ph') + self.mask = tf.placeholder(tf.float32, [None, None], name='mask') + self.seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph') + self.target_ph = tf.placeholder(tf.float32, [None, None], name='target_ph') + self.carte_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='carte_ph') + self.lr = tf.placeholder(tf.float64, []) + self.use_negsampling =use_negsampling + self.use_softmax = False #use_softmax + self.use_coaction = use_coaction + self.use_cartes = use_cartes + print("args:") + print("negsampling: ", self.use_negsampling) + print("softmax: ", self.use_softmax) + print("co-action: ", self.use_coaction) + print("carte: ", self.use_cartes) + if use_negsampling: + self.noclk_mid_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='noclk_mid_batch_ph') #generate 3 item IDs from negative sampling. 
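+            # noclk placeholders are [batch, seq_len, n_neg]; the data iterator
+            # in data_iterator.py samples 5 negative items per clicked position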
+ self.noclk_cate_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='noclk_cate_batch_ph') + + # Embedding layer + with tf.name_scope('Embedding_layer'): + self.uid_embeddings_var = tf.get_variable("uid_embedding_var", [n_uid, EMBEDDING_DIM]) + tf.summary.histogram('uid_embeddings_var', self.uid_embeddings_var) + self.uid_batch_embedded = tf.nn.embedding_lookup(self.uid_embeddings_var, self.uid_batch_ph) + + self.mid_embeddings_var = tf.get_variable("mid_embedding_var", [n_mid, EMBEDDING_DIM]) + tf.summary.histogram('mid_embeddings_var', self.mid_embeddings_var) + self.mid_batch_embedded = tf.nn.embedding_lookup(self.mid_embeddings_var, self.mid_batch_ph) + self.mid_his_batch_embedded = tf.nn.embedding_lookup(self.mid_embeddings_var, self.mid_his_batch_ph) + if self.use_negsampling: + self.noclk_mid_his_batch_embedded = tf.nn.embedding_lookup(self.mid_embeddings_var, self.noclk_mid_batch_ph) + + self.cate_embeddings_var = tf.get_variable("cate_embedding_var", [n_cate, EMBEDDING_DIM]) + tf.summary.histogram('cate_embeddings_var', self.cate_embeddings_var) + self.cate_batch_embedded = tf.nn.embedding_lookup(self.cate_embeddings_var, self.cate_batch_ph) + self.cate_his_batch_embedded = tf.nn.embedding_lookup(self.cate_embeddings_var, self.cate_his_batch_ph) + if self.use_negsampling: + self.noclk_cate_his_batch_embedded = tf.nn.embedding_lookup(self.cate_embeddings_var, self.noclk_cate_batch_ph) + + if self.use_cartes: + self.carte_embedding_vars = [] + self.carte_batch_embedded = [] + with tf.device(device): + for i, num in enumerate(n_carte): + print("carte num:", num) + self.carte_embedding_vars.append(tf.get_variable("carte_embedding_var_{}".format(i), [num, EMBEDDING_DIM], trainable=True)) + self.carte_batch_embedded.append(tf.nn.embedding_lookup(self.carte_embedding_vars[i], self.carte_batch_ph[:,i,:])) + + ### co-action ### + if self.use_coaction: + ph_dict = { + "item": [self.mid_batch_ph, self.mid_his_batch_ph, self.mid_his_batch_embedded], + "cate": [self.cate_batch_ph, self.cate_his_batch_ph, self.cate_his_batch_embedded] + } + self.mlp_batch_embedded = [] + with tf.device(device): + self.item_mlp_embeddings_var = tf.get_variable("item_mlp_embedding_var", [n_mid, INDEP_NUM * WEIGHT_EMB_DIM], trainable=True) + self.cate_mlp_embeddings_var = tf.get_variable("cate_mlp_embedding_var", [n_cate, INDEP_NUM * WEIGHT_EMB_DIM], trainable=True) + + self.mlp_batch_embedded.append(tf.nn.embedding_lookup(self.item_mlp_embeddings_var, ph_dict['item'][0])) + self.mlp_batch_embedded.append(tf.nn.embedding_lookup(self.cate_mlp_embeddings_var, ph_dict['cate'][0])) + + self.input_batch_embedded = [] + self.item_input_embeddings_var = tf.get_variable("item_input_embedding_var", [n_mid, weight_emb_w[0][0] * INDEP_NUM], trainable=True) + self.cate_input_embeddings_var = tf.get_variable("cate_input_embedding_var", [n_cate, weight_emb_w[0][0] * INDEP_NUM], trainable=True) + self.input_batch_embedded.append(tf.nn.embedding_lookup(self.item_input_embeddings_var, ph_dict['item'][1])) + self.input_batch_embedded.append(tf.nn.embedding_lookup(self.cate_input_embeddings_var, ph_dict['cate'][1])) + + self.item_eb = tf.concat([self.mid_batch_embedded, self.cate_batch_embedded], 1) + self.item_his_eb = tf.concat([self.mid_his_batch_embedded, self.cate_his_batch_embedded], 2) + self.item_his_eb_sum = tf.reduce_sum(self.item_his_eb, 1) + if self.use_negsampling: + self.noclk_item_his_eb = tf.concat( + [self.noclk_mid_his_batch_embedded[:, :, 0, :], self.noclk_cate_his_batch_embedded[:, :, 0, :]], -1)# 0 
means only using the first negative item ID. 3 item IDs are inputed in the line 24. + self.noclk_item_his_eb = tf.reshape(self.noclk_item_his_eb, + [-1, tf.shape(self.noclk_mid_his_batch_embedded)[1], 2*EMBEDDING_DIM])# cat embedding 18 concate item embedding 18. + + self.noclk_his_eb = tf.concat([self.noclk_mid_his_batch_embedded, self.noclk_cate_his_batch_embedded], -1) + self.noclk_his_eb_sum_1 = tf.reduce_sum(self.noclk_his_eb, 2) + self.noclk_his_eb_sum = tf.reduce_sum(self.noclk_his_eb_sum_1, 1) + + self.cross = [] + if self.use_cartes: + if self.mask is not None: + mask = tf.expand_dims(self.mask, axis=-1) + for i,emb in enumerate(self.carte_batch_embedded): + emb = emb * mask + carte_eb_sum = tf.reduce_sum(emb, 1) + self.cross.append(carte_eb_sum) + + if self.use_coaction: + input_batch = self.input_batch_embedded + tmp_sum, tmp_seq = [], [] + if INDEP_NUM == 2: + for i, mlp_batch in enumerate(self.mlp_batch_embedded): + for j, input_batch in enumerate(self.input_batch_embedded): + coaction_sum, coaction_seq = gen_coaction(mlp_batch[:, WEIGHT_EMB_DIM * j: WEIGHT_EMB_DIM * (j+1)], input_batch[:, :, weight_emb_w[0][0] * i: weight_emb_w[0][0] * (i+1)], EMBEDDING_DIM, mode=CALC_MODE,mask=self.mask) + tmp_sum.append(coaction_sum) + tmp_seq.append(coaction_seq) + else: + for i, (mlp_batch, input_batch) in enumerate(zip(self.mlp_batch_embedded, self.input_batch_embedded)): + coaction_sum, coaction_seq = gen_coaction(mlp_batch[:, : INDEP_NUM * WEIGHT_EMB_DIM], input_batch[:, :, : weight_emb_w[0][0]], EMBEDDING_DIM, mode=CALC_MODE, mask=self.mask) + tmp_sum.append(coaction_sum) + tmp_seq.append(coaction_seq) + + self.coaction_sum = tf.concat(tmp_sum, axis=1) + self.cross.append(self.coaction_sum) + + def build_fcn_net(self, inp, use_dice = False): + bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1') + dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1') + if use_dice: + dnn1 = dice(dnn1, name='dice_1') + else: + dnn1 = prelu(dnn1, 'prelu1') + + dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2') + if use_dice: + dnn2 = dice(dnn2, name='dice_2') + else: + dnn2 = prelu(dnn2, 'prelu2') + dnn3 = tf.layers.dense(dnn2, 2 if self.use_softmax else 1, activation=None, name='f3') + return dnn3 + + def build_loss(self, inp, L2=False): + + with tf.name_scope('Metrics'): + # Cross-entropy loss and optimizer initialization + if self.use_softmax: + self.y_hat = tf.nn.softmax(inp) + 0.00000001 + ctr_loss = - tf.reduce_mean(tf.log(self.y_hat) * self.target_ph) + else: + self.y_hat = tf.nn.sigmoid(inp) + ctr_loss = - tf.reduce_mean(tf.concat([tf.log(self.y_hat + 0.00000001) * self.target_ph, tf.log(1 - self.y_hat + 0.00000001) * (1-self.target_ph)], axis=1)) + self.loss = ctr_loss + if self.use_negsampling: + self.loss += self.aux_loss + if L2: + self.loss += self.l2_loss + + tf.summary.scalar('loss', self.loss) + self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss) + + # Accuracy metric + if self.use_softmax: + self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32)) + else: + self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32)) + tf.summary.scalar('accuracy', self.accuracy) + + + def auxiliary_loss(self, h_states, click_seq, noclick_seq, mask, stag = None): + mask = tf.cast(mask, tf.float32) + click_input_ = tf.concat([h_states, click_seq], -1) + noclick_input_ = tf.concat([h_states, noclick_seq], -1) + click_prop_ = self.auxiliary_net(click_input_, stag = 
stag)[:, :, 0] + noclick_prop_ = self.auxiliary_net(noclick_input_, stag = stag)[:, :, 0] + click_loss_ = - tf.reshape(tf.log(click_prop_), [-1, tf.shape(click_seq)[1]]) * mask + noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * mask + loss_ = tf.reduce_mean(click_loss_ + noclick_loss_) + return loss_ + + def auxiliary_net(self, in_, stag='auxiliary_net'): + bn1 = tf.layers.batch_normalization(inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE) + dnn1 = tf.layers.dense(bn1, 100, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE) + dnn1 = tf.nn.sigmoid(dnn1) + dnn2 = tf.layers.dense(dnn1, 50, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE) + dnn2 = tf.nn.sigmoid(dnn2) + dnn3 = tf.layers.dense(dnn2, 2 if self.use_softmax else 1, activation=None, name='f3' + stag, reuse=tf.AUTO_REUSE) + if self.use_softmax: + y_hat = tf.nn.softmax(dnn3) + 0.00000001 + else: + y_hat = tf.nn.sigmoid(dnn3) + 0.00000001 + return y_hat + + + def train(self, sess, inps): + if self.use_negsampling: + loss, accuracy, aux_loss, _ = sess.run([self.loss, self.accuracy, self.aux_loss, self.optimizer], feed_dict={ + self.uid_batch_ph: inps[0], + self.mid_batch_ph: inps[1], + self.cate_batch_ph: inps[2], + self.mid_his_batch_ph: inps[3], + self.cate_his_batch_ph: inps[4], + self.mask: inps[5], + self.target_ph: inps[6], + self.seq_len_ph: inps[7], + self.lr: inps[8], + self.noclk_mid_batch_ph: inps[9], + self.noclk_cate_batch_ph: inps[10], + self.carte_batch_ph: inps[11] + }) + return loss, accuracy, aux_loss + else: + loss, accuracy, _ = sess.run([self.loss, self.accuracy, self.optimizer], feed_dict={ + self.uid_batch_ph: inps[0], + self.mid_batch_ph: inps[1], + self.cate_batch_ph: inps[2], + self.mid_his_batch_ph: inps[3], + self.cate_his_batch_ph: inps[4], + self.mask: inps[5], + self.target_ph: inps[6], + self.seq_len_ph: inps[7], + self.lr: inps[8], + self.carte_batch_ph: inps[11] + }) + return loss, accuracy, 0 + + def calculate(self, sess, inps): + if self.use_negsampling: + probs, loss, accuracy, aux_loss = sess.run([self.y_hat, self.loss, self.accuracy, self.aux_loss], feed_dict={ + self.uid_batch_ph: inps[0], + self.mid_batch_ph: inps[1], + self.cate_batch_ph: inps[2], + self.mid_his_batch_ph: inps[3], + self.cate_his_batch_ph: inps[4], + self.mask: inps[5], + self.target_ph: inps[6], + self.seq_len_ph: inps[7], + self.noclk_mid_batch_ph: inps[8], + self.noclk_cate_batch_ph: inps[9], + self.carte_batch_ph: inps[10] + }) + return probs, loss, accuracy, aux_loss + else: + probs, loss, accuracy = sess.run([self.y_hat, self.loss, self.accuracy], feed_dict={ + self.uid_batch_ph: inps[0], + self.mid_batch_ph: inps[1], + self.cate_batch_ph: inps[2], + self.mid_his_batch_ph: inps[3], + self.cate_his_batch_ph: inps[4], + self.mask: inps[5], + self.target_ph: inps[6], + self.seq_len_ph: inps[7], + self.carte_batch_ph: inps[10] + }) + return probs, loss, accuracy, 0 + + def save(self, sess, path): + saver = tf.train.Saver() + saver.save(sess, save_path=path) + + def restore(self, sess, path): + saver = tf.train.Saver() + saver.restore(sess, save_path=path) + print('model restored from %s' % path) + +class Model_NCF(Model): + def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True): + super(Model_NCF, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, + ATTENTION_SIZE, + use_negsampling, use_softmax) + with tf.name_scope('ncf_embedding'): + self.ncf_item_embedding_var = 
tf.get_variable("ncf_item_embedding_var", [n_mid, EMBEDDING_DIM], trainable=True)
+            self.ncf_cate_embedding_var = tf.get_variable("ncf_cate_embedding_var", [n_cate, EMBEDDING_DIM], trainable=True)
+
+            ncf_item_emb = tf.nn.embedding_lookup(self.ncf_item_embedding_var, self.mid_batch_ph)
+            ncf_item_his_emb = tf.nn.embedding_lookup(self.ncf_item_embedding_var, self.mid_his_batch_ph)
+            ncf_cate_emb = tf.nn.embedding_lookup(self.ncf_cate_embedding_var, self.cate_batch_ph)
+            ncf_cate_his_emb = tf.nn.embedding_lookup(self.ncf_cate_embedding_var, self.cate_his_batch_ph)
+
+            ncf_item_his_sum = tf.reduce_mean(ncf_item_his_emb, axis=1)
+            ncf_cate_his_sum = tf.reduce_mean(ncf_cate_his_emb, axis=1)
+            mf = tf.concat([ncf_item_emb * ncf_item_his_sum, ncf_cate_emb * ncf_cate_his_sum], axis=1)
+
+        inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1)
+        logit = self.build_fcn_net(inp, mf, use_dice=False)
+        self.build_loss(logit)
+
+    def build_fcn_net(self, inp, mf, use_dice = False):
+        bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1')
+        dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
+        if use_dice:
+            dnn1 = dice(dnn1, name='dice_1')
+        else:
+            dnn1 = prelu(dnn1, scope='prelu_1')
+
+        dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2')
+        if use_dice:
+            dnn2 = dice(dnn2, name='dice_2')
+        else:
+            dnn2 = prelu(dnn2, scope='prelu_2')
+
+        dnn2 = tf.concat([dnn2, mf], axis=1)
+        dnn3 = tf.layers.dense(dnn2, 2 if self.use_softmax else 1, activation=None, name='f3')
+        return dnn3
+
+def ProductLayer(feas, DIM, share=True):
+    row, col = [], []
+    num = len(feas)
+    pair = num * (num-1) // 2
+    for i in range(num - 1):
+        for j in range(i+1, num):
+            row.append(i)
+            col.append(j)
+    if share:
+        p = tf.stack([feas[i] for i in row], axis=1)
+        q = tf.stack([feas[i] for i in col], axis=1)
+    else:
+        tmp = []
+        count = {}
+        for i in row:
+            if i not in count:
+                count[i] = 0
+            else:
+                count[i] += 1
+            k = count[i]
+            tmp.append(feas[i][:, k*DIM:(k+1)*DIM])
+        p = tf.stack(tmp, axis=1)
+        tmp = []
+        for i in col:
+            if i not in count:
+                count[i] = 0
+            else:
+                count[i] += 1
+            k = count[i]
+            tmp.append(feas[i][:, k*DIM:(k+1)*DIM])
+        q = tf.stack(tmp, axis=1)
+
+    ipnn = p * q
+    ipnn = tf.reduce_sum(ipnn, axis=2, keep_dims=False)
+    p = tf.expand_dims(p, axis=1)
+    w = tf.get_variable("pnn_var", [DIM, pair, DIM], trainable=True)
+    opnn = tf.reduce_sum((tf.multiply((tf.transpose(tf.reduce_sum(tf.multiply(p, w), axis=-1), [0, 2, 1])), q)), axis=-1)
+    pnn = tf.concat([ipnn, opnn], axis=1)
+    return pnn
+
+class Model_PNN(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True):
+        super(Model_PNN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
+
+        fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_mean(self.mid_his_batch_embedded, axis=1), tf.reduce_mean(self.cate_his_batch_embedded, axis=1)]
+        pnn = ProductLayer(fea_list, EMBEDDING_DIM)
+        inp = tf.concat([self.uid_batch_embedded[:, :18], self.item_eb[:, :36], self.item_his_eb_sum[:, :36], pnn], 1)
+        logit = self.build_fcn_net(inp, use_dice=False)
+        self.build_loss(logit)
+
+def FMLayer(feas, output_dim=1):
+    feas = tf.stack(feas, axis=1)
+    square_of_sum = tf.reduce_sum(feas, axis=1, keep_dims=True) ** 2
+    sum_of_square = tf.reduce_sum(feas ** 2, axis=1, keep_dims=True)
+    fm_term = 0.5 * tf.reduce_sum(square_of_sum - sum_of_square, axis=2, keep_dims=False)
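+    # standard FM trick: sum_{i<j} x_i . x_j = 0.5 * ((sum_i x_i)^2 - sum_i x_i^2),
+    # evaluated per embedding dimension and then reduced over axis 2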
+    if output_dim==2:
+        fm_term = tf.concat([fm_term, tf.zeros_like(fm_term)], axis=1)
+    return fm_term
+
+class Model_FM(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False):
+        super(Model_FM, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
+
+        w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True)
+        w_cate_var = tf.get_variable("w_cate_var", [n_cate, 1], trainable=True)
+        wx = []
+        wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph))
+        wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph))
+        wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1))
+        wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1))
+        b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True)
+
+        wx = tf.concat(wx, axis=1)
+        lr_term = tf.reduce_sum(wx, axis=1) + b
+
+        fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded, axis=1), tf.reduce_sum(self.cate_his_batch_embedded, axis=1)]
+        logit = tf.reduce_sum(wx, axis=1) + b + FMLayer(fea_list, 1)
+
+        #self.l2_loss = 2e-5 * tf.add_n([tf.nn.l2_loss(v) for v in [wx, self.item_eb, self.item_his_eb_sum]])
+        self.build_loss(logit, L2=False)
+
+class Model_FFM(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False):
+        super(Model_FFM, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
+
+        w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True)
+        w_cate_var = tf.get_variable("w_cate_var", [n_cate, 1], trainable=True)
+        wx = []
+        wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph))
+        wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph))
+        wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1))
+        wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1))
+        b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True)
+
+        wx = tf.concat(wx, axis=1)
+        lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b
+
+        with tf.name_scope('FFM_embedding'):
+
+            FFM_item_embedding_var = tf.get_variable("FFM_item_embedding_var", [n_mid, 3, EMBEDDING_DIM], trainable=True)
+            FFM_cate_embedding_var = tf.get_variable("FFM_cate_embedding_var", [n_cate, 3, EMBEDDING_DIM], trainable=True)
+            item_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_batch_ph)
+            item_his_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_his_batch_ph)
+            item_his_sum = tf.reduce_sum(item_his_emb, axis=1)
+
+            cate_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_batch_ph)
+            cate_his_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_his_batch_ph)
+            cate_his_sum = tf.reduce_sum(cate_his_emb, axis=1)
+
+        fea_list = [item_emb, item_his_sum, cate_emb, cate_his_sum]
+        feas = tf.stack(fea_list, axis=1)
+        num = len(fea_list)
+        rows, cols = [], []
+        for i in range(num-1):
+            for j in range(i+1, num):
+                rows.append([i, j-1])
+                cols.append([j, i])
+        p = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), rows), [1,0,2])
+        q = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), cols), [1,0,2])
+        ffm_term = tf.reduce_sum(p * q, axis=2)
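+        # p/q above pair each feature's field-specific embedding with its partner's
+        # field, so p * q realizes the field-aware (FFM) interaction terms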
+        ffm_term = tf.reduce_sum(ffm_term, axis=1, keep_dims=True)
+        logit = lr_term + ffm_term
+        self.build_loss(logit)
+
+
+class Model_DeepFFM(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False):
+        super(Model_DeepFFM, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
+
+        w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True)
+        w_cate_var = tf.get_variable("w_cate_var", [n_cate, 1], trainable=True)
+        wx = []
+        wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph))
+        wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph))
+        wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1))
+        wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1))
+        b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True)
+
+        wx = tf.concat(wx, axis=1)
+        lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b
+
+        with tf.name_scope('FFM_embedding'):
+
+            FFM_item_embedding_var = tf.get_variable("FFM_item_embedding_var", [n_mid, 3, EMBEDDING_DIM], trainable=True)
+            FFM_cate_embedding_var = tf.get_variable("FFM_cate_embedding_var", [n_cate, 3, EMBEDDING_DIM], trainable=True)
+            item_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_batch_ph)
+            item_his_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_his_batch_ph)
+            item_his_sum = tf.reduce_sum(item_his_emb, axis=1)
+
+            cate_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_batch_ph)
+            cate_his_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_his_batch_ph)
+            cate_his_sum = tf.reduce_sum(cate_his_emb, axis=1)
+
+        fea_list = [item_emb, item_his_sum, cate_emb, cate_his_sum]
+        feas = tf.stack(fea_list, axis=1)
+        num = len(fea_list)
+        rows, cols = [], []
+        for i in range(num-1):
+            for j in range(i+1, num):
+                rows.append([i, j-1])
+                cols.append([j, i])
+        p = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), rows), [1,0,2])
+        q = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), cols), [1,0,2])
+        ffm_term = tf.reduce_sum(p * q, axis=2)
+        ffm_term = tf.reduce_sum(ffm_term, axis=1, keep_dims=True)
+
+        inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1)
+        dnn_term = self.build_fcn_net(inp, use_dice=False)
+
+        logit = dnn_term + lr_term + ffm_term
+        self.build_loss(logit)
+
+class Model_DeepFM(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False):
+        super(Model_DeepFM, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
+        w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True)
+        w_cate_var = tf.get_variable("w_cate_var", [n_cate, 1], trainable=True)
+        wx = []
+        wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph))
+        wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph))
+        wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1))
+        wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1))
+        b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True)
+
+        wx = tf.concat(wx, axis=1)
+        lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b
+
+        inp = 
tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1) + logit = self.build_fcn_net(inp, use_dice=False) + + fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded, axis=1), tf.reduce_sum(self.cate_his_batch_embedded, axis=1)] + fm_term = FMLayer(fea_list) + logit = tf.layers.dense(tf.concat([logit, fm_term, lr_term], axis=1), 1, activation=None, name='fm_fc') + #self.l2_loss = 0.01 * tf.add_n([tf.nn.l2_loss(v) for v in [wx, self.item_eb, self.item_his_eb_sum]]) + self.build_loss(logit, L2=False) + +def ExtremeFMLayer(feas, dim, output_dim=1): + num = len(feas) + feas = tf.stack(feas, axis=1) # batch, field_num, emb_dim + hidden_nn_layers = [] + field_nums = [num] + final_len = 0 + hidden_nn_layers.append(feas) + final_result = [] + cross_layers = [256, 256, 256] + + split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2) + + with tf.variable_scope("xfm", initializer=tf.contrib.layers.xavier_initializer(uniform=True)) as scope: + for idx, layer_size in enumerate(cross_layers): + split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2) + dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True) + dot_result_o = tf.reshape(dot_result_m, shape=[dim, -1, field_nums[0] * field_nums[-1]]) + dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2]) + + filters = tf.get_variable(name="f_" + str(idx), + shape=[1, field_nums[-1] * field_nums[0], layer_size], + dtype=tf.float32) + + curr_out = tf.nn.conv1d(dot_result, filters=filters, stride=1, padding='VALID') + curr_out = tf.transpose(curr_out, perm=[0, 2, 1]) + + if idx != len(cross_layers) - 1: + next_hidden, direct_connect = tf.split(curr_out, 2 * [int(layer_size / 2)], 1) + final_len += int(layer_size / 2) + else: + direct_connect = curr_out + next_hidden = 0 + final_len += layer_size + field_nums.append(int(layer_size / 2)) + + final_result.append(direct_connect) + hidden_nn_layers.append(next_hidden) + + + result = tf.concat(final_result, axis=1) + result = tf.reduce_sum(result, -1) + + w_nn_output = tf.get_variable(name='w_nn_output', + shape=[final_len, 1], + dtype=tf.float32) + b_nn_output = tf.get_variable(name='b_nn_output', + shape=[1], + dtype=tf.float32, + initializer=tf.zeros_initializer()) + xfm_term = tf.matmul(result, w_nn_output) + b_nn_output + + if output_dim==2: + xfm_term = tf.concat([xfm_term, tf.zeros_like(xfm_term)], axis=1) + return xfm_term + +class Model_xDeepFM(Model): + def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False): + super(Model_xDeepFM, self).__init__(n_uid, n_mid, n_cate, n_carte,EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax) + + w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True) + w_cate_var = tf.get_variable("w_cate_var", [n_cate, 1], trainable=True) + wx = [] + wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph)) + wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph)) + wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1)) + wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1)) + b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True) + + wx = tf.concat(wx, axis=1) + lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b + + inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1) + mlp_term = self.build_fcn_net(inp, 
use_dice=False)
+
+        fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded, axis=1), tf.reduce_sum(self.cate_his_batch_embedded, axis=1)]
+        fm_term = ExtremeFMLayer(fea_list, EMBEDDING_DIM)
+        self.build_loss(mlp_term + fm_term)
+
+class Model_PIN(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
+        super(Model_PIN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
+                                        BATCH_SIZE, SEQ_LEN, Flag="PIN")
+
+        inp = tf.concat([self.item_eb, self.item_his_eb_sum], 1)
+        logit = self.build_fcn_net(inp, use_dice=False)
+
+        feas = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded * tf.reshape(self.mask,(BATCH_SIZE, SEQ_LEN, 1)), axis=1), tf.reduce_sum(self.cate_his_batch_embedded * tf.reshape(self.mask,(BATCH_SIZE, SEQ_LEN, 1)), axis=1)]
+
+        self.feas = feas
+        row, col = [], []
+        num = len(feas)
+        for i in range(num - 1):
+            for j in range(i+1, num):
+                row.append(i)
+                col.append(j)
+        pairs = len(row)
+        p = tf.concat([feas[i] for i in row], axis=1)
+        q = tf.concat([feas[i] for i in col], axis=1)
+        pq = p * q
+        inp = tf.concat([p,q,pq], axis=2) #batch, pair, 3*dim
+        logit = self.pin(inp)
+        self.build_loss(logit)
+
+    def pin(self, inp):
+        batch, pair, dim = inp.shape.as_list()
+        with tf.variable_scope('product_network'):
+            inp = tf.transpose(inp, [1,0,2])
+            x = tf.layers.dense(inp, 20, activation=None, name='fc1')
+            x = tf.layers.batch_normalization(x, name='bn1')
+            x = tf.nn.relu(x)
+            x = tf.layers.dense(x, 1, activation=None, name='fc2')
+            x = tf.layers.batch_normalization(x, name='bn2')
+            x = tf.transpose(x, [1,0,2])
+            sub_out = tf.reshape(x, [-1, pair * dim])
+
+        with tf.variable_scope('network'):
+            new_inp = tf.concat(self.feas+[sub_out], axis=1)
+            x = tf.layers.dense(sub_out, 400, activation=tf.nn.relu, name='fc1')
+            x = tf.layers.dense(x, 400, activation=tf.nn.relu, name='fc2')
+            x = tf.layers.dense(x, 400, activation=tf.nn.relu, name='fc3')
+            x = tf.layers.dense(x, 1, activation=None, name='fc4')
+        return x
+
+class Model_ONN(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False):
+        super(Model_ONN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
+
+        dim = 5
+        self.item_embedding_var = tf.get_variable("item_embedding_var_onn", [n_mid, dim * 3], trainable=True)
+        self.item_emb = tf.nn.embedding_lookup(self.item_embedding_var, self.mid_batch_ph)
+        self.item_his_emb = tf.nn.embedding_lookup(self.item_embedding_var, self.mid_his_batch_ph)
+        self.item_his_emb_sum = tf.reduce_mean(self.item_his_emb, axis=1)
+
+        self.cate_embedding_var = tf.get_variable("cate_embedding_var_onn", [n_cate, dim * 3], trainable=True)
+        self.cate_emb = tf.nn.embedding_lookup(self.cate_embedding_var, self.cate_batch_ph)
+        self.cate_his_emb = tf.nn.embedding_lookup(self.cate_embedding_var, self.cate_his_batch_ph)
+        self.cate_his_emb_sum = tf.reduce_mean(self.cate_his_emb, axis=1)
+
+        fea_list = [self.item_emb, self.cate_emb, self.item_his_emb_sum, self.cate_his_emb_sum]
+        onn = ProductLayer(fea_list, dim, False)
+
+        inp = tf.concat([self.uid_batch_embedded, self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_mean(self.mid_his_batch_embedded, axis=1), tf.reduce_mean(self.cate_his_batch_embedded, axis=1), onn], 1)
+        logit = self.build_fcn_net(inp, use_dice=False)
+        self.build_loss(logit)
+
+class Model_WideDeep(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False):
+        super(Model_WideDeep, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
+                                             ATTENTION_SIZE,
+                                             use_negsampling)
+
+        inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1)
+        # Fully connected layer
+        bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1')
+        dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
+        dnn1 = prelu(dnn1, 'p1')
+        dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2')
+        dnn2 = prelu(dnn2, 'p2')
+        dnn3 = tf.layers.dense(dnn2, 2, activation=None, name='f3')
+        d_layer_wide = tf.concat([tf.concat([self.item_eb,self.item_his_eb_sum], axis=-1),
+                                  self.item_eb * self.item_his_eb_sum], axis=-1)
+        d_layer_wide = tf.layers.dense(d_layer_wide, 2, activation=None, name='f_fm')
+        self.y_hat = tf.nn.softmax(dnn3 + d_layer_wide)
+
+        with tf.name_scope('Metrics'):
+            # Cross-entropy loss and optimizer initialization
+            self.loss = - tf.reduce_mean(tf.log(self.y_hat) * self.target_ph)
+            tf.summary.scalar('loss', self.loss)
+            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
+
+            # Accuracy metric
+            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32))
+            tf.summary.scalar('accuracy', self.accuracy)
+        self.merged = tf.summary.merge_all()
+
+class Model_DNN(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True, use_coaction=False, use_cartes=False):
+        #EMBEDDING_DIM = 4
+        super(Model_DNN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
+                                        ATTENTION_SIZE,
+                                        use_negsampling, use_softmax=use_softmax, use_coaction=use_coaction, use_cartes=use_cartes)
+
+        inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum]+self.cross, 1)
+        logit = self.build_fcn_net(inp, use_dice=False)
+        self.build_loss(logit)
+
+
+class Model_DIN(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True):
+        super(Model_DIN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
+                                        ATTENTION_SIZE,
+                                        use_negsampling, use_softmax=use_softmax)
+
+        # Attention layer
+        with tf.name_scope('Attention_layer'):
+            attention_output = din_attention(self.item_eb, self.item_his_eb, ATTENTION_SIZE, self.mask)
+            att_fea = tf.reduce_sum(attention_output, 1)
+            tf.summary.histogram('att_fea', att_fea)
+        inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum, att_fea], -1)
+        # Fully connected layer
+        logit = self.build_fcn_net(inp, use_dice=True)
+        self.build_loss(logit)
+
+
+class Model_DIEN(Model):
+    def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=True, use_coaction=False):
+        super(Model_DIEN, self).__init__(n_uid, n_mid, n_cate, n_carte,
+                                         EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
+                                         use_negsampling, use_coaction=use_coaction)
+
+        # RNN layer(-s)
+        with tf.name_scope('rnn_1'):
+            rnn_outputs, _ = dynamic_rnn(GRUCell(HIDDEN_SIZE), inputs=self.item_his_eb,
+                                         sequence_length=self.seq_len_ph, dtype=tf.float32,
+                                         scope="gru1")
+            tf.summary.histogram('GRU_outputs', rnn_outputs)
+
+        aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :],
+                                         self.noclk_item_his_eb[:, 1:, :],
+                                         self.mask[:, 1:], 
stag="gru") + self.aux_loss = aux_loss_1 + + # Attention layer + with tf.name_scope('Attention_layer_1'): + att_outputs, alphas = din_fcn_attention(self.item_eb, rnn_outputs, ATTENTION_SIZE, self.mask, + softmax_stag=1, stag='1_1', mode='LIST', return_alphas=True) + tf.summary.histogram('alpha_outputs', alphas) + + with tf.name_scope('rnn_2'): + rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), inputs=rnn_outputs, + att_scores = tf.expand_dims(alphas, -1), + sequence_length=self.seq_len_ph, dtype=tf.float32, + scope="gru2") + tf.summary.histogram('GRU2_Final_State', final_state2) + + inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum, final_state2]+self.cross, 1) + prop = self.build_fcn_net(inp, use_dice=True) + self.build_loss(prop) diff --git a/modelzoo/CAN/script/model_avazu.py b/modelzoo/CAN/script/model_avazu.py new file mode 100644 index 00000000000..cfaaedf815e --- /dev/null +++ b/modelzoo/CAN/script/model_avazu.py @@ -0,0 +1,973 @@ +#coding:utf-8 +import tensorflow as tf +from utils import * +from tensorflow.python.ops.rnn_cell import GRUCell +import mimn as mimn +import rum as rum +from rnn import dynamic_rnn +# import mann_simple_cell as mann_cell +import random + +### Exp config ### + +feature_num = [ + 264,7,7,4842,7912,26,9136,580,36, + 7338655,8303,5,4,2885,8,9,474,4,69,172,62 +] +# id starts with 1 +id_offset = [0] + [sum(feature_num[:i]) for i in range(1, len(feature_num))] + +emb_as_weight = True #False #True +use_new_seq_emb = True #False # True +#edge_type = "item" +edge_type = "3-9" +use_cartes = ["item-his_item"] +use_cartes = ["cate-his_cate"] +use_cartes = [ + "3-9", "3-10", "4-9", "4-10", "6-9", "6-10", "7-9", "7-10", + "16-9", "16-10", "19-9", "19-10", "13-16-19", "13-16-19-9", "13-16-19-10", + "16-3", "16-6", "19-3", "19-6", "13-16-19-3", "13-16-19-6" +] +use_cartes = [] + +WEIGHT_EMB_NUM = 1 +orders = 5 +CALC_MODE = "poly_x_x4" +weight_emb_w, weight_emb_b = [], [] +alpha = 1 +if CALC_MODE in ["seq_sum", "seq", "emb"]: + weight_emb_w = [[4, 3], [3,4]] + #weight_emb_w = [[16, 3], [3,4]] + #weight_emb_w = [[16, 3], [3,4], [4,5],[5,5]] + weight_emb_b = [3, 0] + #weight_emb_b = [3, 4, 5, 0] + WEIGHT_EMB_DIM = sum([w[0]*w[1] for w in weight_emb_w]) + sum(weight_emb_b) +elif CALC_MODE.startswith("poly"): + WEIGHT_EMB_DIM = 16 + if "vec" in CALC_MODE: + WEIGHT_EMB_DIM = int(WEIGHT_EMB_DIM ** 0.5) + elif "wx_ind" in CALC_MODE: + WEIGHT_EMB_DIM *= 2 + elif "x_ind" in CALC_MODE: + WEIGHT_EMB_DIM *= orders + elif "x4" in CALC_MODE: + alpha = 4 + WEIGHT_EMB_DIM *= alpha**2 + +keep_fake_carte_seq = False # True +carte_with_gru = True #False + +carte_num_dict = { + "3-6": 8315+1, + "6-9": 1849306+1, + "4-7": 4547+1, + "3-9": 2102068+1, + "3-10": 161045+1, + "4-9": 2073680+1, + "4-10": 146645+1, + "6-9": 1851115+1, + "6-10": 93771+1, + "7-9": 1765776+1, + "7-10": 23738+1, + "16-9": 2135855+1, + "16-10": 128321+1, + "19-9": 1637771+1, + "19-10": 57099+1, + "13-16-19": 16905+1, + "13-16-19-9": 2579867+1, + "13-16-19-10": 447410+1, + "16-3": 33287+1, + "16-6": 25011+1, + "19-3": 24748+1, + "19-6": 22125+1, + "13-16-19-3": 142791+1, + "13-16-19-6": 86211+1, +} +if use_cartes: + n_cid = sum([carte_num_dict[c] for c in use_cartes]) - (len(use_cartes) - 1) +#n_cid = 59201 #6689210 #8586832 #6689210 #6630010 + +def eb_as_weight(ad, his_items, dim, mode="seq"): + ad = tf.reshape(ad, [-1, WEIGHT_EMB_DIM]) + weight, bias = [], [] + idx = 0 + for w, b in zip(weight_emb_w, weight_emb_b): + 
weight.append(tf.reshape(ad[:, idx:idx+w[0]*w[1]], [-1, w[0], w[1]])) + idx += w[0] * w[1] + if b == 0: + bias.append(None) + else: + bias.append(tf.reshape(ad[:, idx:idx+b], [-1, 1, b])) + idx += b + + if mode == "seq_sum": + his_items_sum = tf.reduce_sum(his_items, 1) + his_items_sum = tf.reshape(his_items_sum, [-1, 1, dim]) + out_seq = tf.nn.selu(tf.matmul(his_items_sum, w_1) + b) + out_seq = tf.matmul(out_seq, w_2) + out = tf.reduce_sum(out_seq, 1) + elif mode == "seq": + his_items_ = tf.unstack(his_items, axis=1) + out_seq = [] + for item in his_items_: + item = tf.reshape(item, [-1, 1, dim]) + #out.append(tf.nn.selu(tf.matmul(item, w) + b)) + h = item + for w, b in zip(weight, bias): + h = tf.matmul(h, w) + if b is not None: + h = tf.nn.selu(h + b) + out_seq.append(h) + #h = tf.nn.selu(tf.matmul(item, w_1) + b) + #out_seq.append(tf.matmul(h, w_2)) + out_seq = tf.concat(out_seq, 1) + out = tf.reduce_sum(out_seq, 1) + elif mode == "emb": + inp = his_items + h = tf.reshape(inp, [-1, 1, dim]) + for w, b in zip(weight, bias): + h = tf.matmul(h, w) + if b is not None: + h = tf.nn.selu(h + b) + out = h + out = tf.reduce_sum(out, 1) + elif mode == "poly": + h = tf.reshape(his_items, [-1, 1, dim]) + w = tf.reshape(ad, [-1, dim, dim]) + ww = [w**(i+1) for i in range(orders)] + for i in range(orders): + h = tf.matmul(h, ww[i]) + #if i < 2: + h = tf.nn.tanh(h) + out = h + out = tf.reduce_sum(out, 1) + elif mode == "poly_w": + h = tf.reshape(his_items, [-1, 1, dim]) + w = tf.reshape(ad, [-1, dim, dim]) + ww = [w**(i+1) for i in range(orders)] + out = [] + for i in range(orders): + out.append(tf.nn.tanh(tf.matmul(h, ww[i]))) + out = tf.reduce_sum(tf.concat(out, axis=1), 1) + elif mode == "poly_x": + h = tf.reshape(his_items, [-1, 1, dim]) + w = tf.reshape(ad, [-1, dim, dim]) + hh = [h**(i+1) for i in range(orders)] + out = [] + for i in range(orders): + #out.append(tf.nn.tanh(tf.matmul(hh[i], w))) + out.append(tf.matmul(hh[i], w)) + out = tf.reduce_sum(tf.concat(out, axis=1), 1) + elif mode == "poly_x_x4": + h = tf.reshape(his_items, [-1, 1, dim * alpha]) + w = tf.reshape(ad, [-1, dim*alpha, dim*alpha]) + hh = [h**(i+1) for i in range(orders)] + out = [] + for i in range(orders): + out.append(tf.nn.tanh(tf.matmul(hh[i], w))) + #out.append(tf.matmul(hh[i], w)) + out = tf.reduce_sum(tf.concat(out, axis=1), 1) + elif mode == "poly_x_ind": + h = tf.reshape(his_items, [-1, 1, dim]) + ww = tf.split(ad, num_or_size_splits=orders, axis=1) + ww = [tf.reshape(w, [-1, dim, dim]) for w in ww] + hh = [h**(i+1) for i in range(orders)] + out = [] + for i in range(orders): + out.append(tf.nn.tanh(tf.matmul(hh[i], ww[i]))) + #out.append(tf.matmul(hh[i], ww[i])) + out = tf.reduce_sum(tf.concat(out, axis=1), 1) + elif mode == "poly_wx": + h = tf.reshape(his_items, [-1, 1, dim]) + w = tf.reshape(ad, [-1, dim, dim]) + ww = [w**(i+1) for i in range(orders)] + hh = [h**(i+1) for i in range(orders)] + out = [] + for i in range(orders): + out.append(tf.nn.tanh(tf.matmul(hh[i], w))) + out.append(tf.nn.tanh(tf.matmul(h, ww[i]))) + out = tf.reduce_sum(tf.concat(out, axis=1), 1) + elif mode == "poly_wx_ind": + h = tf.reshape(his_items, [-1, 1, dim]) + ww = tf.split(ad, num_or_size_splits=2, axis=1) + ww = [tf.reshape(w, [-1, dim, dim]) for w in ww] + ww1 = [ww[1]**(i+1) for i in range(orders)] + hh = [h**(i+1) for i in range(orders)] + out = [] + for i in range(orders): + out.append(tf.nn.tanh(tf.matmul(hh[i], ww[0]))) + out.append(tf.nn.tanh(tf.matmul(h, ww1[i]))) + out = tf.reduce_sum(tf.concat(out, axis=1), 1) + elif mode 
== "poly_x_vec": + h = tf.reshape(his_items, [-1, 1, dim]) + w = tf.reshape(ad, [-1, 1, dim]) + hh = [h**(i+1) for i in range(orders)] + out = [] + for i in range(orders): + out.append(tf.nn.tanh(hh[i] * w)) + #out.append(hh[i] * w) + out = tf.reduce_sum(tf.concat(out, axis=1), 1) + out = tf.reduce_sum(tf.concat(out, axis=1), 1) + elif mode == "poly_pure": + h = tf.reshape(his_items, [-1, 1, dim]) + w = tf.reshape(ad, [-1, dim, dim]) + ww = [w**(i+1) for i in range(orders)] + hh = [h**(i+1) for i in range(orders)] + out = [] + for i in range(orders): + for j in range(orders): + out.append(tf.nn.tanh(tf.matmul(hh[i], ww[j]))) + out = tf.reduce_sum(tf.concat(out, axis=1), 1) + + #out = tf.nn.selu(out) + if keep_fake_carte_seq and mode=="seq": + return out, out_seq + return out, None + +def FM(feas): + feas = tf.stack(feas, aixs=1) + square_of_sum = tf.reduce_sum(feas, axis=1) ** 2 + sum_of_square = tf.reduce_sum(feas ** 2, axis=1) + return 0.5 * (square_of_sum - sum_of_square) + +class Model(object): + def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN, use_negsample=False, Flag="DNN"): + self.model_flag = Flag + self.reg = False + self.use_negsample= use_negsample + with tf.name_scope('Inputs'): + self.user_batch_ph = tf.placeholder(tf.int32, [None, None], name='user_batch_ph') + self.ad_batch_ph = tf.placeholder(tf.int32, [None, None], name='ad_batch_ph') + self.scene_batch_ph = tf.placeholder(tf.int32, [None, None], name='scene_batch_ph') + self.time_batch_ph = tf.placeholder(tf.int32, [None, ], name='time_batch_ph') + self.clk_seq_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='clk_seq_batch_ph') + self.carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='carte_batch_ph') + #self.noclk_seq_batch_ph = tf.placeholder(tf.int32, [None, None], name='noclk_seq_batch_ph') + ''' + self.item_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='item_carte_batch_ph') + self.cate_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='cate_carte_batch_ph') + self.item_cate_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='item_cate_carte_batch_ph') + self.cate_item_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='cate_item_carte_batch_ph') + ''' + self.clk_mask = tf.placeholder(tf.float32, [None, None], name='clk_mask_batch_ph') + self.target_ph = tf.placeholder(tf.float32, [None, 2], name='target_ph') + self.lr = tf.placeholder(tf.float64, []) + + # Embedding layer + with tf.name_scope('Embedding_layer'): + + ad_ph = tf.split(self.ad_batch_ph, num_or_size_splits=10, axis=1) + scene_ph = tf.split(self.scene_batch_ph, num_or_size_splits=6, axis=1) + user_ph = tf.split(self.user_batch_ph, num_or_size_splits=4, axis=1) + feature_ph = [self.time_batch_ph] + ad_ph[:2] + scene_ph + user_ph + ad_ph[2:] + + self.embedding_vars = [] + features = [] + for i, num in enumerate(feature_num): + self.embedding_vars.append(tf.get_variable("embedding_var_fea{}".format(i), [num, EMBEDDING_DIM], trainable=True)) + features.append(tf.nn.embedding_lookup(self.embedding_vars[i], feature_ph[i] - id_offset[i])) + + self.user_batch_embedded = tf.concat(features[9:13], axis=1) + self.ad_batch_embedded = tf.concat(features[1:3]+features[13:], axis=1) + self.scene_batch_embedded = tf.concat(features[3:9], axis=1) + self.time_batch_embedded = features[0] + self.clk_seq_batch_embedded = tf.nn.embedding_lookup(self.embedding_vars[0], self.clk_seq_batch_ph) + + if use_cartes: + self.carte_embeddings_var = [] + self.carte_batch_embedded = 
+                self.carte_batch_embedded = []
+                for i, c in enumerate(use_cartes):
+                    self.carte_embeddings_var.append(tf.get_variable("carte_embedding_var_{}".format(c), [carte_num_dict[c], EMBEDDING_DIM], trainable=True))
+                    self.carte_batch_embedded.append(tf.nn.embedding_lookup(self.carte_embeddings_var[i], self.carte_batch_ph[:, i]))
+
+            ### fake carte ###
+            if emb_as_weight:
+                '''
+                TODO: support multi-group cartesian feature, e.g., 13-16-19
+                '''
+                idx_w, idx_x = map(int, edge_type.split('-'))
+
+                self.weight_embeddings_var = tf.get_variable("weight_embedding_var", [feature_num[idx_w] + 1, WEIGHT_EMB_NUM * WEIGHT_EMB_DIM], trainable=True)
+                self.weight_batch_embedded = tf.nn.embedding_lookup(self.weight_embeddings_var, feature_ph[idx_w])
+                if use_new_seq_emb:
+                    self.seq_embeddings_var = tf.get_variable("seq_embedding_var", [feature_num[idx_x], EMBEDDING_DIM * alpha], trainable=True)
+                    self.seq_his_batch_embedded = tf.nn.embedding_lookup(self.seq_embeddings_var, feature_ph[idx_x])
+
+        with tf.name_scope('init_operation'):
+            for i, num in enumerate(feature_num):
+                embedding_placeholder = tf.placeholder(tf.float32,[num, EMBEDDING_DIM], name="emb_ph_{}".format(i))
+                self.embedding_vars[i].assign(embedding_placeholder)
+
+            if use_cartes:
+                self.carte_embedding_placeholder = []
+                self.carte_embedding_init = []
+                for i, c in enumerate(use_cartes):
+                    self.carte_embedding_placeholder.append(tf.placeholder(tf.float32,[carte_num_dict[c], EMBEDDING_DIM], name="cid_emb_ph"))
+                    self.carte_embedding_init.append(self.carte_embeddings_var[i].assign(self.carte_embedding_placeholder[i]))
+
+        if self.use_negsample:
+            self.noclk_seq_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='noclk_seq_batch_ph')
+            self.noclk_seq_batch_embedded = tf.nn.embedding_lookup(self.embedding_vars[0], self.noclk_seq_batch_ph)
+            self.noclk_mask = tf.placeholder(tf.float32, [None, None], name='noclk_mask_batch_ph')
+            #self.mid_neg_batch_ph = tf.placeholder(tf.int32, [None, None], name='neg_his_batch_ph')
+            #self.cate_neg_batch_ph = tf.placeholder(tf.int32, [None, None], name='neg_cate_his_batch_ph')
+
+            #self.neg_item_his_eb = tf.nn.embedding_lookup(self.mid_embeddings_var, self.mid_neg_batch_ph)
+            #self.neg_cate_his_eb = tf.nn.embedding_lookup(self.mid_embeddings_var, self.cate_neg_batch_ph)
+            #self.neg_his_eb = tf.concat([self.neg_item_his_eb,self.neg_cate_his_eb], axis=2) * tf.reshape(self.mask,(BATCH_SIZE, SEQ_LEN, 1))
+            self.noclk_seq_eb = tf.concat(tf.unstack(tf.reshape(self.noclk_seq_batch_embedded,(BATCH_SIZE, 10, SEQ_LEN, EMBEDDING_DIM)), axis=1), axis=-1) * tf.reshape(self.noclk_mask,(BATCH_SIZE, SEQ_LEN, 1))
+
+        self.user_eb = tf.reshape(self.user_batch_embedded, [-1, EMBEDDING_DIM * 4]) # [batch, 4, dim] -> [batch, 4*dim]
+        self.ad_eb = tf.reshape(self.ad_batch_embedded, [-1, EMBEDDING_DIM * 10])
+        self.scene_eb = tf.reshape(self.scene_batch_embedded, [-1, EMBEDDING_DIM * 6])
+        self.time_eb = self.time_batch_embedded
+
+        self.clk_seq_eb = tf.concat(tf.unstack(tf.reshape(self.clk_seq_batch_embedded,(BATCH_SIZE, 10, SEQ_LEN, EMBEDDING_DIM)), axis=1), axis=-1) * tf.reshape(self.clk_mask, (BATCH_SIZE, SEQ_LEN, 1))
+        self.clk_seq_eb_sum = tf.reduce_sum(self.clk_seq_eb, 1)
+
+
+        self.carte_embs = []
+        if use_cartes:
+            self.carte_embs += self.carte_batch_embedded
+
+        if emb_as_weight:
+            if use_new_seq_emb:
+                seq_his_batch = self.seq_his_batch_embedded
+            else:
+                seq_his_batch = features[int(edge_type.split('-')[1])]
+            tmp_sum, tmp_seq = [], []
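+            # Added sketch of the step below: each WEIGHT_EMB_DIM-wide slice of the
+            # weight embedding is handed to eb_as_weight(), which interprets it as
+            # the parameters of a micro-MLP applied to the (reshaped) history
+            # embeddings; the per-slice co-action outputs are then concatenated.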
+            if CALC_MODE.startswith("seq"):
+                shape = (BATCH_SIZE, SEQ_LEN, EMBEDDING_DIM)
+            else:
+                shape = (BATCH_SIZE, EMBEDDING_DIM * alpha)
+            for i in range(WEIGHT_EMB_NUM):
+                fake_carte_sum, fake_carte_seq = eb_as_weight(self.weight_batch_embedded[:, i * WEIGHT_EMB_DIM: (i+1) * WEIGHT_EMB_DIM], tf.reshape(seq_his_batch, shape), EMBEDDING_DIM, mode=CALC_MODE)
+                tmp_sum.append(fake_carte_sum)
+                tmp_seq.append(fake_carte_seq)
+            self.fake_carte_sum = tf.concat(tmp_sum, axis=1)
+            if keep_fake_carte_seq:
+                self.fake_carte_seq = tmp_seq
+
+
+    def build_fcn_net(self, inp, use_dice = False):
+        bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1')
+        dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
+        if use_dice:
+            dnn1 = dice(dnn1, name='dice_1')
+        else:
+            dnn1 = prelu(dnn1, scope='prelu_1')
+
+        dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2')
+        if use_dice:
+            dnn2 = dice(dnn2, name='dice_2')
+        else:
+            dnn2 = prelu(dnn2, scope='prelu_2')
+
+        dnn3 = tf.layers.dense(dnn2, 2, activation=None, name='f3')
+        self.y_hat = tf.nn.softmax(dnn3) + 0.00000001
+
+        with tf.name_scope('Metrics'):
+            # Cross-entropy loss and optimizer initialization
+            ctr_loss = - tf.reduce_mean(tf.log(self.y_hat) * self.target_ph)
+            self.loss = ctr_loss
+            if self.use_negsample:
+                self.loss += self.aux_loss
+            if self.reg:
+                self.loss += self.reg_loss
+
+            tf.summary.scalar('loss', self.loss)
+            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
+            # Accuracy metric
+            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32))
+            tf.summary.scalar('accuracy', self.accuracy)
+
+        self.merged = tf.summary.merge_all()
+
+    def auxiliary_loss(self, h_states, click_seq, noclick_seq, clk_mask=None, noclk_mask = None, stag = None):
+        #mask = tf.cast(mask, tf.float32)
+        if noclk_mask is None:
+            noclk_mask = clk_mask
+        click_input_ = tf.concat([h_states, click_seq], -1)
+        noclick_input_ = tf.concat([h_states, noclick_seq], -1)
+        click_prop_ = self.auxiliary_net(click_input_, stag = stag)[:, :, 0]
+        noclick_prop_ = self.auxiliary_net(noclick_input_, stag = stag)[:, :, 0]
+
+        click_loss_ = - tf.reshape(tf.log(click_prop_), [-1, tf.shape(click_seq)[1]]) * clk_mask
+        noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * noclk_mask
+
+        loss_ = tf.reduce_mean(click_loss_ + noclick_loss_)
+        return loss_
+
+    def auxiliary_net(self, in_, stag='auxiliary_net'):
+        bn1 = tf.layers.batch_normalization(inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE)
+        dnn1 = tf.layers.dense(bn1, 100, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE)
+        dnn1 = tf.nn.sigmoid(dnn1)
+        dnn2 = tf.layers.dense(dnn1, 50, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE)
+        dnn2 = tf.nn.sigmoid(dnn2)
+        dnn3 = tf.layers.dense(dnn2, 2, activation=None, name='f3' + stag, reuse=tf.AUTO_REUSE)
+        y_hat = tf.nn.softmax(dnn3) + 0.000001
+        return y_hat
+
+    def init_uid_weight(self, sess, uid_weight):
+        sess.run(self.uid_embedding_init,feed_dict={self.uid_embedding_placeholder: uid_weight})
+
+    def init_mid_weight(self, sess, mid_weight):
+        sess.run([self.mid_embedding_init],feed_dict={self.mid_embedding_placeholder: mid_weight})
+
+    def save_mid_embedding_weight(self, sess):
+        embedding = sess.run(self.mid_embeddings_var)
+        return embedding
+
+    def save_uid_embedding_weight(self, sess):
+        embedding = sess.run(self.uid_bp_memory)
+        return embedding
+
+    def train(self, sess, inps):
+        input_dict = {
+            self.user_batch_ph: inps[0],
+            self.ad_batch_ph: inps[1],
+            self.scene_batch_ph: inps[2],
+            self.time_batch_ph: inps[3],
+            self.clk_seq_batch_ph: inps[4],
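+            # Added note: inps[5] is reserved for the no-click sequence and is only
+            # fed below when use_negsample is enabled (inps[7] carries its mask).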
+            self.clk_mask: inps[6],
+            self.target_ph: inps[-2],
+            self.lr: inps[-1],
+        }
+        if use_cartes:
+            input_dict[self.carte_batch_ph] = inps[-3]
+            if "item-his_item" in use_cartes:
+                input_dict[self.item_carte_batch_ph] = inps[10]
+            if "cate-his_cate" in use_cartes:
+                input_dict[self.cate_carte_batch_ph] = inps[11]
+            if "item-his_cate" in use_cartes:
+                input_dict[self.item_cate_carte_batch_ph] = inps[12]
+            if "cate-his_item" in use_cartes:
+                input_dict[self.cate_item_carte_batch_ph] = inps[13]
+
+        if self.use_negsample:
+            input_dict[self.noclk_seq_batch_ph] = inps[5]
+            input_dict[self.noclk_mask] = inps[7]
+            loss, aux_loss, accuracy, _ = sess.run([self.loss, self.aux_loss, self.accuracy, self.optimizer], feed_dict=input_dict)
+        else:
+            loss, accuracy, _ = sess.run([self.loss, self.accuracy, self.optimizer], feed_dict=input_dict)
+            aux_loss = 0
+        return loss, accuracy, aux_loss
+
+    def calculate(self, sess, inps):
+        input_dict = {
+            self.user_batch_ph: inps[0],
+            self.ad_batch_ph: inps[1],
+            self.scene_batch_ph: inps[2],
+            self.time_batch_ph: inps[3],
+            self.clk_seq_batch_ph: inps[4],
+            self.clk_mask: inps[6],
+            self.target_ph: inps[-1],
+        }
+        if use_cartes:
+            input_dict[self.carte_batch_ph] = inps[-2]
+
+            if "item-his_item" in use_cartes:
+                input_dict[self.item_carte_batch_ph] = inps[9]
+            if "cate-his_cate" in use_cartes:
+                input_dict[self.cate_carte_batch_ph] = inps[10]
+            if "item-his_cate" in use_cartes:
+                input_dict[self.item_cate_carte_batch_ph] = inps[11]
+            if "cate-his_item" in use_cartes:
+                input_dict[self.cate_item_carte_batch_ph] = inps[12]
+
+        if self.use_negsample:
+            input_dict[self.noclk_seq_batch_ph] = inps[5]
+            input_dict[self.noclk_mask] = inps[7]
+            probs, loss, accuracy, aux_loss = sess.run([self.y_hat, self.loss, self.accuracy, self.aux_loss], feed_dict=input_dict)
+        else:
+            probs, loss, accuracy = sess.run([self.y_hat, self.loss, self.accuracy], feed_dict=input_dict)
+            aux_loss = 0
+        return probs, loss, accuracy, aux_loss
+
+    def save(self, sess, path):
+        saver = tf.train.Saver()
+        saver.save(sess, save_path=path)
+
+    def restore(self, sess, path):
+        saver = tf.train.Saver()
+        saver.restore(sess, save_path=path)
+        print('model restored from %s' % path)
+
+class Model_DNN(Model):
+    def __init__(self,n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
+        super(Model_DNN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
+                                        BATCH_SIZE, SEQ_LEN, Flag="DNN")
+
+        #inp = tf.concat([self.item_eb, self.item_his_eb_sum], 1)
+        if emb_as_weight:
+            self.carte_embs.append(self.fake_carte_sum)
+        inp = tf.concat([self.user_eb, self.ad_eb, self.scene_eb, self.time_eb] + self.carte_embs, 1)
+        self.build_fcn_net(inp, use_dice=False)
+
+
+class Model_FFM(Model):
+    def __init__(self,n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
+        super(Model_FFM, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
+                                        BATCH_SIZE, SEQ_LEN, Flag="FFM")
+
+        inp = tf.concat([self.item_eb, self.item_his_eb_sum], 1)
+        self.build_fcn_net(inp, use_dice=False)
+
+
+
+class Model_PNN(Model):
+    def __init__(self,n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
+        super(Model_PNN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
+                                        BATCH_SIZE, SEQ_LEN, Flag="PNN")
+
+        inp = tf.concat([self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum], 1)
+        self.build_fcn_net(inp, use_dice=False)
+
+
+class Model_GRU4REC(Model):
+    def __init__(self,n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
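+        # Added note: Model_GRU4REC, Model_DIN, Model_ARNN, Model_RUM and
+        # Model_MIMN are baselines carried over from the DIEN/MIMN code base;
+        # they still reference item_eb/item_his_eb/mask attributes that this
+        # Model variant does not build, so they are kept for reference rather
+        # than wired to the CAN feature layout above.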
+        super(Model_GRU4REC, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
+                                            BATCH_SIZE, SEQ_LEN, Flag="GRU4REC")
+        with tf.name_scope('rnn_1'):
+            self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
+            rnn_outputs, final_state1 = dynamic_rnn(GRUCell(2*EMBEDDING_DIM), inputs=self.item_his_eb,
+                                                    sequence_length=self.sequence_length, dtype=tf.float32,
+                                                    scope="gru1")
+            tf.summary.histogram('GRU_outputs', rnn_outputs)
+
+        inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state1], 1)
+        self.build_fcn_net(inp, use_dice=False)
+
+
+class Model_DIN(Model):
+    def __init__(self,n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
+        super(Model_DIN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
+                                        BATCH_SIZE, SEQ_LEN, Flag="DIN")
+        with tf.name_scope('Attention_layer'):
+            attention_output = din_attention(self.item_eb, self.item_his_eb, HIDDEN_SIZE, self.mask)
+            att_fea = tf.reduce_sum(attention_output, 1)
+            tf.summary.histogram('att_fea', att_fea)
+        inp = tf.concat([self.item_eb, self.item_his_eb_sum, att_fea], -1)
+        self.build_fcn_net(inp, use_dice=False)
+
+
+class Model_ARNN(Model):
+    def __init__(self,n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
+        super(Model_ARNN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
+                                         BATCH_SIZE, SEQ_LEN, Flag="ARNN")
+        with tf.name_scope('rnn_1'):
+            self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
+            rnn_outputs, final_state1 = dynamic_rnn(GRUCell(2*EMBEDDING_DIM), inputs=self.item_his_eb,
+                                                    sequence_length=self.sequence_length, dtype=tf.float32,
+                                                    scope="gru1")
+            tf.summary.histogram('GRU_outputs', rnn_outputs)
+        # Attention layer
+        with tf.name_scope('Attention_layer_1'):
+            att_gru = din_attention(self.item_eb, rnn_outputs, HIDDEN_SIZE, self.mask)
+            att_gru = tf.reduce_sum(att_gru, 1)
+
+        inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state1, att_gru], -1)
+        self.build_fcn_net(inp, use_dice=False)
+
+class Model_RUM(Model):
+    def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, MEMORY_SIZE, SEQ_LEN=400, mask_flag=True):
+        super(Model_RUM, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
+                                        BATCH_SIZE, SEQ_LEN, Flag="RUM")
+
+        def clear_mask_state(state, begin_state, mask, t):
+            state["controller_state"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1))) * begin_state["controller_state"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1)) * state["controller_state"]
+            state["M"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["M"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["M"]
+            return state
+
+        cell = rum.RUMCell(controller_units=HIDDEN_SIZE, memory_size=MEMORY_SIZE, memory_vector_dim=2*EMBEDDING_DIM,read_head_num=1, write_head_num=1,
+                           reuse=False, output_dim=HIDDEN_SIZE, clip_value=20, batch_size=BATCH_SIZE)
+
+        state = cell.zero_state(BATCH_SIZE, tf.float32)
+        begin_state = state
+        for t in range(SEQ_LEN):
+            output, state = cell(self.item_his_eb[:, t, :], state)
+            if mask_flag:
+                state = clear_mask_state(state, begin_state, self.mask, t)
+
+        final_state = output
+        before_memory = state['M']
+        rum_att_hist = din_attention(self.item_eb, before_memory, HIDDEN_SIZE, None)
+
+        inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state, tf.squeeze(rum_att_hist)], 1)
+
+        self.build_fcn_net(inp, use_dice=False)
+
+class Model_DIEN(Model):
+    def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=400, use_negsample=False, use_mi_cons=False):
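+        # Added summary: DIEN here = GRU over the click sequence (rnn_1), an
+        # optional auxiliary / NCE-style loss between adjacent steps, DIN
+        # attention between the ad and the GRU states, then an attention-weighted
+        # AUGRU (rnn_2 with att_scores) whose final state feeds the MLP tower.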
+        super(Model_DIEN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
+                                         BATCH_SIZE, SEQ_LEN, use_negsample, Flag="DIEN")
+
+        with tf.name_scope('rnn_1'):
+            self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
+            rnn_outputs, _ = dynamic_rnn(GRUCell(10*EMBEDDING_DIM), inputs=self.clk_seq_eb,
+                                         sequence_length=self.sequence_length, dtype=tf.float32,
+                                         scope="gru1")
+            tf.summary.histogram('GRU_outputs', rnn_outputs)
+
+        if use_negsample:
+            if use_mi_cons:
+                #aux_loss_1 = self.info_NCE(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :], self.mask[:, 1:])
+                #aux_loss_1 = self.info_NCE_aux(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :], self.neg_his_eb[:, 1:, :], self.mask[:, 1:])
+                aux_loss_1 = self.mi_loss(rnn_outputs[:, :-1, :], self.clk_seq_eb[:, 1:, :],
+                                          self.noclk_seq_eb[:, 1:, :], self.clk_mask[:, 1:], stag = "mi_0")
+            else:
+                aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.clk_seq_eb[:, 1:, :],
+                                                 self.noclk_seq_eb[:, 1:, :], self.clk_mask[:, 1:], self.noclk_mask[:, 1:], stag = "bigru_0")
+            self.aux_loss = aux_loss_1
+
+        # Attention layer
+        with tf.name_scope('Attention_layer_1'):
+            att_outputs, alphas = din_attention(self.ad_eb, rnn_outputs, HIDDEN_SIZE, mask=self.clk_mask, mode="LIST", return_alphas=True)
+            tf.summary.histogram('alpha_outputs', alphas)
+
+        with tf.name_scope('rnn_2'):
+            rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), inputs=rnn_outputs,
+                                                     att_scores = tf.expand_dims(alphas, -1),
+                                                     sequence_length=self.sequence_length, dtype=tf.float32,
+                                                     scope="gru2")
+            tf.summary.histogram('GRU2_Final_State', final_state2)
+
+        #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum], 1)
+        #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.item_carte_eb_sum], 1)
+        #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.cate_carte_eb_sum], 1)
+        #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.item_cate_carte_eb_sum], 1)
+        #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.cate_carte_eb_sum], 1)
+        #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.item_carte_eb_sum, self.cate_carte_eb_sum], 1)
+
+
+        #if attention
+
+        if emb_as_weight:
+            if keep_fake_carte_seq:
+                if carte_with_gru:
+                    with tf.name_scope('rnn_3'):
+                        self.fake_carte_seq, _ = dynamic_rnn(GRUCell(EMBEDDING_DIM), inputs=self.fake_carte_seq,
+                                                             sequence_length=self.sequence_length, dtype=tf.float32,
+                                                             scope="gru3")
+
+                with tf.name_scope('Attention_layer_2'):
+                    carte_att_outputs, _ = din_attention(self.mid_batch_embedded, self.fake_carte_seq, HIDDEN_SIZE, mask=self.clk_mask, stag="carte", mode="SUM", return_alphas=True)
+                    self.carte_embs.append(tf.reduce_sum(carte_att_outputs, 1))
+                    #self.carte_embs.append(self.fake_carte_sum)
+            else:
+                self.carte_embs.append(self.fake_carte_sum)
+        inp = tf.concat([self.user_eb, self.ad_eb, self.scene_eb, self.time_eb, final_state2, self.clk_seq_eb_sum, self.ad_eb*self.clk_seq_eb_sum] + self.carte_embs, 1)
+        self.build_fcn_net(inp, use_dice=False)
+
+    def neg_sample(self, neg_his_emb, K=10, mode="random"):
+        shape = tf.shape(neg_his_emb)
+        batch, seq, dim = shape[0], shape[1], shape[2]
+
+        if mode == "random":
+            neg = tf.expand_dims(neg_his_emb, 1) #[batch, 1, seq, dim]
+            neg = tf.tile(neg, [1,seq, 1,1]) #[batch, seq, seq, dim]
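+            # Added note: the slice below just keeps the first K tiled negatives
+            # per step; the commented random_uniform/batch_gather pair sketches a
+            # random per-step sampling alternative that is disabled here.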
+            # index = tf.random_uniform((batch, seq, K), minval=0, maxval=seq, dtype=tf.int32)
+            # neg = tf.batch_gather(neg, index) #[batch, seq, K, dim]
+            neg = neg[:, :, :K, :]
+            return neg
+        elif mode == "aux":
+            neg = tf.expand_dims(neg_his_emb, 1)
+            return neg
+
+    def mi_loss_(self, h_states, click_seq, noclick_seq, mask = None, stag = None):
+        #mask = tf.cast(mask, tf.float32)
+        '''
+        h = self.mlp(h_states, stag = stag)
+        pos = self.mlp(click_seq, stag = stag)
+        neg = self.mlp(noclick_seq, stag = stag)
+
+        scores_pos = tf.matmul(h, pos)
+        scores_neg = tf.matmul(h, neg)
+        joint = tf.linalg.diag_part(score_pos)
+        '''
+        pos = tf.concat([h_states, click_seq], axis=2)
+        f_pos = self.mlp(pos) # [batch, seq, 1]
+
+        K = 99
+        neg = self.neg_sample(noclick_seq, K)
+        h_states_tiled = tf.tile(tf.expand_dims(h_states, 2), [1,1,K,1]) # [batch, seq, K, dim]
+        total = tf.concat([h_states_tiled, neg], axis=3)
+        f_neg = self.mlp(total) #[batch, seq, K, 1]
+        f_neg = tf.reduce_sum(f_neg, axis=2)
+        f_total = f_pos + f_neg
+
+        loss_ = tf.reshape(tf.log(f_pos / f_total), [-1, tf.shape(click_seq)[1]]) * mask
+        loss_ = - tf.reduce_mean(loss_)
+
+        return loss_
+
+    def mi_loss(self, h_states, click_seq, noclick_seq, mask, stag='NCE'):
+        exp = 'random_1'
+        if exp == 'random_1':
+            shape = tf.shape(h_states)
+            batch, len_seq, dim = shape[0], shape[1], shape[2]
+            Wk_ct = []
+            x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc')
+            x = tf.unstack(x, axis=1)
+            neg = tf.layers.dense(noclick_seq, 256, activation=None, name='neg_enc')
+            neg = tf.unstack(neg, axis=1)
+            c_t = tf.unstack(h_states, axis=1)
+            with tf.name_scope(stag):
+                for i in range(len(c_t)):
+                    Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i)))
+            #nce = 0
+            nce = []
+            for i in range(len(c_t)):
+                s_p = tf.reduce_sum(x[i] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1]
+                s_n = tf.reduce_sum(neg[i] * Wk_ct[i], axis=1, keep_dims=True)
+                score = tf.concat([s_p, s_n], axis=1)
+                score = tf.nn.log_softmax(tf.exp(score), dim=1)
+                score = tf.reshape(score[:, 0], [-1])
+                nce.append(score)
+            nce = tf.stack(nce, axis=1) * mask
+            nce = tf.reduce_sum(nce)
+            nce /= -1.0 * tf.cast(batch*len_seq, tf.float32)
+            return nce
+        elif exp == 'random_all':
+            shape = tf.shape(h_states)
+            batch, len_seq, dim = shape[0], shape[1], shape[2]
+            Wk_ct = []
+            x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc')
+            x = tf.unstack(x, axis=1)
+            neg = tf.layers.dense(noclick_seq, 256, activation=None, name='neg_enc')
+            neg = tf.unstack(neg, axis=1)
+            c_t = tf.unstack(h_states, axis=1)
+            with tf.name_scope(stag):
+                for i in range(len(c_t)):
+                    Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i)))
+            nce = []
+            for i in range(len(c_t)):
+                s_p = tf.reduce_sum(x[i] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1]
+                s_n = []
+                for j in range(len(neg)):
+                    s_n.append(tf.reduce_sum(neg[j] * Wk_ct[i], axis=1, keep_dims=True))
+                score = tf.concat([s_p] + s_n, axis=1)
+                score = tf.nn.log_softmax(tf.exp(score), dim=1)
+                score = tf.reshape(score[:, 0], [-1])
+                nce.append(score)
+            nce = tf.stack(nce, axis=1) * mask
+            nce = tf.reduce_sum(nce)
+            nce /= -1.0 * tf.cast(batch*len_seq, tf.float32)
+            return nce
+
+        elif exp == 'batch_1':
+            shape = tf.shape(click_seq)
+            batch, len_seq, dim = shape[0], shape[1], shape[2]
+            x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc')
+            x = tf.unstack(x, axis=1)
+            c_t = tf.unstack(h_states, axis=1)
+            # different W for every step
+            rand_idx = 12
+            Wk_ct = []
+            with tf.name_scope(stag):
+                for i in range(len(c_t)):
+                    Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i)))
+            nce = []
+            for i in range(len(c_t)):
+                x_i = tf.tile(x[i], [2,1])
+                s_p = tf.reduce_sum(x_i[0:128, :] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1]
+                s_n = tf.reduce_sum(x_i[rand_idx:rand_idx+128] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1]
+                score = tf.concat([s_p, s_n], axis=1)
+                score = tf.nn.log_softmax(tf.exp(score), dim=1) # softmax over batch
+                score = tf.reshape(score[:, 0], [-1])
+                nce.append(score)
+            nce = tf.stack(nce, axis=1) * mask
+            nce = tf.reduce_sum(nce)
+            nce /= -1.0*tf.cast(batch*len_seq, tf.float32)
+            return nce
+
+        elif exp == 'batch_all':
+            shape = tf.shape(click_seq)
+            batch, len_seq, dim = shape[0], shape[1], shape[2]
+            x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc')
+            x = tf.unstack(x, axis=1)
+            c_t = tf.unstack(h_states, axis=1)
+            # different W for every step
+            Wk_ct = []
+            with tf.name_scope(stag):
+                for i in range(len(c_t)):
+                    Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i)))
+            nce = []
+            for i in range(len(c_t)):
+                score = tf.exp(tf.matmul(x[i], tf.transpose(Wk_ct[i])))
+                score = tf.nn.log_softmax(score, dim=0) # softmax over batch
+                nce.append(tf.linalg.diag_part(score))
+                #nce += tf.reduce_sum(tf.linalg.diag_part(score))
+            nce = tf.stack(nce, axis=1) * mask
+            nce = tf.reduce_sum(nce)
+            nce /= -1.0*tf.cast(batch*len_seq, tf.float32)
+            return nce
+
+
+    def mlp(self, in_, stag='mlp'):
+        bn1 = tf.layers.batch_normalization(inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE)
+        dnn1 = tf.layers.dense(bn1, 1024, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE)
+        dnn2 = tf.layers.dense(dnn1, 512, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE)
+        dnn3 = tf.layers.dense(dnn2, 256, activation=None, name='f3' + stag, reuse=tf.AUTO_REUSE)
+        return dnn3
+        '''
+        dnn4 = tf.layers.dense(dnn3, 1, activation=None, name='f4' + stag, reuse=tf.AUTO_REUSE)
+        dnn4 = tf.nn.sigmoid(dnn4)
+        return dnn4
+        y_hat = tf.nn.softmax(dnn3) + 0.000001
+        return y_hat
+        '''
+
+    def auxiliary_loss(self, h_states, click_seq, noclick_seq, clk_mask=None, noclk_mask=None, stag=None):
+        if noclk_mask is None:
+            noclk_mask = clk_mask
+        mask = noclk_mask  # local mask used by the negative branches below
+        # positive
+        click_input = tf.concat([h_states, click_seq], -1)
+        click_prop = self.auxiliary_net(click_input, stag = stag)[:, :, 0]
+        click_loss = - tf.reshape(tf.log(click_prop), [-1, tf.shape(click_seq)[1]]) * clk_mask
+
+        # negative
+        exp = 'random_1'
+        if exp =='random_1':
+            return super(Model_DIEN, self).auxiliary_loss(h_states, click_seq, noclick_seq, clk_mask, noclk_mask, stag)
+        elif exp == 'random_all':
+            batch = 99
+            noclick_seq_ = tf.tile(noclick_seq, [1,2,1]) # shape = [batch, 2 * seq, dim] for sliding window
+            noclick_input = []
+            for i in range(99):
+                noclick_input.append(tf.concat([h_states, noclick_seq_[:, i:i+batch, :]], axis=-1))
+            noclick_input = tf.concat(noclick_input, axis=0)
+            mask = tf.tile(mask, [batch, 1])
+        elif exp == 'batch_1':
+            batch = 128
+            h_states = tf.unstack(h_states, axis=1)
+            click_seq = tf.unstack(click_seq, axis=1)
+            noclick_input = []
+            rand_idx = 12
+            for i in range(len(click_seq)):
+                h = h_states[i] # seq i of the batch, shape = [batch, dim]
+                c = click_seq[i]
+                c = tf.tile(c, [2, 1]) # sliding window
+                noclick_input.append(tf.concat([h, c[rand_idx:rand_idx+batch,:]], axis=1))
+            noclick_input = tf.stack(noclick_input, axis=1)
+        elif exp == 'batch_all':
+            batch = 128
+            h_states = tf.unstack(h_states, axis=1)
+            click_seq = tf.unstack(click_seq, axis=1)
+            noclick_input = []
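+            # Added note: 'batch_all' uses every other example of the duplicated
+            # batch as a negative for each step, which is why the mask is tiled
+            # (batch - 1) times below.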
+            for i in range(len(click_seq)):
+                h = h_states[i] # seq i of the batch, shape = [batch, dim]
+                c = click_seq[i]
+                c = tf.tile(c, [2, 1]) # sliding window
+                neg = []
+                for i in range(1, batch):
+                    neg.append(tf.concat([h, c[i:i+batch,:]], axis=1))
+                noclick_input.append(tf.concat(neg, axis=0))
+            noclick_input = tf.stack(noclick_input, axis=1)
+            mask = tf.tile(mask, [batch-1, 1])
+
+        noclick_prop = self.auxiliary_net(noclick_input, stag = stag)[:, :, 0]
+        noclick_loss = - tf.reshape(tf.log(1.0 - noclick_prop), [-1, tf.shape(noclick_seq)[1]]) * mask
+        loss_ = tf.reduce_mean(click_loss) + tf.reduce_mean(noclick_loss)
+        return loss_
+
+    def aux_batch(self, h_states, click_seq, noclick_seq, mask = None, stag = None):
+        #mask = tf.cast(mask, tf.float32)
+        # batch = tf.shape(h_states)[0]
+        batch = 128
+        click_input_ = tf.concat([h_states, click_seq], -1)
+        h_states_ = tf.unstack(h_states, axis=1)
+        click_seq_ = tf.unstack(click_seq, axis=1)
+        neg_input_total = []
+        for i in range(len(click_seq_)):
+            h = h_states_[i] # seq i of the batch [batch, dim]
+            c = click_seq_[i]
+            c = tf.tile(c, [2, 1]) # sliding window
+            neg = []
+            for i in range(1, batch):
+                neg.append(tf.concat([h, c[i:i+batch,:]], axis=1))
+            neg_input_total.append(tf.concat(neg, axis=0))
+        noclick_input_ = tf.stack(neg_input_total, axis=1)
+        #noclick_input_ = tf.concat([h_states, noclick_seq], -1)
+        click_prop_ = self.auxiliary_net(click_input_, stag = stag)[:, :, 0]
+        noclick_prop_ = self.auxiliary_net(noclick_input_, stag = stag)[:, :, 0]
+
+        click_loss_ = - tf.reshape(tf.log(click_prop_), [-1, tf.shape(click_seq)[1]]) * mask
+        mask = tf.tile(mask, [batch-1, 1])
+        noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * mask
+
+        #loss_ = tf.reduce_mean(click_loss_ + noclick_loss_)
+        loss_ = tf.reduce_mean(click_loss_) + tf.reduce_mean(noclick_loss_)
+        return loss_
+
+
+
+
+class Model_MIMN(Model):
+    def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, MEMORY_SIZE, SEQ_LEN=400, Mem_Induction=0, Util_Reg=0, use_negsample=False, mask_flag=False):
+        super(Model_MIMN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
+                                         BATCH_SIZE, SEQ_LEN, use_negsample, Flag="MIMN")
+        self.reg = Util_Reg
+
+        def clear_mask_state(state, begin_state, begin_channel_rnn_state, mask, cell, t):
+            state["controller_state"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1))) * begin_state["controller_state"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1)) * state["controller_state"]
+            state["M"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["M"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["M"]
+            state["key_M"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["key_M"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["key_M"]
+            state["sum_aggre"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["sum_aggre"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["sum_aggre"]
+            if Mem_Induction > 0:
+                temp_channel_rnn_state = []
+                for i in range(MEMORY_SIZE):
+                    temp_channel_rnn_state.append(cell.channel_rnn_state[i] * tf.expand_dims(mask[:,t], axis=1) + begin_channel_rnn_state[i]*(1- tf.expand_dims(mask[:,t], axis=1)))
+                cell.channel_rnn_state = temp_channel_rnn_state
+                temp_channel_rnn_output = []
+                for i in range(MEMORY_SIZE):
+                    temp_output = cell.channel_rnn_output[i] * tf.expand_dims(mask[:,t], axis=1) + begin_channel_rnn_output[i]*(1- tf.expand_dims(mask[:,t], axis=1))
+                    temp_channel_rnn_output.append(temp_output)
+                cell.channel_rnn_output = temp_channel_rnn_output
+
+            return state
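+        # Added note: the MIMN cell configuration below follows the MIMN code
+        # base; memory_vector_dim=2*EMBEDDING_DIM appears to match the
+        # item+category embedding concatenation that code base uses for
+        # self.item_his_eb.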
+        cell = mimn.MIMNCell(controller_units=HIDDEN_SIZE, memory_size=MEMORY_SIZE, memory_vector_dim=2*EMBEDDING_DIM,read_head_num=1, write_head_num=1,
+                             reuse=False, output_dim=HIDDEN_SIZE, clip_value=20, batch_size=BATCH_SIZE, mem_induction=Mem_Induction, util_reg=Util_Reg)
+
+        state = cell.zero_state(BATCH_SIZE, tf.float32)
+        if Mem_Induction > 0:
+            begin_channel_rnn_output = cell.channel_rnn_output
+        else:
+            begin_channel_rnn_output = 0.0
+
+        begin_state = state
+        self.state_list = [state]
+        self.mimn_o = []
+        for t in range(SEQ_LEN):
+            output, state, temp_output_list = cell(self.item_his_eb[:, t, :], state)
+            if mask_flag:
+                state = clear_mask_state(state, begin_state, begin_channel_rnn_output, self.mask, cell, t)
+            self.mimn_o.append(output)
+            self.state_list.append(state)
+
+        self.mimn_o = tf.stack(self.mimn_o, axis=1)
+        self.state_list.append(state)
+        mean_memory = tf.reduce_mean(state['sum_aggre'], axis=-2)
+
+        before_aggre = state['w_aggre']
+        read_out, _, _ = cell(self.item_eb, state)
+
+        if use_negsample:
+            aux_loss_1 = self.auxiliary_loss(self.mimn_o[:, :-1, :], self.item_his_eb[:, 1:, :],
+                                             self.neg_his_eb[:, 1:, :], self.mask[:, 1:], stag = "bigru_0")
+            self.aux_loss = aux_loss_1
+
+        if self.reg:
+            self.reg_loss = cell.capacity_loss(before_aggre)
+        else:
+            self.reg_loss = tf.zeros(1)
+
+        if Mem_Induction == 1:
+            channel_memory_tensor = tf.concat(temp_output_list, 1)
+            multi_channel_hist = din_attention(self.item_eb, channel_memory_tensor, HIDDEN_SIZE, None, stag='pal')
+            inp = tf.concat([self.item_eb, self.item_his_eb_sum, read_out, tf.squeeze(multi_channel_hist), mean_memory*self.item_eb], 1)
+        else:
+            inp = tf.concat([self.item_eb, self.item_his_eb_sum, read_out, mean_memory*self.item_eb], 1)
+
+        self.build_fcn_net(inp, use_dice=False)
diff --git a/modelzoo/CAN/script/process_data.py b/modelzoo/CAN/script/process_data.py
new file mode 100644
index 00000000000..18bf7ebdd7d
--- /dev/null
+++ b/modelzoo/CAN/script/process_data.py
@@ -0,0 +1,101 @@
+import sys
+import random
+import time
+
+def process_meta(file):
+    fi = open(file, "r")
+    fo = open("item-info", "w")
+    for line in fi:
+        obj = eval(line)
+        cat = obj["categories"][0][-1]
+        print(obj["asin"] + "\t" + cat,file=fo)
+
+def process_reviews(file):
+    fi = open(file, "r")
+    user_map = {}
+    fo = open("reviews-info", "w")
+    for line in fi:
+        obj = eval(line)
+        userID = obj["reviewerID"]
+        itemID = obj["asin"]
+        rating = obj["overall"]
+        time = obj["unixReviewTime"]
+        print(userID + "\t" + itemID + "\t" + str(rating) + "\t" + str(time),file=fo)
+
+def manual_join():
+    f_rev = open("reviews-info", "r")
+    user_map = {}
+    item_list = []
+    for line in f_rev:
+        line = line.strip()
+        items = line.split("\t")
+        #loctime = time.localtime(float(items[-1]))
+        #items[-1] = time.strftime('%Y-%m-%d', loctime)
+        if items[0] not in user_map:
+            user_map[items[0]]= []
+        user_map[items[0]].append(("\t".join(items), float(items[-1])))
+        item_list.append(items[1])
+    f_meta = open("item-info", "r")
+    meta_map = {}
+    for line in f_meta:
+        arr = line.strip().split("\t")
+        if arr[0] not in meta_map:
+            meta_map[arr[0]] = arr[1]
+            arr = line.strip().split("\t")
+    fo = open("jointed-new", "w")
+    for key in user_map:
+        sorted_user_bh = sorted(user_map[key], key=lambda x:x[1])
+        for line, t in sorted_user_bh:
+            items = line.split("\t")
+            asin = items[1]
+            j = 0
+            while True:
+                asin_neg_index = random.randint(0, len(item_list) - 1)
+                asin_neg = item_list[asin_neg_index]
+                if asin_neg == asin:
+                    continue
+                items[1] = asin_neg
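+                # Added comment: one random negative per positive (j breaks at 1
+                # below); the sampled item replaces the true asin and is written
+                # out with label "0".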
"\t" + meta_map[asin_neg],file=fo) + j += 1 + if j == 1: #negative sampling frequency + break + if asin in meta_map: + print("1" + "\t" + line + "\t" + meta_map[asin],file=fo) + else: + print("1" + "\t" + line + "\t" + "default_cat",file=fo) + + +def split_test(): + fi = open("jointed-new", "r") + fo = open("jointed-new-split-info", "w") + user_count = {} + for line in fi: + line = line.strip() + user = line.split("\t")[1] + if user not in user_count: + user_count[user] = 0 + user_count[user] += 1 + fi.seek(0) + i = 0 + last_user = "A26ZDKC53OP6JD" + for line in fi: + line = line.strip() + user = line.split("\t")[1] + if user == last_user: + if i < user_count[user] - 2: # 1 + negative samples + print("20180118" + "\t" + line,file=fo) + else: + print("20190119" + "\t" + line,file=fo) + else: + last_user = user + i = 0 + if i < user_count[user] - 2: + print("20180118" + "\t" + line,file=fo) + else: + print("20190119" + "\t" + line,file=fo) + i += 1 + +process_meta(sys.argv[1]) +process_reviews(sys.argv[2]) +manual_join() +split_test() diff --git a/modelzoo/CAN/script/rnn.py b/modelzoo/CAN/script/rnn.py new file mode 100644 index 00000000000..da2351b2dbb --- /dev/null +++ b/modelzoo/CAN/script/rnn.py @@ -0,0 +1,1454 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""RNN helpers for TensorFlow models. + + +@@bidirectional_dynamic_rnn +@@dynamic_rnn +@@raw_rnn +@@static_rnn +@@static_state_saving_rnn +@@static_bidirectional_rnn +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util import nest + + +# pylint: disable=protected-access +_concat = rnn_cell_impl._concat +#_like_rnncell = rnn_cell_impl._like_rnncell +_like_rnncell = rnn_cell_impl.assert_like_rnncell +# pylint: enable=protected-access + + +def _transpose_batch_time(x): + """Transpose the batch and time dimensions of a Tensor. + + Retains as much of the static shape information as possible. + + Args: + x: A tensor of rank 2 or higher. + + Returns: + x transposed along the first two dimensions. + + Raises: + ValueError: if `x` is rank 1 or lower. 
+ """ + x_static_shape = x.get_shape() + if x_static_shape.ndims is not None and x_static_shape.ndims < 2: + raise ValueError( + "Expected input tensor %s to have rank at least 2, but saw shape: %s" % + (x, x_static_shape)) + x_rank = array_ops.rank(x) + x_t = array_ops.transpose( + x, array_ops.concat( + ([1, 0], math_ops.range(2, x_rank)), axis=0)) + x_t.set_shape( + tensor_shape.TensorShape([ + x_static_shape[1].value, x_static_shape[0].value + ]).concatenate(x_static_shape[2:])) + return x_t + + +def _best_effort_input_batch_size(flat_input): + """Get static input batch size if available, with fallback to the dynamic one. + + Args: + flat_input: An iterable of time major input Tensors of shape [max_time, + batch_size, ...]. All inputs should have compatible batch sizes. + + Returns: + The batch size in Python integer if available, or a scalar Tensor otherwise. + + Raises: + ValueError: if there is any input with an invalid shape. + """ + for input_ in flat_input: + shape = input_.shape + if shape.ndims is None: + continue + if shape.ndims < 2: + raise ValueError( + "Expected input tensor %s to have rank at least 2" % input_) + batch_size = shape[1].value + if batch_size is not None: + return batch_size + # Fallback to the dynamic batch size of the first input. + return array_ops.shape(flat_input[0])[1] + + +def _infer_state_dtype(explicit_dtype, state): + """Infer the dtype of an RNN state. + + Args: + explicit_dtype: explicitly declared dtype or None. + state: RNN's hidden state. Must be a Tensor or a nested iterable containing + Tensors. + + Returns: + dtype: inferred dtype of hidden state. + + Raises: + ValueError: if `state` has heterogeneous dtypes or is empty. + """ + if explicit_dtype is not None: + return explicit_dtype + elif nest.is_sequence(state): + inferred_dtypes = [element.dtype for element in nest.flatten(state)] + if not inferred_dtypes: + raise ValueError("Unable to infer dtype from empty state.") + all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) + if not all_same: + raise ValueError( + "State has tensors of different inferred_dtypes. Unable to infer a " + "single representative dtype.") + return inferred_dtypes[0] + else: + return state.dtype + + +# pylint: disable=unused-argument +def _rnn_step( + time, sequence_length, min_sequence_length, max_sequence_length, + zero_output, state, call_cell, state_size, skip_conditionals=False): + """Calculate one step of a dynamic RNN minibatch. + + Returns an (output, state) pair conditioned on the sequence_lengths. + When skip_conditionals=False, the pseudocode is something like: + + if t >= max_sequence_length: + return (zero_output, state) + if t < min_sequence_length: + return call_cell() + + # Selectively output zeros or output, old state or new state depending + # on if we've finished calculating each row. 
+  new_output, new_state = call_cell()
+  final_output = np.vstack([
+    zero_output if time >= sequence_lengths[r] else new_output_r
+    for r, new_output_r in enumerate(new_output)
+  ])
+  final_state = np.vstack([
+    state[r] if time >= sequence_lengths[r] else new_state_r
+    for r, new_state_r in enumerate(new_state)
+  ])
+  return (final_output, final_state)
+
+  Args:
+    time: Python int, the current time step
+    sequence_length: int32 `Tensor` vector of size [batch_size]
+    min_sequence_length: int32 `Tensor` scalar, min of sequence_length
+    max_sequence_length: int32 `Tensor` scalar, max of sequence_length
+    zero_output: `Tensor` vector of shape [output_size]
+    state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`,
+      or a list/tuple of such tensors.
+    call_cell: lambda returning tuple of (new_output, new_state) where
+      new_output is a `Tensor` matrix of shape `[batch_size, output_size]`.
+      new_state is a `Tensor` matrix of shape `[batch_size, state_size]`.
+    state_size: The `cell.state_size` associated with the state.
+    skip_conditionals: Python bool, whether to skip using the conditional
+      calculations.  This is useful for `dynamic_rnn`, where the input tensor
+      matches `max_sequence_length`, and using conditionals just slows
+      everything down.
+
+  Returns:
+    A tuple of (`final_output`, `final_state`) as given by the pseudocode above:
+      final_output is a `Tensor` matrix of shape [batch_size, output_size]
+      final_state is either a single `Tensor` matrix, or a tuple of such
+        matrices (matching length and shapes of input `state`).
+
+  Raises:
+    ValueError: If the cell returns a state tuple whose length does not match
+      that returned by `state_size`.
+  """
+
+  # Convert state to a list for ease of use
+  flat_state = nest.flatten(state)
+  flat_zero_output = nest.flatten(zero_output)
+
+  def _copy_one_through(output, new_output):
+    # If the state contains a scalar value we simply pass it through.
+    if output.shape.ndims == 0:
+      return new_output
+    copy_cond = (time >= sequence_length)
+    with ops.colocate_with(new_output):
+      return array_ops.where(copy_cond, output, new_output)
+
+  def _copy_some_through(flat_new_output, flat_new_state):
+    # Use broadcasting select to determine which values should get
+    # the previous state & zero output, and which values should get
+    # a calculated state & output.
+    flat_new_output = [
+        _copy_one_through(zero_output, new_output)
+        for zero_output, new_output in zip(flat_zero_output, flat_new_output)]
+    flat_new_state = [
+        _copy_one_through(state, new_state)
+        for state, new_state in zip(flat_state, flat_new_state)]
+    return flat_new_output + flat_new_state
+
+  def _maybe_copy_some_through():
+    """Run RNN step.  Pass through either no or some past state."""
+    new_output, new_state = call_cell()
+
+    nest.assert_same_structure(state, new_state)
+
+    flat_new_state = nest.flatten(new_state)
+    flat_new_output = nest.flatten(new_output)
+    return control_flow_ops.cond(
+        # if t < min_seq_len: calculate and return everything
+        time < min_sequence_length, lambda: flat_new_output + flat_new_state,
+        # else copy some of it through
+        lambda: _copy_some_through(flat_new_output, flat_new_state))
+
+  # TODO(ebrevdo): skipping these conditionals may cause a slowdown,
+  # but benefits from removing cond() and its gradient.  We should
+  # profile with and without this switch here.
+  if skip_conditionals:
+    # Instead of using conditionals, perform the selective copy at all time
+    # steps.  This is faster when max_seq_len is equal to the number of unrolls
+    # (which is typical for dynamic_rnn).
+    new_output, new_state = call_cell()
+    nest.assert_same_structure(state, new_state)
+    new_state = nest.flatten(new_state)
+    new_output = nest.flatten(new_output)
+    final_output_and_state = _copy_some_through(new_output, new_state)
+  else:
+    empty_update = lambda: flat_zero_output + flat_state
+    final_output_and_state = control_flow_ops.cond(
+        # if t >= max_seq_len: copy all state through, output zeros
+        time >= max_sequence_length, empty_update,
+        # otherwise calculation is required: copy some or all of it through
+        _maybe_copy_some_through)
+
+  if len(final_output_and_state) != len(flat_zero_output) + len(flat_state):
+    raise ValueError("Internal error: state and output were not concatenated "
+                     "correctly.")
+  final_output = final_output_and_state[:len(flat_zero_output)]
+  final_state = final_output_and_state[len(flat_zero_output):]
+
+  for output, flat_output in zip(final_output, flat_zero_output):
+    output.set_shape(flat_output.get_shape())
+  for substate, flat_substate in zip(final_state, flat_state):
+    substate.set_shape(flat_substate.get_shape())
+
+  final_output = nest.pack_sequence_as(
+      structure=zero_output, flat_sequence=final_output)
+  final_state = nest.pack_sequence_as(
+      structure=state, flat_sequence=final_state)
+
+  return final_output, final_state
+
+
+def _reverse_seq(input_seq, lengths):
+  """Reverse a list of Tensors up to specified lengths.
+
+  Args:
+    input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
+      or nested tuples of tensors.
+    lengths: A `Tensor` of dimension batch_size, containing lengths for each
+      sequence in the batch. If "None" is specified, simply reverses
+      the list.
+
+  Returns:
+    time-reversed sequence
+  """
+  if lengths is None:
+    return list(reversed(input_seq))
+
+  flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq)
+
+  flat_results = [[] for _ in range(len(input_seq))]
+  for sequence in zip(*flat_input_seq):
+    input_shape = tensor_shape.unknown_shape(
+        ndims=sequence[0].get_shape().ndims)
+    for input_ in sequence:
+      input_shape.merge_with(input_.get_shape())
+      input_.set_shape(input_shape)
+
+    # Join into (time, batch_size, depth)
+    s_joined = array_ops.stack(sequence)
+
+    # Reverse along dimension 0
+    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
+    # Split again into list
+    result = array_ops.unstack(s_reversed)
+    for r, flat_result in zip(result, flat_results):
+      r.set_shape(input_shape)
+      flat_result.append(r)
+
+  results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result)
+             for input_, flat_result in zip(input_seq, flat_results)]
+  return results
+
+
+def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
+                              initial_state_fw=None, initial_state_bw=None,
+                              dtype=None, parallel_iterations=None,
+                              swap_memory=False, time_major=False, scope=None):
+  """Creates a dynamic version of bidirectional recurrent neural network.
+
+  Takes input and builds independent forward and backward RNNs. The input_size
+  of forward and backward cell must match. The initial state for both directions
+  is zero by default (but can be set optionally) and no intermediate states are
+  ever returned -- the network is fully unrolled for the given (passed in)
+  length(s) of the sequence(s) or completely unrolled if length(s) is not
+  given.
+
+  Args:
+    cell_fw: An instance of RNNCell, to be used for forward direction.
+    cell_bw: An instance of RNNCell, to be used for backward direction.
+    inputs: The RNN inputs.
+      If time_major == False (default), this must be a tensor of shape:
+        `[batch_size, max_time, ...]`, or a nested tuple of such elements.
+      If time_major == True, this must be a tensor of shape:
+        `[max_time, batch_size, ...]`, or a nested tuple of such elements.
+    sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
+      containing the actual lengths for each of the sequences in the batch.
+      If not provided, all batch entries are assumed to be full sequences; and
+      time reversal is applied from time `0` to `max_time` for each sequence.
+    initial_state_fw: (optional) An initial state for the forward RNN.
+      This must be a tensor of appropriate type and shape
+      `[batch_size, cell_fw.state_size]`.
+      If `cell_fw.state_size` is a tuple, this should be a tuple of
+      tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
+    initial_state_bw: (optional) Same as for `initial_state_fw`, but using
+      the corresponding properties of `cell_bw`.
+    dtype: (optional) The data type for the initial states and expected output.
+      Required if initial_states are not provided or RNN states have a
+      heterogeneous dtype.
+    parallel_iterations: (Default: 32).  The number of iterations to run in
+      parallel.  Those operations which do not have any temporal dependency
+      and can be run in parallel, will be.  This parameter trades off
+      time for space.  Values >> 1 use more memory but take less time,
+      while smaller values use less memory but computations take longer.
+    swap_memory: Transparently swap the tensors produced in forward inference
+      but needed for back prop from GPU to CPU.  This allows training RNNs
+      which would typically not fit on a single GPU, with very minimal (or no)
+      performance penalty.
+    time_major: The shape format of the `inputs` and `outputs` Tensors.
+      If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
+      If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
+      Using `time_major = True` is a bit more efficient because it avoids
+      transposes at the beginning and end of the RNN calculation.  However,
+      most TensorFlow data is batch-major, so by default this function
+      accepts input and emits output in batch-major form.
+    scope: VariableScope for the created subgraph; defaults to
+      "bidirectional_rnn"
+
+  Returns:
+    A tuple (outputs, output_states) where:
+      outputs: A tuple (output_fw, output_bw) containing the forward and
+        the backward rnn output `Tensor`.
+        If time_major == False (default),
+          output_fw will be a `Tensor` shaped:
+          `[batch_size, max_time, cell_fw.output_size]`
+          and output_bw will be a `Tensor` shaped:
+          `[batch_size, max_time, cell_bw.output_size]`.
+        If time_major == True,
+          output_fw will be a `Tensor` shaped:
+          `[max_time, batch_size, cell_fw.output_size]`
+          and output_bw will be a `Tensor` shaped:
+          `[max_time, batch_size, cell_bw.output_size]`.
+        It returns a tuple instead of a single concatenated `Tensor`, unlike
+        in the `bidirectional_rnn`. If the concatenated one is preferred,
+        the forward and backward outputs can be concatenated as
+        `tf.concat(outputs, 2)`.
+      output_states: A tuple (output_state_fw, output_state_bw) containing
+        the forward and the backward final states of bidirectional rnn.
+
+  Raises:
+    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
+ """ + + if not _like_rnncell(cell_fw): + raise TypeError("cell_fw must be an instance of RNNCell") + if not _like_rnncell(cell_bw): + raise TypeError("cell_bw must be an instance of RNNCell") + + with vs.variable_scope(scope or "bidirectional_rnn"): + # Forward direction + with vs.variable_scope("fw") as fw_scope: + output_fw, output_state_fw = dynamic_rnn( + cell=cell_fw, inputs=inputs, sequence_length=sequence_length, + initial_state=initial_state_fw, dtype=dtype, + parallel_iterations=parallel_iterations, swap_memory=swap_memory, + time_major=time_major, scope=fw_scope) + + # Backward direction + if not time_major: + time_dim = 1 + batch_dim = 0 + else: + time_dim = 0 + batch_dim = 1 + + def _reverse(input_, seq_lengths, seq_dim, batch_dim): + if seq_lengths is not None: + return array_ops.reverse_sequence( + input=input_, seq_lengths=seq_lengths, + seq_dim=seq_dim, batch_dim=batch_dim) + else: + return array_ops.reverse(input_, axis=[seq_dim]) + + with vs.variable_scope("bw") as bw_scope: + inputs_reverse = _reverse( + inputs, seq_lengths=sequence_length, + seq_dim=time_dim, batch_dim=batch_dim) + tmp, output_state_bw = dynamic_rnn( + cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, + initial_state=initial_state_bw, dtype=dtype, + parallel_iterations=parallel_iterations, swap_memory=swap_memory, + time_major=time_major, scope=bw_scope) + + output_bw = _reverse( + tmp, seq_lengths=sequence_length, + seq_dim=time_dim, batch_dim=batch_dim) + + outputs = (output_fw, output_bw) + output_states = (output_state_fw, output_state_bw) + + return (outputs, output_states) + + +def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None, + dtype=None, parallel_iterations=None, swap_memory=False, + time_major=False, scope=None): + """Creates a recurrent neural network specified by RNNCell `cell`. + + Performs fully dynamic unrolling of `inputs`. + + Example: + + ```python + # create a BasicRNNCell + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + + # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size] + + # defining initial state + initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) + + # 'state' is a tensor of shape [batch_size, cell_state_size] + outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data, + initial_state=initial_state, + dtype=tf.float32) + ``` + + ```python + # create 2 LSTMCells + rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]] + + # create a RNN cell composed sequentially of a number of RNNCells + multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers) + + # 'outputs' is a tensor of shape [batch_size, max_time, 256] + # 'state' is a N-tuple where N is the number of LSTMCells containing a + # tf.contrib.rnn.LSTMStateTuple for each cell + outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, + inputs=data, + dtype=tf.float32) + ``` + + + Args: + cell: An instance of RNNCell. + inputs: The RNN inputs. + If `time_major == False` (default), this must be a `Tensor` of shape: + `[batch_size, max_time, ...]`, or a nested tuple of such + elements. + If `time_major == True`, this must be a `Tensor` of shape: + `[max_time, batch_size, ...]`, or a nested tuple of such + elements. + This may also be a (possibly nested) tuple of Tensors satisfying + this property. The first two dimensions must match across all the inputs, + but otherwise the ranks and other shape components may differ. 
+      In this case, input to `cell` at each time-step will replicate the
+      structure of these tuples, except for the time dimension (from which the
+      time is taken).
+      The input to `cell` at each time step will be a `Tensor` or (possibly
+      nested) tuple of Tensors each with dimensions `[batch_size, ...]`.
+    sequence_length: (optional) An int32/int64 vector sized `[batch_size]`.
+      Used to copy-through state and zero-out outputs when past a batch
+      element's sequence length.  So it's more for correctness than performance.
+    initial_state: (optional) An initial state for the RNN.
+      If `cell.state_size` is an integer, this must be
+      a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
+      If `cell.state_size` is a tuple, this should be a tuple of
+      tensors having shapes `[batch_size, s] for s in cell.state_size`.
+    dtype: (optional) The data type for the initial state and expected output.
+      Required if initial_state is not provided or RNN state has a heterogeneous
+      dtype.
+    parallel_iterations: (Default: 32).  The number of iterations to run in
+      parallel.  Those operations which do not have any temporal dependency
+      and can be run in parallel, will be.  This parameter trades off
+      time for space.  Values >> 1 use more memory but take less time,
+      while smaller values use less memory but computations take longer.
+    swap_memory: Transparently swap the tensors produced in forward inference
+      but needed for back prop from GPU to CPU.  This allows training RNNs
+      which would typically not fit on a single GPU, with very minimal (or no)
+      performance penalty.
+    time_major: The shape format of the `inputs` and `outputs` Tensors.
+      If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
+      If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
+      Using `time_major = True` is a bit more efficient because it avoids
+      transposes at the beginning and end of the RNN calculation.  However,
+      most TensorFlow data is batch-major, so by default this function
+      accepts input and emits output in batch-major form.
+    scope: VariableScope for the created subgraph; defaults to "rnn".
+
+  Returns:
+    A pair (outputs, state) where:
+
+    outputs: The RNN output `Tensor`.
+
+      If time_major == False (default), this will be a `Tensor` shaped:
+        `[batch_size, max_time, cell.output_size]`.
+
+      If time_major == True, this will be a `Tensor` shaped:
+        `[max_time, batch_size, cell.output_size]`.
+
+      Note, if `cell.output_size` is a (possibly nested) tuple of integers
+      or `TensorShape` objects, then `outputs` will be a tuple having the
+      same structure as `cell.output_size`, containing Tensors having shapes
+      corresponding to the shape data in `cell.output_size`.
+
+    state: The final state.  If `cell.state_size` is an int, this
+      will be shaped `[batch_size, cell.state_size]`.  If it is a
+      `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
+      If it is a (possibly nested) tuple of ints or `TensorShape`, this will
+      be a tuple having the corresponding shapes. If cells are `LSTMCells`
+      `state` will be a tuple containing a `LSTMStateTuple` for each cell.
+
+  Raises:
+    TypeError: If `cell` is not an instance of RNNCell.
+    ValueError: If inputs is None or an empty list.
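+  Note:
+    `att_scores` is an addition over stock TensorFlow rnn.py (kept for the
+    attention-updated GRU in this model zoo): when provided, shaped
+    `[batch_size, max_time, 1]` for batch-major inputs, it is forwarded through
+    `_dynamic_rnn_loop` to the cell at every time step.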
+ """ + if not _like_rnncell(cell): + raise TypeError("cell must be an instance of RNNCell") + + # By default, time_major==False and inputs are batch-major: shaped + # [batch, time, depth] + # For internal calculations, we transpose to [time, batch, depth] + flat_input = nest.flatten(inputs) + + if not time_major: + # (B,T,D) => (T,B,D) + flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input] + flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input) + + parallel_iterations = parallel_iterations or 32 + if sequence_length is not None: + sequence_length = math_ops.to_int32(sequence_length) + if sequence_length.get_shape().ndims not in (None, 1): + raise ValueError( + "sequence_length must be a vector of length batch_size, " + "but saw shape: %s" % sequence_length.get_shape()) + sequence_length = array_ops.identity( # Just to find it in the graph. + sequence_length, name="sequence_length") + + # Create a new scope in which the caching device is either + # determined by the parent scope, or is set to place the cached + # Variable using the same placement as for the rest of the RNN. + with vs.variable_scope(scope or "rnn") as varscope: + if varscope.caching_device is None: + varscope.set_caching_device(lambda op: op.device) + batch_size = _best_effort_input_batch_size(flat_input) + + if initial_state is not None: + state = initial_state + else: + if not dtype: + raise ValueError("If there is no initial_state, you must give a dtype.") + state = cell.zero_state(batch_size, dtype) + + def _assert_has_shape(x, shape): + x_shape = array_ops.shape(x) + packed_shape = array_ops.stack(shape) + return control_flow_ops.Assert( + math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), + ["Expected shape for Tensor %s is " % x.name, + packed_shape, " but saw shape: ", x_shape]) + + if sequence_length is not None: + # Perform some shape validation + with ops.control_dependencies( + [_assert_has_shape(sequence_length, [batch_size])]): + sequence_length = array_ops.identity( + sequence_length, name="CheckSeqLen") + + inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input) + + (outputs, final_state) = _dynamic_rnn_loop( + cell, + inputs, + state, + parallel_iterations=parallel_iterations, + swap_memory=swap_memory, + att_scores = att_scores, + sequence_length=sequence_length, + dtype=dtype) + + # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth]. + # If we are performing batch-major calculations, transpose output back + # to shape [batch, time, depth] + if not time_major: + # (T,B,D) => (B,T,D) + outputs = nest.map_structure(_transpose_batch_time, outputs) + + return (outputs, final_state) + + +def _dynamic_rnn_loop(cell, + inputs, + initial_state, + parallel_iterations, + swap_memory, + att_scores = None, + sequence_length=None, + dtype=None): + """Internal implementation of Dynamic RNN. + + Args: + cell: An instance of RNNCell. + inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested + tuple of such elements. + initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if + `cell.state_size` is a tuple, then this should be a tuple of + tensors having shapes `[batch_size, s] for s in cell.state_size`. + parallel_iterations: Positive Python int. + swap_memory: A Python boolean + sequence_length: (optional) An `int32` `Tensor` of shape [batch_size]. + dtype: (optional) Expected dtype of output. If not specified, inferred from + initial_state. + + Returns: + Tuple `(final_outputs, final_state)`. 
+ final_outputs: + A `Tensor` of shape `[time, batch_size, cell.output_size]`. If + `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape` + objects, then this returns a (possibly nsted) tuple of Tensors matching + the corresponding shapes. + final_state: + A `Tensor`, or possibly nested tuple of Tensors, matching in length + and shapes to `initial_state`. + + Raises: + ValueError: If the input depth cannot be inferred via shape inference + from the inputs. + """ + state = initial_state + assert isinstance(parallel_iterations, int), "parallel_iterations must be int" + + state_size = cell.state_size + + flat_input = nest.flatten(inputs) + flat_output_size = nest.flatten(cell.output_size) + + # Construct an initial output + input_shape = array_ops.shape(flat_input[0]) + time_steps = input_shape[0] + batch_size = _best_effort_input_batch_size(flat_input) + + inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3) + for input_ in flat_input) + + const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2] + + for shape in inputs_got_shape: + if not shape[2:].is_fully_defined(): + raise ValueError( + "Input size (depth of inputs) must be accessible via shape inference," + " but saw value None.") + got_time_steps = shape[0].value + got_batch_size = shape[1].value + if const_time_steps != got_time_steps: + raise ValueError( + "Time steps is not the same for all the elements in the input in a " + "batch.") + if const_batch_size != got_batch_size: + raise ValueError( + "Batch_size is not the same for all the elements in the input.") + + # Prepare dynamic conditional copying of state & output + def _create_zero_arrays(size): + size = _concat(batch_size, size) + return array_ops.zeros( + array_ops.stack(size), _infer_state_dtype(dtype, state)) + + flat_zero_output = tuple(_create_zero_arrays(output) + for output in flat_output_size) + zero_output = nest.pack_sequence_as(structure=cell.output_size, + flat_sequence=flat_zero_output) + + if sequence_length is not None: + min_sequence_length = math_ops.reduce_min(sequence_length) + max_sequence_length = math_ops.reduce_max(sequence_length) + + time = array_ops.constant(0, dtype=dtypes.int32, name="time") + + with ops.name_scope("dynamic_rnn") as scope: + base_name = scope + + def _create_ta(name, dtype): + return tensor_array_ops.TensorArray(dtype=dtype, + size=time_steps, + tensor_array_name=base_name + name) + + output_ta = tuple(_create_ta("output_%d" % i, + _infer_state_dtype(dtype, state)) + for i in range(len(flat_output_size))) + input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype) + for i in range(len(flat_input))) + + input_ta = tuple(ta.unstack(input_) + for ta, input_ in zip(input_ta, flat_input)) + + def _time_step(time, output_ta_t, state, att_scores=None): + """Take a time step of the dynamic RNN. + + Args: + time: int32 scalar Tensor. + output_ta_t: List of `TensorArray`s that represent the output. + state: nested tuple of vector tensors that represent the state. + + Returns: + The tuple (time + 1, output_ta_t with updated flow, new_state). 
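+
+    Note: when `att_scores` is given, the slice `att_scores[:, time, :]` for
+    the current step is passed to the cell as a third positional argument,
+    i.e. `cell(input_t, state, att_score)`.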
+ """ + + input_t = tuple(ta.read(time) for ta in input_ta) + # Restore some shape information + for input_, shape in zip(input_t, inputs_got_shape): + input_.set_shape(shape[1:]) + + input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) + if att_scores is not None: + att_score = att_scores[:, time, :] + call_cell = lambda: cell(input_t, state, att_score) + else: + call_cell = lambda: cell(input_t, state) + + if sequence_length is not None: + (output, new_state) = _rnn_step( + time=time, + sequence_length=sequence_length, + min_sequence_length=min_sequence_length, + max_sequence_length=max_sequence_length, + zero_output=zero_output, + state=state, + call_cell=call_cell, + state_size=state_size, + skip_conditionals=True) + else: + (output, new_state) = call_cell() + + # Pack state if using state tuples + output = nest.flatten(output) + + output_ta_t = tuple( + ta.write(time, out) for ta, out in zip(output_ta_t, output)) + if att_scores is not None: + return (time + 1, output_ta_t, new_state, att_scores) + else: + return (time + 1, output_ta_t, new_state) + + if att_scores is not None: + _, output_final_ta, final_state, _ = control_flow_ops.while_loop( + cond=lambda time, *_: time < time_steps, + body=_time_step, + loop_vars=(time, output_ta, state, att_scores), + parallel_iterations=parallel_iterations, + swap_memory=swap_memory) + else: + _, output_final_ta, final_state = control_flow_ops.while_loop( + cond=lambda time, *_: time < time_steps, + body=_time_step, + loop_vars=(time, output_ta, state), + parallel_iterations=parallel_iterations, + swap_memory=swap_memory) + + # Unpack final output if not using output tuples. + final_outputs = tuple(ta.stack() for ta in output_final_ta) + + # Restore some shape information + for output, output_size in zip(final_outputs, flat_output_size): + shape = _concat( + [const_time_steps, const_batch_size], output_size, static=True) + output.set_shape(shape) + + final_outputs = nest.pack_sequence_as( + structure=cell.output_size, flat_sequence=final_outputs) + + return (final_outputs, final_state) + + +def raw_rnn(cell, loop_fn, + parallel_iterations=None, swap_memory=False, scope=None): + """Creates an `RNN` specified by RNNCell `cell` and loop function `loop_fn`. + + **NOTE: This method is still in testing, and the API may change.** + + This function is a more primitive version of `dynamic_rnn` that provides + more direct access to the inputs each iteration. It also provides more + control over when to start and finish reading the sequence, and + what to emit for the output. + + For example, it can be used to implement the dynamic decoder of a seq2seq + model. + + Instead of working with `Tensor` objects, most operations work with + `TensorArray` objects directly. + + The operation of `raw_rnn`, in pseudo-code, is basically the following: + + ```python + time = tf.constant(0, dtype=tf.int32) + (finished, next_input, initial_state, _, loop_state) = loop_fn( + time=time, cell_output=None, cell_state=None, loop_state=None) + emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype) + state = initial_state + while not all(finished): + (output, cell_state) = cell(next_input, state) + (next_finished, next_input, next_state, emit, loop_state) = loop_fn( + time=time + 1, cell_output=output, cell_state=cell_state, + loop_state=loop_state) + # Emit zeros and copy forward state for minibatch entries that are finished. 
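+    # (tf.where selects row-wise: entries already finished keep their old
+    # state and emit zeros; the rest advance to the new values.)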
+ state = tf.where(finished, state, next_state) + emit = tf.where(finished, tf.zeros_like(emit), emit) + emit_ta = emit_ta.write(time, emit) + # If any new minibatch entries are marked as finished, mark these. + finished = tf.logical_or(finished, next_finished) + time += 1 + return (emit_ta, state, loop_state) + ``` + + with the additional properties that output and state may be (possibly nested) + tuples, as determined by `cell.output_size` and `cell.state_size`, and + as a result the final `state` and `emit_ta` may themselves be tuples. + + A simple implementation of `dynamic_rnn` via `raw_rnn` looks like this: + + ```python + inputs = tf.placeholder(shape=(max_time, batch_size, input_depth), + dtype=tf.float32) + sequence_length = tf.placeholder(shape=(batch_size,), dtype=tf.int32) + inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time) + inputs_ta = inputs_ta.unstack(inputs) + + cell = tf.contrib.rnn.LSTMCell(num_units) + + def loop_fn(time, cell_output, cell_state, loop_state): + emit_output = cell_output # == None for time == 0 + if cell_output is None: # time == 0 + next_cell_state = cell.zero_state(batch_size, tf.float32) + else: + next_cell_state = cell_state + elements_finished = (time >= sequence_length) + finished = tf.reduce_all(elements_finished) + next_input = tf.cond( + finished, + lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32), + lambda: inputs_ta.read(time)) + next_loop_state = None + return (elements_finished, next_input, next_cell_state, + emit_output, next_loop_state) + + outputs_ta, final_state, _ = raw_rnn(cell, loop_fn) + outputs = outputs_ta.stack() + ``` + + Args: + cell: An instance of RNNCell. + loop_fn: A callable that takes inputs + `(time, cell_output, cell_state, loop_state)` + and returns the tuple + `(finished, next_input, next_cell_state, emit_output, next_loop_state)`. + Here `time` is an int32 scalar `Tensor`, `cell_output` is a + `Tensor` or (possibly nested) tuple of tensors as determined by + `cell.output_size`, and `cell_state` is a `Tensor` + or (possibly nested) tuple of tensors, as determined by the `loop_fn` + on its first call (and should match `cell.state_size`). + The outputs are: `finished`, a boolean `Tensor` of + shape `[batch_size]`, `next_input`: the next input to feed to `cell`, + `next_cell_state`: the next state to feed to `cell`, + and `emit_output`: the output to store for this iteration. + + Note that `emit_output` should be a `Tensor` or (possibly nested) + tuple of tensors with shapes and structure matching `cell.output_size` + and `cell_output` above. The parameter `cell_state` and output + `next_cell_state` may be either a single or (possibly nested) tuple + of tensors. The parameter `loop_state` and + output `next_loop_state` may be either a single or (possibly nested) tuple + of `Tensor` and `TensorArray` objects. This last parameter + may be ignored by `loop_fn` and the return value may be `None`. If it + is not `None`, then the `loop_state` will be propagated through the RNN + loop, for use purely by `loop_fn` to keep track of its own state. + The `next_loop_state` parameter returned may be `None`. + + The first call to `loop_fn` will be `time = 0`, `cell_output = None`, + `cell_state = None`, and `loop_state = None`. For this call: + The `next_cell_state` value should be the value with which to initialize + the cell's state. It may be a final state from a previous RNN or it + may be the output of `cell.zero_state()`. It should be a + (possibly nested) tuple structure of tensors. 
+ If `cell.state_size` is an integer, this must be + a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. + If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of + appropriate type and shape `[batch_size] + cell.state_size`. + If `cell.state_size` is a (possibly nested) tuple of ints or + `TensorShape`, this will be a tuple having the corresponding shapes. + The `emit_output` value may be either `None` or a (possibly nested) + tuple structure of tensors, e.g., + `(tf.zeros(shape_0, dtype=dtype_0), tf.zeros(shape_1, dtype=dtype_1))`. + If this first `emit_output` return value is `None`, + then the `emit_ta` result of `raw_rnn` will have the same structure and + dtypes as `cell.output_size`. Otherwise `emit_ta` will have the same + structure, shapes (prepended with a `batch_size` dimension), and dtypes + as `emit_output`. The actual values returned for `emit_output` at this + initializing call are ignored. Note, this emit structure must be + consistent across all time steps. + + parallel_iterations: (Default: 32). The number of iterations to run in + parallel. Those operations which do not have any temporal dependency + and can be run in parallel, will be. This parameter trades off + time for space. Values >> 1 use more memory but take less time, + while smaller values use less memory but computations take longer. + swap_memory: Transparently swap the tensors produced in forward inference + but needed for back prop from GPU to CPU. This allows training RNNs + which would typically not fit on a single GPU, with very minimal (or no) + performance penalty. + scope: VariableScope for the created subgraph; defaults to "rnn". + + Returns: + A tuple `(emit_ta, final_state, final_loop_state)` where: + + `emit_ta`: The RNN output `TensorArray`. + If `loop_fn` returns a (possibly nested) set of Tensors for + `emit_output` during initialization, (inputs `time = 0`, + `cell_output = None`, and `loop_state = None`), then `emit_ta` will + have the same structure, dtypes, and shapes as `emit_output` instead. + If `loop_fn` returns `emit_output = None` during this call, + the structure of `cell.output_size` is used: + If `cell.output_size` is a (possibly nested) tuple of integers + or `TensorShape` objects, then `emit_ta` will be a tuple having the + same structure as `cell.output_size`, containing TensorArrays whose + elements' shapes correspond to the shape data in `cell.output_size`. + + `final_state`: The final cell state. If `cell.state_size` is an int, this + will be shaped `[batch_size, cell.state_size]`. If it is a + `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. + If it is a (possibly nested) tuple of ints or `TensorShape`, this will + be a tuple having the corresponding shapes. + + `final_loop_state`: The final loop state as returned by `loop_fn`. + + Raises: + TypeError: If `cell` is not an instance of RNNCell, or `loop_fn` is not + a `callable`. + """ + + if not _like_rnncell(cell): + raise TypeError("cell must be an instance of RNNCell") + if not callable(loop_fn): + raise TypeError("loop_fn must be a callable") + + parallel_iterations = parallel_iterations or 32 + + # Create a new scope in which the caching device is either + # determined by the parent scope, or is set to place the cached + # Variable using the same placement as for the rest of the RNN. 
+ with vs.variable_scope(scope or "rnn") as varscope: + if varscope.caching_device is None: + varscope.set_caching_device(lambda op: op.device) + + time = constant_op.constant(0, dtype=dtypes.int32) + (elements_finished, next_input, initial_state, emit_structure, + init_loop_state) = loop_fn( + time, None, None, None) # time, cell_output, cell_state, loop_state + flat_input = nest.flatten(next_input) + + # Need a surrogate loop state for the while_loop if none is available. + loop_state = (init_loop_state if init_loop_state is not None + else constant_op.constant(0, dtype=dtypes.int32)) + + input_shape = [input_.get_shape() for input_ in flat_input] + static_batch_size = input_shape[0][0] + + for input_shape_i in input_shape: + # Static verification that batch sizes all match + static_batch_size.merge_with(input_shape_i[0]) + + batch_size = static_batch_size.value + if batch_size is None: + batch_size = array_ops.shape(flat_input[0])[0] + + nest.assert_same_structure(initial_state, cell.state_size) + state = initial_state + flat_state = nest.flatten(state) + flat_state = [ops.convert_to_tensor(s) for s in flat_state] + state = nest.pack_sequence_as(structure=state, + flat_sequence=flat_state) + + if emit_structure is not None: + flat_emit_structure = nest.flatten(emit_structure) + flat_emit_size = [emit.shape if emit.shape.is_fully_defined() else + array_ops.shape(emit) for emit in flat_emit_structure] + flat_emit_dtypes = [emit.dtype for emit in flat_emit_structure] + else: + emit_structure = cell.output_size + flat_emit_size = nest.flatten(emit_structure) + flat_emit_dtypes = [flat_state[0].dtype] * len(flat_emit_size) + + flat_emit_ta = [ + tensor_array_ops.TensorArray( + dtype=dtype_i, dynamic_size=True, size=0, name="rnn_output_%d" % i) + for i, dtype_i in enumerate(flat_emit_dtypes)] + emit_ta = nest.pack_sequence_as(structure=emit_structure, + flat_sequence=flat_emit_ta) + flat_zero_emit = [ + array_ops.zeros(_concat(batch_size, size_i), dtype_i) + for size_i, dtype_i in zip(flat_emit_size, flat_emit_dtypes)] + zero_emit = nest.pack_sequence_as(structure=emit_structure, + flat_sequence=flat_zero_emit) + + def condition(unused_time, elements_finished, *_): + return math_ops.logical_not(math_ops.reduce_all(elements_finished)) + + def body(time, elements_finished, current_input, + emit_ta, state, loop_state): + """Internal while loop body for raw_rnn. + + Args: + time: time scalar. + elements_finished: batch-size vector. + current_input: possibly nested tuple of input tensors. + emit_ta: possibly nested tuple of output TensorArrays. + state: possibly nested tuple of state tensors. + loop_state: possibly nested tuple of loop state tensors. + + Returns: + Tuple having the same size as Args but with updated values. + """ + (next_output, cell_state) = cell(current_input, state) + + nest.assert_same_structure(state, cell_state) + nest.assert_same_structure(cell.output_size, next_output) + + next_time = time + 1 + (next_finished, next_input, next_state, emit_output, + next_loop_state) = loop_fn( + next_time, next_output, cell_state, loop_state) + + nest.assert_same_structure(state, next_state) + nest.assert_same_structure(current_input, next_input) + nest.assert_same_structure(emit_ta, emit_output) + + # If loop_fn returns None for next_loop_state, just reuse the + # previous one. 
+ loop_state = loop_state if next_loop_state is None else next_loop_state + + def _copy_some_through(current, candidate): + """Copy some tensors through via array_ops.where.""" + def copy_fn(cur_i, cand_i): + with ops.colocate_with(cand_i): + return array_ops.where(elements_finished, cur_i, cand_i) + return nest.map_structure(copy_fn, current, candidate) + + emit_output = _copy_some_through(zero_emit, emit_output) + next_state = _copy_some_through(state, next_state) + + emit_ta = nest.map_structure( + lambda ta, emit: ta.write(time, emit), emit_ta, emit_output) + + elements_finished = math_ops.logical_or(elements_finished, next_finished) + + return (next_time, elements_finished, next_input, + emit_ta, next_state, loop_state) + + returned = control_flow_ops.while_loop( + condition, body, loop_vars=[ + time, elements_finished, next_input, + emit_ta, state, loop_state], + parallel_iterations=parallel_iterations, + swap_memory=swap_memory) + + (emit_ta, final_state, final_loop_state) = returned[-3:] + + if init_loop_state is None: + final_loop_state = None + + return (emit_ta, final_state, final_loop_state) + + +def static_rnn(cell, + inputs, + initial_state=None, + dtype=None, + sequence_length=None, + scope=None): + """Creates a recurrent neural network specified by RNNCell `cell`. + + The simplest form of RNN network generated is: + + ```python + state = cell.zero_state(...) + outputs = [] + for input_ in inputs: + output, state = cell(input_, state) + outputs.append(output) + return (outputs, state) + ``` + However, a few other options are available: + + An initial state can be provided. + If the sequence_length vector is provided, dynamic calculation is performed. + This method of calculation does not compute the RNN steps past the maximum + sequence length of the minibatch (thus saving computational time), + and properly propagates the state at an example's sequence length + to the final state output. + + The dynamic calculation performed is, at time `t` for batch row `b`, + + ```python + (output, state)(b, t) = + (t >= sequence_length(b)) + ? (zeros(cell.output_size), states(b, sequence_length(b) - 1)) + : cell(input(b, t), state(b, t - 1)) + ``` + + Args: + cell: An instance of RNNCell. + inputs: A length T list of inputs, each a `Tensor` of shape + `[batch_size, input_size]`, or a nested tuple of such elements. + initial_state: (optional) An initial state for the RNN. + If `cell.state_size` is an integer, this must be + a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. + If `cell.state_size` is a tuple, this should be a tuple of + tensors having shapes `[batch_size, s] for s in cell.state_size`. + dtype: (optional) The data type for the initial state and expected output. + Required if initial_state is not provided or RNN state has a heterogeneous + dtype. + sequence_length: Specifies the length of each sequence in inputs. + An int32 or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`. + scope: VariableScope for the created subgraph; defaults to "rnn". + + Returns: + A pair (outputs, state) where: + + - outputs is a length T list of outputs (one for each input), or a nested + tuple of such elements. + - state is the final state + + Raises: + TypeError: If `cell` is not an instance of RNNCell. + ValueError: If `inputs` is `None` or an empty list, or if the input depth + (column size) cannot be inferred from inputs via shape inference. 
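+
+  Example (a minimal sketch; `input_data` is assumed to be a batch-major
+  `[batch_size, max_time, depth]` tensor and `num_units` a Python int):
+
+  ```python
+  # split the batch-major tensor along time into a length-T list of
+  # [batch_size, depth] tensors
+  inputs = tf.unstack(input_data, axis=1)
+  cell = tf.nn.rnn_cell.GRUCell(num_units)
+  outputs, state = static_rnn(cell, inputs, dtype=tf.float32)
+  ```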
+ """ + + if not _like_rnncell(cell): + raise TypeError("cell must be an instance of RNNCell") + if not nest.is_sequence(inputs): + raise TypeError("inputs must be a sequence") + if not inputs: + raise ValueError("inputs must not be empty") + + outputs = [] + # Create a new scope in which the caching device is either + # determined by the parent scope, or is set to place the cached + # Variable using the same placement as for the rest of the RNN. + with vs.variable_scope(scope or "rnn") as varscope: + if varscope.caching_device is None: + varscope.set_caching_device(lambda op: op.device) + + # Obtain the first sequence of the input + first_input = inputs + while nest.is_sequence(first_input): + first_input = first_input[0] + + # Temporarily avoid EmbeddingWrapper and seq2seq badness + # TODO(lukaszkaiser): remove EmbeddingWrapper + if first_input.get_shape().ndims != 1: + + input_shape = first_input.get_shape().with_rank_at_least(2) + fixed_batch_size = input_shape[0] + + flat_inputs = nest.flatten(inputs) + for flat_input in flat_inputs: + input_shape = flat_input.get_shape().with_rank_at_least(2) + batch_size, input_size = input_shape[0], input_shape[1:] + fixed_batch_size.merge_with(batch_size) + for i, size in enumerate(input_size): + if size.value is None: + raise ValueError( + "Input size (dimension %d of inputs) must be accessible via " + "shape inference, but saw value None." % i) + else: + fixed_batch_size = first_input.get_shape().with_rank_at_least(1)[0] + + if fixed_batch_size.value: + batch_size = fixed_batch_size.value + else: + batch_size = array_ops.shape(first_input)[0] + if initial_state is not None: + state = initial_state + else: + if not dtype: + raise ValueError("If no initial_state is provided, " + "dtype must be specified") + state = cell.zero_state(batch_size, dtype) + + if sequence_length is not None: # Prepare variables + sequence_length = ops.convert_to_tensor( + sequence_length, name="sequence_length") + if sequence_length.get_shape().ndims not in (None, 1): + raise ValueError( + "sequence_length must be a vector of length batch_size") + + def _create_zero_output(output_size): + # convert int to TensorShape if necessary + size = _concat(batch_size, output_size) + output = array_ops.zeros( + array_ops.stack(size), _infer_state_dtype(dtype, state)) + shape = _concat(fixed_batch_size.value, output_size, static=True) + output.set_shape(tensor_shape.TensorShape(shape)) + return output + + output_size = cell.output_size + flat_output_size = nest.flatten(output_size) + flat_zero_output = tuple( + _create_zero_output(size) for size in flat_output_size) + zero_output = nest.pack_sequence_as( + structure=output_size, flat_sequence=flat_zero_output) + + sequence_length = math_ops.to_int32(sequence_length) + min_sequence_length = math_ops.reduce_min(sequence_length) + max_sequence_length = math_ops.reduce_max(sequence_length) + + for time, input_ in enumerate(inputs): + if time > 0: + varscope.reuse_variables() + # pylint: disable=cell-var-from-loop + call_cell = lambda: cell(input_, state) + # pylint: enable=cell-var-from-loop + if sequence_length is not None: + (output, state) = _rnn_step( + time=time, + sequence_length=sequence_length, + min_sequence_length=min_sequence_length, + max_sequence_length=max_sequence_length, + zero_output=zero_output, + state=state, + call_cell=call_cell, + state_size=cell.state_size) + else: + (output, state) = call_cell() + + outputs.append(output) + + return (outputs, state) + + +def static_state_saving_rnn(cell, + inputs, + state_saver, 
+ state_name, + sequence_length=None, + scope=None): + """RNN that accepts a state saver for time-truncated RNN calculation. + + Args: + cell: An instance of `RNNCell`. + inputs: A length T list of inputs, each a `Tensor` of shape + `[batch_size, input_size]`. + state_saver: A state saver object with methods `state` and `save_state`. + state_name: Python string or tuple of strings. The name to use with the + state_saver. If the cell returns tuples of states (i.e., + `cell.state_size` is a tuple) then `state_name` should be a tuple of + strings having the same length as `cell.state_size`. Otherwise it should + be a single string. + sequence_length: (optional) An int32/int64 vector size [batch_size]. + See the documentation for rnn() for more details about sequence_length. + scope: VariableScope for the created subgraph; defaults to "rnn". + + Returns: + A pair (outputs, state) where: + outputs is a length T list of outputs (one for each input) + states is the final state + + Raises: + TypeError: If `cell` is not an instance of RNNCell. + ValueError: If `inputs` is `None` or an empty list, or if the arity and + type of `state_name` does not match that of `cell.state_size`. + """ + state_size = cell.state_size + state_is_tuple = nest.is_sequence(state_size) + state_name_tuple = nest.is_sequence(state_name) + + if state_is_tuple != state_name_tuple: + raise ValueError("state_name should be the same type as cell.state_size. " + "state_name: %s, cell.state_size: %s" % (str(state_name), + str(state_size))) + + if state_is_tuple: + state_name_flat = nest.flatten(state_name) + state_size_flat = nest.flatten(state_size) + + if len(state_name_flat) != len(state_size_flat): + raise ValueError("#elems(state_name) != #elems(state_size): %d vs. %d" % + (len(state_name_flat), len(state_size_flat))) + + initial_state = nest.pack_sequence_as( + structure=state_size, + flat_sequence=[state_saver.state(s) for s in state_name_flat]) + else: + initial_state = state_saver.state(state_name) + + (outputs, state) = static_rnn( + cell, + inputs, + initial_state=initial_state, + sequence_length=sequence_length, + scope=scope) + + if state_is_tuple: + flat_state = nest.flatten(state) + state_name = nest.flatten(state_name) + save_state = [ + state_saver.save_state(name, substate) + for name, substate in zip(state_name, flat_state) + ] + else: + save_state = [state_saver.save_state(state_name, state)] + + with ops.control_dependencies(save_state): + last_output = outputs[-1] + flat_last_output = nest.flatten(last_output) + flat_last_output = [ + array_ops.identity(output) for output in flat_last_output + ] + outputs[-1] = nest.pack_sequence_as( + structure=last_output, flat_sequence=flat_last_output) + + return (outputs, state) + + +def static_bidirectional_rnn(cell_fw, + cell_bw, + inputs, + initial_state_fw=None, + initial_state_bw=None, + dtype=None, + sequence_length=None, + scope=None): + """Creates a bidirectional recurrent neural network. + + Similar to the unidirectional case above (rnn) but takes input and builds + independent forward and backward RNNs with the final forward and backward + outputs depth-concatenated, such that the output will have the format + [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of + forward and backward cell must match. 
The initial state for both directions
+  is zero by default (but can be set optionally) and no intermediate states are
+  ever returned -- the network is fully unrolled for the given (passed in)
+  length(s) of the sequence(s) or completely unrolled if length(s) is not given.
+
+  Args:
+    cell_fw: An instance of RNNCell, to be used for forward direction.
+    cell_bw: An instance of RNNCell, to be used for backward direction.
+    inputs: A length T list of inputs, each a tensor of shape
+      [batch_size, input_size], or a nested tuple of such elements.
+    initial_state_fw: (optional) An initial state for the forward RNN.
+      This must be a tensor of appropriate type and shape
+      `[batch_size, cell_fw.state_size]`.
+      If `cell_fw.state_size` is a tuple, this should be a tuple of
+      tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
+    initial_state_bw: (optional) Same as for `initial_state_fw`, but using
+      the corresponding properties of `cell_bw`.
+    dtype: (optional) The data type for the initial state. Required if
+      either of the initial states are not provided.
+    sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
+      containing the actual lengths for each of the sequences.
+    scope: VariableScope for the created subgraph; defaults to
+      "bidirectional_rnn"
+
+  Returns:
+    A tuple (outputs, output_state_fw, output_state_bw) where:
+      outputs is a length `T` list of outputs (one for each input), which
+        are depth-concatenated forward and backward outputs.
+      output_state_fw is the final state of the forward rnn.
+      output_state_bw is the final state of the backward rnn.
+
+  Raises:
+    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
+    ValueError: If inputs is None or an empty list.
+  """
+
+  if not _like_rnncell(cell_fw):
+    raise TypeError("cell_fw must be an instance of RNNCell")
+  if not _like_rnncell(cell_bw):
+    raise TypeError("cell_bw must be an instance of RNNCell")
+  if not nest.is_sequence(inputs):
+    raise TypeError("inputs must be a sequence")
+  if not inputs:
+    raise ValueError("inputs must not be empty")
+
+  with vs.variable_scope(scope or "bidirectional_rnn"):
+    # Forward direction
+    with vs.variable_scope("fw") as fw_scope:
+      output_fw, output_state_fw = static_rnn(
+          cell_fw,
+          inputs,
+          initial_state_fw,
+          dtype,
+          sequence_length,
+          scope=fw_scope)
+
+    # Backward direction
+    with vs.variable_scope("bw") as bw_scope:
+      reversed_inputs = _reverse_seq(inputs, sequence_length)
+      tmp, output_state_bw = static_rnn(
+          cell_bw,
+          reversed_inputs,
+          initial_state_bw,
+          dtype,
+          sequence_length,
+          scope=bw_scope)
+
+  output_bw = _reverse_seq(tmp, sequence_length)
+  # Concat each of the forward/backward outputs
+  flat_output_fw = nest.flatten(output_fw)
+  flat_output_bw = nest.flatten(output_bw)
+
+  flat_outputs = tuple(
+      array_ops.concat([fw, bw], 1)
+      for fw, bw in zip(flat_output_fw, flat_output_bw))
+
+  outputs = nest.pack_sequence_as(
+      structure=output_fw, flat_sequence=flat_outputs)
+
+  return (outputs, output_state_fw, output_state_bw)
diff --git a/modelzoo/CAN/script/shuffle.py b/modelzoo/CAN/script/shuffle.py
new file mode 100644
index 00000000000..3bc0b86d750
--- /dev/null
+++ b/modelzoo/CAN/script/shuffle.py
@@ -0,0 +1,42 @@
+import os
+import sys
+import random
+
+import tempfile
+from subprocess import call
+
+
+def main(file, temporary=False):
+    tf_os, tpath = tempfile.mkstemp(dir=os.path.expanduser('~/DIN-V2-CODE'))
+    tf = open(tpath, 'w')
+
+    fd = open(file, "r")
+    for l in fd:
+        print(l.strip("\n"), file=tf)
+    tf.close()
+
+    lines = open(tpath, 'r').readlines()
+    random.shuffle(lines)
+    if temporary:
+        path, filename = os.path.split(os.path.realpath(file))
+        fd = tempfile.TemporaryFile(mode='w+', prefix=filename + '.shuf', dir=path)
+    else:
+        fd = open(file + '.shuf', 'w')
+
+    for l in lines:
+        s = l.strip("\n")
+        print(s, file=fd)
+
+    if temporary:
+        fd.seek(0)
+    else:
+        fd.close()
+
+    os.remove(tpath)
+
+    return fd
+
+
+if __name__ == '__main__':
+    main(sys.argv[1])
+
diff --git a/modelzoo/CAN/script/split_by_user.py b/modelzoo/CAN/script/split_by_user.py
new file mode 100644
index 00000000000..9f570d97819
--- /dev/null
+++ b/modelzoo/CAN/script/split_by_user.py
@@ -0,0 +1,20 @@
+import random
+
+fi = open("/home/test/modelzoo/DIEN/data/local_test", "r")
+ftrain = open("/home/test/modelzoo/DIEN/data/local_train_splitByUser", "w")
+ftest = open("/home/test/modelzoo/DIEN/data/local_test_splitByUser", "w")
+
+while True:
+    rand_int = random.randint(1, 10)
+    noclk_line = fi.readline().strip()
+    clk_line = fi.readline().strip()
+    if noclk_line == "" or clk_line == "":
+        break
+    if rand_int == 2:
+        print(noclk_line, file=ftest)
+        print(clk_line, file=ftest)
+    else:
+        print(noclk_line, file=ftrain)
+        print(clk_line, file=ftrain)
+
+
diff --git a/modelzoo/CAN/script/test.py b/modelzoo/CAN/script/test.py
new file mode 100644
index 00000000000..64b9a7f3337
--- /dev/null
+++ b/modelzoo/CAN/script/test.py
@@ -0,0 +1,10 @@
+import os
+import pandas as pd
+
+file = '/home/test/modelzoo/DIEN/data/local_train_splitByUser'
+# if os.path.exists(file+'_neg') is True:
+#     print('YES')
+# else:
+#     print('NOT')
+data = pd.read_csv(file)
+print(data.head())
\ No newline at end of file
diff --git a/modelzoo/CAN/script/train.py b/modelzoo/CAN/script/train.py
new file mode 100644
index 00000000000..bc1c8a8d97d
--- /dev/null
+++ b/modelzoo/CAN/script/train.py
@@ -0,0 +1,293 @@
+import numpy
+from data_iterator import DataIterator
+import tensorflow as tf
+from model import *
+import time
+import random
+import sys
+from utils import *
+from tqdm import tqdm
+
+EMBEDDING_DIM = 18
+HIDDEN_SIZE = 18 * 2
+ATTENTION_SIZE = 18 * 2
+best_auc = 0.0
+
+def prepare_data(input, target, maxlen = None, return_neg = False):
+    # x: a list of sentences
+    lengths_x = [len(s[4]) for s in input]
+    seqs_mid = [inp[3] for inp in input]
+    seqs_cat = [inp[4] for inp in input]
+    noclk_seqs_mid = [inp[5] for inp in input]
+    noclk_seqs_cat = [inp[6] for inp in input]
+    seqs_item_carte = [inp[7][0] for inp in input]
+    seqs_cate_carte = [inp[7][1] for inp in input]
+
+    if maxlen is not None:
+        new_seqs_mid = []
+        new_seqs_cat = []
+        new_noclk_seqs_mid = []
+        new_noclk_seqs_cat = []
+        new_lengths_x = []
+        new_seqs_item_carte = []
+        new_seqs_cate_carte = []
+        for l_x, inp in zip(lengths_x, input):
+            if l_x > maxlen:
+                new_seqs_mid.append(inp[3][l_x - maxlen:])
+                new_seqs_cat.append(inp[4][l_x - maxlen:])
+                new_noclk_seqs_mid.append(inp[5][l_x - maxlen:])
+                new_noclk_seqs_cat.append(inp[6][l_x - maxlen:])
+                new_seqs_item_carte.append(inp[7][0][l_x - maxlen:])
+                new_seqs_cate_carte.append(inp[7][1][l_x - maxlen:])
+                new_lengths_x.append(maxlen)
+            else:
+                new_seqs_mid.append(inp[3])
+                new_seqs_cat.append(inp[4])
+                new_noclk_seqs_mid.append(inp[5])
+                new_noclk_seqs_cat.append(inp[6])
+                new_seqs_item_carte.append(inp[7][0])
+                new_seqs_cate_carte.append(inp[7][1])
+                new_lengths_x.append(l_x)
+        lengths_x = new_lengths_x
+        seqs_mid = new_seqs_mid
+        seqs_cat = new_seqs_cat
+        noclk_seqs_mid = new_noclk_seqs_mid
+        noclk_seqs_cat = new_noclk_seqs_cat
+        seqs_item_carte = new_seqs_item_carte
+        seqs_cate_carte = new_seqs_cate_carte
+
+    if len(lengths_x) < 1:
+        return None, None, None, None
+
+    n_samples = len(seqs_mid)
+    maxlen_x = numpy.max(lengths_x)
+    neg_samples = len(noclk_seqs_mid[0][0])
+
+    mid_his = numpy.zeros((n_samples, maxlen_x)).astype('int64')
+    cat_his = numpy.zeros((n_samples, maxlen_x)).astype('int64')
+    noclk_mid_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64')
+    noclk_cat_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64')
+    item_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64')
+    cate_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64')
+    mid_mask = numpy.zeros((n_samples, maxlen_x)).astype('float32')
+    for idx, [s_x, s_y, no_sx, no_sy, i_c, c_c] in enumerate(zip(seqs_mid, seqs_cat, noclk_seqs_mid, noclk_seqs_cat, seqs_item_carte, seqs_cate_carte)):
+        mid_mask[idx, :lengths_x[idx]] = 1.
+        mid_his[idx, :lengths_x[idx]] = s_x
+        cat_his[idx, :lengths_x[idx]] = s_y
+        noclk_mid_his[idx, :lengths_x[idx], :] = no_sx
+        noclk_cat_his[idx, :lengths_x[idx], :] = no_sy
+        item_carte[idx, :lengths_x[idx]] = i_c
+        cate_carte[idx, :lengths_x[idx]] = c_c
+
+    uids = numpy.array([inp[0] for inp in input])
+    mids = numpy.array([inp[1] for inp in input])
+    cats = numpy.array([inp[2] for inp in input])
+
+    carte = numpy.stack([item_carte, cate_carte], axis=1)
+
+    if return_neg:
+        return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), noclk_mid_his, noclk_cat_his, carte
+
+    else:
+        return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), carte
+
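+# A minimal usage sketch for prepare_data above (illustrative only; `src` and
+# `tgt` are batches yielded by DataIterator, shapes assume return_neg=True):
+#
+#   uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, \
+#       noclk_mids, noclk_cats, carte = prepare_data(src, tgt, maxlen=100, return_neg=True)
+#
+#   mid_his, cat_his: [n_samples, maxlen_x] int64, zero-padded histories
+#   mid_mask:         [n_samples, maxlen_x] float32, 1. at valid positions
+#   carte:            [n_samples, 2, maxlen_x] stacked item/category cartesian ids
+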
+def eval(sess, test_data, model, model_path):
+
+    loss_sum = 0.
+    accuracy_sum = 0.
+    aux_loss_sum = 0.
+    nums = 0
+    stored_arr = []
+    for src, tgt in test_data:
+        nums += 1
+        uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data(src, tgt, return_neg=True)
+        prob, loss, acc, aux_loss = model.calculate(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte])
+        loss_sum += loss
+        aux_loss_sum += aux_loss
+        accuracy_sum += acc
+        prob_1 = prob[:, 0].tolist()
+        target_1 = target[:, 0].tolist()
+        for p, t in zip(prob_1, target_1):
+            stored_arr.append([p, t])
+    test_auc = calc_auc(stored_arr)
+    accuracy_sum = accuracy_sum / nums
+    loss_sum = loss_sum / nums
+    aux_loss_sum = aux_loss_sum / nums
+    global best_auc
+    if best_auc < test_auc:
+        best_auc = test_auc
+    #model.save(sess, model_path)
+    return test_auc, loss_sum, accuracy_sum, aux_loss_sum
+
+def train(
+        train_file = "/home/test/modelzoo/DIEN/data/local_train_splitByUser",
+        test_file = "/home/test/modelzoo/DIEN/data/local_test_splitByUser",
+        uid_voc = "/home/test/modelzoo/CAN/data/uid_voc.pkl",
+        mid_voc = "/home/test/modelzoo/CAN/data/mid_voc.pkl",
+        cat_voc = "/home/test/modelzoo/CAN/data/cat_voc.pkl",
+        batch_size = 128,
+        maxlen = 100,
+        test_iter = 8400,
+        save_iter = 8400,
+        model_type = 'DNN',
+        seed = 2,
+):
+    model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed)
+    best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)
+    gpu_options = tf.GPUOptions(allow_growth=True)
+    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
+        label_type = 1
+        train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False, label_type=label_type)
+        test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, label_type=label_type)
+        n_uid, n_mid, n_cat, n_carte = train_data.get_n()
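+        # model_type selects one of the branches below; the CAN variants
+        # ('CAN', 'CAN+Cartesion', 'CAN+DIEN') enable the co-action units via
+        # use_coaction=True, while the 'Cartesion' variants consume the
+        # precomputed cartesian-product features via use_cartes=True.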
+        if model_type == 'DNN':
+            model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
+        elif model_type == 'Cartesion':
+            model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False, use_cartes=True)
+        elif model_type == 'CAN+Cartesion':
+            model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True, use_cartes=True)
+        elif model_type == 'CAN':
+            model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True)
+        elif model_type == 'PNN':
+            model = Model_PNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
+        elif model_type == 'ONN':
+            model = Model_ONN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
+        elif model_type == 'Wide':
+            model = Model_WideDeep(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'NCF':
+            model = Model_NCF(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'FM':
+            model = Model_FM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
+        elif model_type == 'FFM':
+            model = Model_FFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
+        elif model_type == 'DeepFM':
+            model = Model_DeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
+        elif model_type == 'DeepFFM':
+            model = Model_DeepFFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
+        elif model_type == 'xDeepFM':
+            model = Model_xDeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
+        elif model_type == 'DIN':
+            model = Model_DIN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'DIEN':
+            model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'CAN+DIEN':
+            model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True)
+        else:
+            print("Invalid model_type : %s" % model_type)
+            return
+        print("Model: ", model_type)
+        sess.run(tf.global_variables_initializer())
+        sess.run(tf.local_variables_initializer())
+        sys.stdout.flush()
+
+        count()
+        start_time = time.time()
+        iter = 0
+        lr = 0.001
+
+        for itr in range(1):
+            loss_sum = 0.0
+            accuracy_sum = 0.
+            aux_loss_sum = 0.
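+            # One pass over the training data per epoch: the running sums are
+            # reset at each log point and the learning rate is halved after
+            # every epoch.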
+            print('train_data:', train_data)
+            for src, tgt in train_data:
+                uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data(src, tgt, maxlen, return_neg=True)
+                loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats, carte])
+                loss_sum += loss
+                accuracy_sum += acc
+                aux_loss_sum += aux_loss
+                iter += 1
+                sys.stdout.flush()
+                if (iter % 100) == 0:
+                    print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f' % (iter, loss_sum / 100, accuracy_sum / 100, aux_loss_sum / 100))
+                    loss_sum = 0.0
+                    accuracy_sum = 0.0
+                    aux_loss_sum = 0.0
+                if (iter % test_iter) == 0:
+                    auc_, loss_, acc_, aux_ = eval(sess, test_data, model, best_model_path)
+                    print('iter: %d --- test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % (iter, auc_, loss_, acc_, aux_))
+                    loss_sum = 0.0
+                    accuracy_sum = 0.0
+                    aux_loss_sum = 0.0
+                if (iter % save_iter) == 0:
+                    print('save model iter: %d' % (iter))
+                    model.save(sess, model_path + "--" + str(iter))
+            print('time: %f' % (time.time() - start_time))
+            lr *= 0.5
+
+def count_flops(graph):
+    flops = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.float_operation())
+    print('FLOPs: {}'.format(flops.total_float_ops))
+
+def count():
+    total_parameters = 0
+    for variable in tf.trainable_variables():
+        # shape is an array of tf.Dimension
+        shape = variable.get_shape()
+        variable_parameters = 1
+        for dim in shape:
+            variable_parameters *= dim.value
+        total_parameters += variable_parameters
+    print("Parameter: ", total_parameters)
+
+def test(
+        train_file = "/home/test/modelzoo/DIEN/data/local_train_splitByUser",
+        test_file = "/home/test/modelzoo/DIEN/data/local_test_splitByUser",
+        uid_voc = "/home/test/modelzoo/CAN/data/uid_voc.pkl",
+        mid_voc = "/home/test/modelzoo/CAN/data/mid_voc.pkl",
+        cat_voc = "/home/test/modelzoo/CAN/data/cat_voc.pkl",
+        batch_size = 128,
+        maxlen = 100,
+        model_type = 'DNN',
+        seed = 2
+):
+
+    model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)
+    gpu_options = tf.GPUOptions(allow_growth=True)
+    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
+        train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
+        test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
+        n_uid, n_mid, n_cat = train_data.get_n()
+        if model_type == 'DNN':
+            model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'PNN':
+            model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'Wide':
+            model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'DIN':
+            model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'DIN-V2-gru-att-gru':
+            model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'DIN-V2-gru-gru-att':
+            model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'DIN-V2-gru-qa-attGru':
+            model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'DIN-V2-gru-vec-attGru':
+            model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+        elif model_type == 'DIEN':
+            model = 
Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) + else: + print ("Invalid model_type : %s", model_type) + return + model.restore(sess, model_path) + print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path)) + +if __name__ == '__main__': + if len(sys.argv) == 4: + SEED = int(sys.argv[3]) + else: + SEED = 3 + tf.set_random_seed(SEED) + numpy.random.seed(SEED) + random.seed(SEED) + + if sys.argv[1] == 'train': + train(model_type=sys.argv[2], seed=SEED) + elif sys.argv[1] == 'test': + test(model_type=sys.argv[2], seed=SEED) + else: + print('do nothing...') + + diff --git a/modelzoo/CAN/script/utils.py b/modelzoo/CAN/script/utils.py new file mode 100644 index 00000000000..4590754b054 --- /dev/null +++ b/modelzoo/CAN/script/utils.py @@ -0,0 +1,404 @@ +import tensorflow as tf + +from tensorflow.python.ops.rnn_cell import * +from tensorflow.contrib.rnn.python.ops.core_rnn_cell import _linear +#from tensorflow import keras +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variable_scope as vs +#from keras import backend as K + +class QAAttGRUCell(RNNCell): + """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078). + Args: + num_units: int, The number of units in the GRU cell. + activation: Nonlinearity to use. Default: `tanh`. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + kernel_initializer: (optional) The initializer to use for the weight and + projection matrices. + bias_initializer: (optional) The initializer to use for the bias. + """ + + def __init__(self, + num_units, + activation=None, + reuse=None, + kernel_initializer=None, + bias_initializer=None): + super(QAAttGRUCell, self).__init__(_reuse=reuse) + self._num_units = num_units + self._activation = activation or math_ops.tanh + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._gate_linear = None + self._candidate_linear = None + + @property + def state_size(self): + return self._num_units + + @property + def output_size(self): + return self._num_units + + def __call__(self, inputs, state, att_score): + return self.call(inputs, state, att_score) + + def call(self, inputs, state, att_score=None): + """Gated recurrent unit (GRU) with nunits cells.""" + if self._gate_linear is None: + bias_ones = self._bias_initializer + if self._bias_initializer is None: + bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype) + with vs.variable_scope("gates"): # Reset gate and update gate. + self._gate_linear = _Linear( + [inputs, state], + 2 * self._num_units, + True, + bias_initializer=bias_ones, + kernel_initializer=self._kernel_initializer) + + value = math_ops.sigmoid(self._gate_linear([inputs, state])) + r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) + + r_state = r * state + if self._candidate_linear is None: + with vs.variable_scope("candidate"): + self._candidate_linear = _Linear( + [inputs, r_state], + self._num_units, + True, + bias_initializer=self._bias_initializer, + kernel_initializer=self._kernel_initializer) + c = self._activation(self._candidate_linear([inputs, r_state])) + new_h = (1. 
- att_score) * state + att_score * c + return new_h, new_h + +class VecAttGRUCell(RNNCell): + """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078). + Args: + num_units: int, The number of units in the GRU cell. + activation: Nonlinearity to use. Default: `tanh`. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + kernel_initializer: (optional) The initializer to use for the weight and + projection matrices. + bias_initializer: (optional) The initializer to use for the bias. + """ + + def __init__(self, + num_units, + activation=None, + reuse=None, + kernel_initializer=None, + bias_initializer=None): + super(VecAttGRUCell, self).__init__(_reuse=reuse) + self._num_units = num_units + self._activation = activation or math_ops.tanh + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._gate_linear = None + self._candidate_linear = None + + @property + def state_size(self): + return self._num_units + + @property + def output_size(self): + return self._num_units + def __call__(self, inputs, state, att_score): + return self.call(inputs, state, att_score) + def call(self, inputs, state, att_score=None): + """Gated recurrent unit (GRU) with nunits cells.""" + if self._gate_linear is None: + bias_ones = self._bias_initializer + if self._bias_initializer is None: + bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype) + with vs.variable_scope("gates"): # Reset gate and update gate. + self._gate_linear = _Linear( + [inputs, state], + 2 * self._num_units, + True, + bias_initializer=bias_ones, + kernel_initializer=self._kernel_initializer) + + value = math_ops.sigmoid(self._gate_linear([inputs, state])) + r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) + + r_state = r * state + if self._candidate_linear is None: + with vs.variable_scope("candidate"): + self._candidate_linear = _Linear( + [inputs, r_state], + self._num_units, + True, + bias_initializer=self._bias_initializer, + kernel_initializer=self._kernel_initializer) + c = self._activation(self._candidate_linear([inputs, r_state])) + u = (1.0 - att_score) * u + new_h = u * state + (1 - u) * c + return new_h, new_h + +def prelu(_x, scope=''): + """parametric ReLU activation""" + with tf.variable_scope(name_or_scope=scope, default_name="prelu"): + _alpha = tf.get_variable("prelu_"+scope, shape=_x.get_shape()[-1], + dtype=_x.dtype, initializer=tf.constant_initializer(0.1)) + return tf.maximum(0.0, _x) + _alpha * tf.minimum(0.0, _x) + +def calc_auc(raw_arr): + """Summary + + Args: + raw_arr (TYPE): Description + + Returns: + TYPE: Description + """ + + arr = sorted(raw_arr, key=lambda d:d[0], reverse=True) + pos, neg = 0., 0. + for record in arr: + if record[1] == 1.: + pos += 1 + else: + neg += 1 + + fp, tp = 0., 0. + xy_arr = [] + for record in arr: + if record[1] == 1.: + tp += 1 + else: + fp += 1 + xy_arr.append([fp/neg, tp/pos]) + + auc = 0. + prev_x = 0. + prev_y = 0. + for x, y in xy_arr: + if x != prev_x: + auc += ((x - prev_x) * (y + prev_y) / 2.) + prev_x = x + prev_y = y + + return auc + +def attention(query, facts, attention_size, mask, stag='null', mode='LIST', softmax_stag=1, time_major=False, return_alphas=False): + if isinstance(facts, tuple): + # In case of Bi-RNN, concatenate the forward and the backward RNN outputs. 
+ facts = tf.concat(facts, 2) + + if time_major: + # (T,B,D) => (B,T,D) + facts = tf.array_ops.transpose(facts, [1, 0, 2]) + + mask = tf.equal(mask, tf.ones_like(mask)) + hidden_size = facts.get_shape().as_list()[-1] # D value - hidden size of the RNN layer + input_size = query.get_shape().as_list()[-1] + + # Trainable parameters + w1 = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1)) + w2 = tf.Variable(tf.random_normal([input_size, attention_size], stddev=0.1)) + b = tf.Variable(tf.random_normal([attention_size], stddev=0.1)) + v = tf.Variable(tf.random_normal([attention_size], stddev=0.1)) + + with tf.name_scope('v'): + # Applying fully connected layer with non-linear activation to each of the B*T timestamps; + # the shape of `tmp` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size + tmp1 = tf.tensordot(facts, w1, axes=1) + tmp2 = tf.tensordot(query, w2, axes=1) + tmp2 = tf.reshape(tmp2, [-1, 1, tf.shape(tmp2)[-1]]) + tmp = tf.tanh((tmp1 + tmp2) + b) + + # For each of the timestamps its vector of size A from `tmp` is reduced with `v` vector + v_dot_tmp = tf.tensordot(tmp, v, axes=1, name='v_dot_tmp') # (B,T) shape + key_masks = mask # [B, 1, T] + # key_masks = tf.expand_dims(mask, 1) # [B, 1, T] + paddings = tf.ones_like(v_dot_tmp) * (-2 ** 32 + 1) + v_dot_tmp = tf.where(key_masks, v_dot_tmp, paddings) # [B, 1, T] + alphas = tf.nn.softmax(v_dot_tmp, name='alphas') # (B,T) shape + + # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape + #output = tf.reduce_sum(facts * tf.expand_dims(alphas, -1), 1) + output = facts * tf.expand_dims(alphas, -1) + output = tf.reshape(output, tf.shape(facts)) + # output = output / (facts.get_shape().as_list()[-1] ** 0.5) + if not return_alphas: + return output + else: + return output, alphas + +def din_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False): + if isinstance(facts, tuple): + # In case of Bi-RNN, concatenate the forward and the backward RNN outputs. 
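+        # A Bi-RNN doubles the depth of `facts`, so the query is concatenated
+        # with itself below to keep the attention MLP input sizes aligned.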
+ facts = tf.concat(facts, 2) + print ("querry_size mismatch") + query = tf.concat(values = [ + query, + query, + ], axis=1) + + if time_major: + # (T,B,D) => (B,T,D) + facts = tf.array_ops.transpose(facts, [1, 0, 2]) + mask = tf.equal(mask, tf.ones_like(mask)) + facts_size = facts.get_shape().as_list()[-1] # D value - hidden size of the RNN layer + querry_size = query.get_shape().as_list()[-1] + queries = tf.tile(query, [1, tf.shape(facts)[1]]) + queries = tf.reshape(queries, tf.shape(facts)) + din_all = tf.concat([queries, facts, queries-facts, queries*facts], axis=-1) + d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att' + stag) + d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att' + stag) + d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att' + stag) + d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(facts)[1]]) + scores = d_layer_3_all + # Mask + # key_masks = tf.sequence_mask(facts_length, tf.shape(facts)[1]) # [B, T] + key_masks = tf.expand_dims(mask, 1) # [B, 1, T] + paddings = tf.ones_like(scores) * (-2 ** 32 + 1) + scores = tf.where(key_masks, scores, paddings) # [B, 1, T] + + # Scale + # scores = scores / (facts.get_shape().as_list()[-1] ** 0.5) + + # Activation + if softmax_stag: + scores = tf.nn.softmax(scores) # [B, 1, T] + + # Weighted sum + if mode == 'SUM': + output = tf.matmul(scores, facts) # [B, 1, H] + # output = tf.reshape(output, [-1, tf.shape(facts)[-1]]) + else: + scores = tf.reshape(scores, [-1, tf.shape(facts)[1]]) + output = facts * tf.expand_dims(scores, -1) + output = tf.reshape(output, tf.shape(facts)) + return output + +def din_fcn_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False, forCnn=False): + if isinstance(facts, tuple): + # In case of Bi-RNN, concatenate the forward and the backward RNN outputs. 
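+        # Unlike din_attention above, no manual query doubling is needed here:
+        # the query is projected to facts_size with a dense layer below.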
+def din_fcn_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False, forCnn=False):
+    if isinstance(facts, tuple):
+        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
+        facts = tf.concat(facts, 2)
+    if len(facts.get_shape().as_list()) == 2:
+        facts = tf.expand_dims(facts, 1)
+
+    if time_major:
+        # (T,B,D) => (B,T,D)
+        facts = tf.transpose(facts, [1, 0, 2])
+    # Trainable parameters
+    mask = tf.equal(mask, tf.ones_like(mask))
+    facts_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
+    query_size = query.get_shape().as_list()[-1]
+    query = tf.layers.dense(query, facts_size, activation=None, name='f1' + stag)
+    query = prelu(query)
+    queries = tf.tile(query, [1, tf.shape(facts)[1]])
+    queries = tf.reshape(queries, tf.shape(facts))
+    din_all = tf.concat([queries, facts, queries - facts, queries * facts], axis=-1)
+    d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att' + stag)
+    d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att' + stag)
+    d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att' + stag)
+    d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(facts)[1]])
+    scores = d_layer_3_all
+    # Mask
+    # key_masks = tf.sequence_mask(facts_length, tf.shape(facts)[1])  # [B, T]
+    key_masks = tf.expand_dims(mask, 1)  # [B, 1, T]
+    paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
+    if not forCnn:
+        scores = tf.where(key_masks, scores, paddings)  # [B, 1, T]
+
+    # Scale
+    # scores = scores / (facts.get_shape().as_list()[-1] ** 0.5)
+
+    # Activation
+    if softmax_stag:
+        scores = tf.nn.softmax(scores)  # [B, 1, T]
+
+    # Weighted sum
+    if mode == 'SUM':
+        output = tf.matmul(scores, facts)  # [B, 1, H]
+        # output = tf.reshape(output, [-1, tf.shape(facts)[-1]])
+    else:
+        scores = tf.reshape(scores, [-1, tf.shape(facts)[1]])
+        output = facts * tf.expand_dims(scores, -1)
+        output = tf.reshape(output, tf.shape(facts))
+    if return_alphas:
+        return output, scores
+    return output
+
+def self_attention(facts, ATTENTION_SIZE, mask, stag='null'):
+    if len(facts.get_shape().as_list()) == 2:
+        facts = tf.expand_dims(facts, 1)
+
+    def cond(batch, output, i):
+        return tf.less(i, tf.shape(batch)[1])
+
+    def body(batch, output, i):
+        self_attention_tmp = din_fcn_attention(batch[:, i, :], batch[:, 0:i + 1, :],
+                                               ATTENTION_SIZE, mask[:, 0:i + 1], softmax_stag=1, stag=stag,
+                                               mode='LIST')
+        self_attention_tmp = tf.reduce_sum(self_attention_tmp, 1)
+        output = output.write(i, self_attention_tmp)
+        return batch, output, i + 1
+
+    output_ta = tf.TensorArray(dtype=tf.float32,
+                               size=0,
+                               dynamic_size=True,
+                               element_shape=(facts[:, 0, :].get_shape()))
+    _, output_op, _ = tf.while_loop(cond, body, [facts, output_ta, 0])
+    self_attention = output_op.stack()
+    self_attention = tf.transpose(self_attention, perm=[1, 0, 2])
+    return self_attention
+
+def self_all_attention(facts, ATTENTION_SIZE, mask, stag='null'):
+    if len(facts.get_shape().as_list()) == 2:
+        facts = tf.expand_dims(facts, 1)
+
+    def cond(batch, output, i):
+        return tf.less(i, tf.shape(batch)[1])
+
+    def body(batch, output, i):
+        self_attention_tmp = din_fcn_attention(batch[:, i, :], batch,
+                                               ATTENTION_SIZE, mask, softmax_stag=1, stag=stag,
+                                               mode='LIST')
+        self_attention_tmp = tf.reduce_sum(self_attention_tmp, 1)
+        output = output.write(i, self_attention_tmp)
+        return batch, output, i + 1
+
+    output_ta = tf.TensorArray(dtype=tf.float32,
+                               size=0,
+                               dynamic_size=True,
+                               element_shape=(facts[:, 0, :].get_shape()))
+    _, output_op, _ = tf.while_loop(cond, body, [facts, output_ta, 0])
+    self_attention = output_op.stack()
+    self_attention = tf.transpose(self_attention, perm=[1, 0, 2])
+    return self_attention
+
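+# Note on the two while_loop helpers above: self_attention lets position i
+# attend only to positions 0..i (prefix / causal attention), while
+# self_all_attention lets every position attend to the whole sequence. Both
+# stack the per-step results into a [B, T, D] tensor, e.g.:
+#     out = self_attention(facts, ATTENTION_SIZE, mask)  # facts: [B, T, D]
+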
+def din_fcn_shine(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False):
+    if isinstance(facts, tuple):
+        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
+        facts = tf.concat(facts, 2)
+
+    if time_major:
+        # (T,B,D) => (B,T,D)
+        facts = tf.transpose(facts, [1, 0, 2])
+    # Trainable parameters
+    mask = tf.equal(mask, tf.ones_like(mask))
+    facts_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
+    query_size = query.get_shape().as_list()[-1]
+    query = tf.layers.dense(query, facts_size, activation=None, name='f1_trans_shine' + stag)
+    query = prelu(query)
+    queries = tf.tile(query, [1, tf.shape(facts)[1]])
+    queries = tf.reshape(queries, tf.shape(facts))
+    din_all = tf.concat([queries, facts, queries - facts, queries * facts], axis=-1)
+    d_layer_1_all = tf.layers.dense(din_all, facts_size, activation=tf.nn.sigmoid, name='f1_shine_att' + stag)
+    d_layer_2_all = tf.layers.dense(d_layer_1_all, facts_size, activation=tf.nn.sigmoid, name='f2_shine_att' + stag)
+    d_layer_2_all = tf.reshape(d_layer_2_all, tf.shape(facts))
+    output = d_layer_2_all
+    return output
+
From d6d5be591e3f7ad2da16c767d5c12d3268e4c8d8 Mon Sep 17 00:00:00 2001
From: lihangtian <936971274@qq.com>
Date: Wed, 3 Aug 2022 11:38:14 +0800
Subject: [PATCH 2/8] [ModelZoo] Support Co_Action Network

---
 modelzoo/CAN/README.md | 25 +++++++++
 modelzoo/CAN/data/README.md | 25 +++++++++
 modelzoo/CAN/{ => data}/prepare_data.sh | 2 +-
 modelzoo/CAN/{ => data}/script/Dice.py | 0
 modelzoo/CAN/{ => data}/script/calc_ckpt.py | 4 +-
 .../CAN/{ => data}/script/data_iterator.py | 12 ++---
 .../CAN/{ => data}/script/generate_voc.py | 7 ++-
 .../CAN/{ => data}/script/generate_voc.py.bk | 0
 .../CAN/{ => data}/script/local_aggretor.py | 7 ++-
 modelzoo/CAN/{ => data}/script/model.py | 12 ++---
 modelzoo/CAN/{ => data}/script/model_avazu.py | 0
 .../CAN/{ => data}/script/process_data.py | 0
 modelzoo/CAN/{ => data}/script/rnn.py | 0
 modelzoo/CAN/{ => data}/script/shuffle.py | 0
 .../CAN/{ => data}/script/split_by_user.py | 6 +--
 modelzoo/CAN/{ => data}/script/utils.py | 2 -
 modelzoo/CAN/script/test.py | 10 ----
 modelzoo/CAN/{script => }/train.py | 53 +++++++++----------
 18 files changed, 96 insertions(+), 69 deletions(-)
 create mode 100644 modelzoo/CAN/README.md
 create mode 100644 modelzoo/CAN/data/README.md
 rename modelzoo/CAN/{ => data}/prepare_data.sh (84%)
 rename modelzoo/CAN/{ => data}/script/Dice.py (100%)
 rename modelzoo/CAN/{ => data}/script/calc_ckpt.py (82%)
 rename modelzoo/CAN/{ => data}/script/data_iterator.py (92%)
 rename modelzoo/CAN/{ => data}/script/generate_voc.py (95%)
 rename modelzoo/CAN/{ => data}/script/generate_voc.py.bk (100%)
 rename modelzoo/CAN/{ => data}/script/local_aggretor.py (78%)
 rename modelzoo/CAN/{ => data}/script/model.py (99%)
 rename modelzoo/CAN/{ => data}/script/model_avazu.py (100%)
 rename modelzoo/CAN/{ => data}/script/process_data.py (100%)
 rename modelzoo/CAN/{ => data}/script/rnn.py (100%)
 rename modelzoo/CAN/{ => data}/script/shuffle.py (100%)
 rename modelzoo/CAN/{ => data}/script/split_by_user.py (64%)
 rename modelzoo/CAN/{ => data}/script/utils.py (99%)
 delete mode 100644 modelzoo/CAN/script/test.py
 rename modelzoo/CAN/{script => }/train.py (88%)
diff --git a/modelzoo/CAN/README.md b/modelzoo/CAN/README.md
new file mode 100644
index 00000000000..02d8b396649
--- /dev/null
+++ b/modelzoo/CAN/README.md
@@ -0,0 +1,25 @@
+# Co-Action Network
+
+Implementation of paper "CAN: Revisiting Feature Co-Action for Click Through Rate Prediction".
+
+paper: [arxiv (to be released)]()
+
+## Installation
+dependencies:
+
+tensorflow: 1.4.1
+
+python: 2.7
+
+Higher versions of TensorFlow and Python 3 will be supported soon.
+
+## Getting Started
+training:
+
+CUDA_VISIBLE_DEVICES=0 python train.py train {model}
+
+model: CAN,Cartesion,PNN, etc. (check the train.py)
+
+## Citation
+## Contact
+## License
diff --git a/modelzoo/CAN/data/README.md b/modelzoo/CAN/data/README.md
new file mode 100644
index 00000000000..02d8b396649
--- /dev/null
+++ b/modelzoo/CAN/data/README.md
@@ -0,0 +1,25 @@
+# Co-Action Network
+
+Implementation of paper "CAN: Revisiting Feature Co-Action for Click Through Rate Prediction".
+
+paper: [arxiv (to be released)]()
+
+## Installation
+dependencies:
+
+tensorflow: 1.4.1
+
+python: 2.7
+
+Higher versions of TensorFlow and Python 3 will be supported soon.
+
+## Getting Started
+training:
+
+CUDA_VISIBLE_DEVICES=0 python train.py train {model}
+
+model: CAN,Cartesion,PNN, etc. (check the train.py)
+
+## Citation
+## Contact
+## License
diff --git a/modelzoo/CAN/prepare_data.sh b/modelzoo/CAN/data/prepare_data.sh
similarity index 84%
rename from modelzoo/CAN/prepare_data.sh
rename to modelzoo/CAN/data/prepare_data.sh
index 110b9559129..54c9733dd15 100644
--- a/modelzoo/CAN/prepare_data.sh
+++ b/modelzoo/CAN/data/prepare_data.sh
@@ -3,7 +3,7 @@ wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Boo
 wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz
 gunzip reviews_Books.json.gz
 gunzip meta_Books.json.gz
-python script/process_data.py meta_Books.json reviews_Books_5.json
+python script/process_data.py meta_Books.json reviews_Books.json
 python script/local_aggretor.py
 python script/split_by_user.py
 python script/generate_voc.py
diff --git a/modelzoo/CAN/script/Dice.py b/modelzoo/CAN/data/script/Dice.py
similarity index 100%
rename from modelzoo/CAN/script/Dice.py
rename to modelzoo/CAN/data/script/Dice.py
diff --git a/modelzoo/CAN/script/calc_ckpt.py b/modelzoo/CAN/data/script/calc_ckpt.py
similarity index 82%
rename from modelzoo/CAN/script/calc_ckpt.py
rename to modelzoo/CAN/data/script/calc_ckpt.py
index fa5d4bda035..97d30de8a73 100644
--- a/modelzoo/CAN/script/calc_ckpt.py
+++ b/modelzoo/CAN/data/script/calc_ckpt.py
@@ -7,8 +7,6 @@
     shape = variable.get_shape()
     variable_parameters = 1
     for dim in shape:
-        # print(dim)
         variable_parameters *= dim.value
-        # print(variable_parameters)
     total_parameters += variable_parameters
-print(total_parameters)
+
diff --git a/modelzoo/CAN/script/data_iterator.py b/modelzoo/CAN/data/script/data_iterator.py
similarity index 92%
rename from modelzoo/CAN/script/data_iterator.py
rename to modelzoo/CAN/data/script/data_iterator.py
index b5eef5f9e57..b49e5f8c2c9 100644
--- a/modelzoo/CAN/script/data_iterator.py
+++ b/modelzoo/CAN/data/script/data_iterator.py
@@ -1,17 +1,15 @@
 import numpy
 import json
-#import cPickle as pkl
 import _pickle as cPickle
 import random
 import gzip
-import shuffle
+import data.script.shuffle
 
 def unicode_to_utf8(d):
     return dict((key.encode("UTF-8"), value) for (key,value) in d.items())
 def dict_unicode_to_utf8(d):
-    print('d={}'.format(d))
     return dict(((key[0].encode("UTF-8"), key[1].encode("UTF-8")), value) for (key,value) in d.items())
 
 def load_dict(filename):
@@ -53,11 +51,10 @@ def __init__(self, source,
         else:
             self.source = fopen(source, 'r')
         self.source_dicts = []
-        #for source_dict in [uid_voc, mid_voc, cat_voc, cat_voc, cat_voc]:#
'item_carte_voc.pkl', 'cate_carte_voc.pkl']: - for source_dict in [uid_voc, mid_voc, cat_voc, '/home/test/modelzoo/CAN/data/item_carte_voc.pkl', '/home/test/modelzoo/CAN/data/cate_carte_voc.pkl']: + for source_dict in [uid_voc, mid_voc, cat_voc, '../CAN/data/item_carte_voc.pkl', '../CAN/data/cate_carte_voc.pkl']: self.source_dicts.append(load_dict(source_dict)) - f_meta = open("/home/test/modelzoo/CAN/data/item-info", "r") + f_meta = open("../CAN/data/item-info", "r") meta_map = {} for line in f_meta: arr = line.strip().split("\t") @@ -76,7 +73,7 @@ def __init__(self, source, cat_idx = 0 self.meta_id_map[mid_idx] = cat_idx - f_review = open("/home/test/modelzoo/CAN/data/reviews-info", "r") + f_review = open("../CAN/data/reviews-info", "r") self.mid_list_for_random = [] for line in f_review: arr = line.strip().split("\t") @@ -94,7 +91,6 @@ def __init__(self, source, self.n_mid = len(self.source_dicts[1]) self.n_cat = len(self.source_dicts[2]) self.n_carte = [len(self.source_dicts[3]), len(self.source_dicts[4])] - print("n_uid=%d, n_mid=%d, n_cat=%d" % (self.n_uid, self.n_mid, self.n_cat)) self.shuffle = shuffle_each_epoch self.sort_by_length = sort_by_length diff --git a/modelzoo/CAN/script/generate_voc.py b/modelzoo/CAN/data/script/generate_voc.py similarity index 95% rename from modelzoo/CAN/script/generate_voc.py rename to modelzoo/CAN/data/script/generate_voc.py index 03b6a662d97..b6816d36563 100644 --- a/modelzoo/CAN/script/generate_voc.py +++ b/modelzoo/CAN/data/script/generate_voc.py @@ -1,13 +1,13 @@ import pickle as pk -f_train = open("/home/test/modelzoo/DIEN/data/local_train_splitByUser", "r") +f_train = open("../../DIEN/data/local_train_splitByUser", "r") uid_dict = {} mid_dict = {} cat_dict = {} item_carte_dict = {} cate_carte_dict = {} -iddd = 0 + for line in f_train: arr = line.strip("\n").split("\t") clk = arr[0] @@ -34,8 +34,7 @@ if (mid, m) not in item_carte_dict: item_carte_dict[(mid, m)] = 0 item_carte_dict[(mid, m)] += 1 - #print iddd - iddd+=1 + for c in cat_list.split(""): if c not in cat_dict: cat_dict[c] = 0 diff --git a/modelzoo/CAN/script/generate_voc.py.bk b/modelzoo/CAN/data/script/generate_voc.py.bk similarity index 100% rename from modelzoo/CAN/script/generate_voc.py.bk rename to modelzoo/CAN/data/script/generate_voc.py.bk diff --git a/modelzoo/CAN/script/local_aggretor.py b/modelzoo/CAN/data/script/local_aggretor.py similarity index 78% rename from modelzoo/CAN/script/local_aggretor.py rename to modelzoo/CAN/data/script/local_aggretor.py index e7e23190a1d..e652ff3d543 100644 --- a/modelzoo/CAN/script/local_aggretor.py +++ b/modelzoo/CAN/data/script/local_aggretor.py @@ -2,9 +2,9 @@ import hashlib import random -fin = open("/home/test/modelzoo/DIEN/data/jointed-new-split-info", "r") -ftrain = open("/home/test/modelzoo/DIEN/data/local_train", "w") -ftest = open("/home/test/modelzoo/DIEN/data/local_test", "w") +fin = open("../../DIEN/data/jointed-new-split-info", "r") +ftrain = open("../../DIEN/data/local_train", "w") +ftest = open("../../DIEN/data/local_test", "w") last_user = "0" common_fea = "" @@ -25,7 +25,6 @@ if user != last_user: movie_id_list = [] cate1_list = [] - #print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + "" + "\t" + "" else: history_clk_num = len(movie_id_list) cat_str = "" diff --git a/modelzoo/CAN/script/model.py b/modelzoo/CAN/data/script/model.py similarity index 99% rename from modelzoo/CAN/script/model.py rename to modelzoo/CAN/data/script/model.py index 133ded83f09..aba37138e0e 100644 --- 
a/modelzoo/CAN/script/model.py +++ b/modelzoo/CAN/data/script/model.py @@ -1,12 +1,10 @@ -#import tensorflow as tf -import tensorflow.compat.v1 as tf +import tensorflow as tf from tensorflow.python.ops.rnn_cell import GRUCell from tensorflow.python.ops.rnn_cell import LSTMCell from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn -#from tensorflow.python.ops.rnn import dynamic_rnn -from rnn import dynamic_rnn -from utils import * -from Dice import dice +from data.script.rnn import dynamic_rnn +from data.script.utils import * +from data.script.Dice import dice #### CAN config ##### weight_emb_w = [[16, 8], [8,4]] @@ -21,7 +19,7 @@ print("orders: ",orders) CALC_MODE = "can" -device = '/gpu:2' +device = '/gpu:0' #### CAN config ##### def gen_coaction(ad, his_items, dim, mode="can", mask=None,keep_fake_carte_seq=False): diff --git a/modelzoo/CAN/script/model_avazu.py b/modelzoo/CAN/data/script/model_avazu.py similarity index 100% rename from modelzoo/CAN/script/model_avazu.py rename to modelzoo/CAN/data/script/model_avazu.py diff --git a/modelzoo/CAN/script/process_data.py b/modelzoo/CAN/data/script/process_data.py similarity index 100% rename from modelzoo/CAN/script/process_data.py rename to modelzoo/CAN/data/script/process_data.py diff --git a/modelzoo/CAN/script/rnn.py b/modelzoo/CAN/data/script/rnn.py similarity index 100% rename from modelzoo/CAN/script/rnn.py rename to modelzoo/CAN/data/script/rnn.py diff --git a/modelzoo/CAN/script/shuffle.py b/modelzoo/CAN/data/script/shuffle.py similarity index 100% rename from modelzoo/CAN/script/shuffle.py rename to modelzoo/CAN/data/script/shuffle.py diff --git a/modelzoo/CAN/script/split_by_user.py b/modelzoo/CAN/data/script/split_by_user.py similarity index 64% rename from modelzoo/CAN/script/split_by_user.py rename to modelzoo/CAN/data/script/split_by_user.py index 9f570d97819..c2a7600fad5 100644 --- a/modelzoo/CAN/script/split_by_user.py +++ b/modelzoo/CAN/data/script/split_by_user.py @@ -1,8 +1,8 @@ import random -fi = open("/home/test/modelzoo/DIEN/data/local_test", "r") -ftrain = open("/home/test/modelzoo/DIEN/data/local_train_splitByUser", "w") -ftest = open("/home/test/modelzoo/DIEN/data/local_test_splitByUser", "w") +fi = open("../../DIEN/data/local_test", "r") +ftrain = open("../../DIEN/data/local_train_splitByUser", "w") +ftest = open("../../DIEN/data/local_test_splitByUser", "w") while True: rand_int = random.randint(1, 10) diff --git a/modelzoo/CAN/script/utils.py b/modelzoo/CAN/data/script/utils.py similarity index 99% rename from modelzoo/CAN/script/utils.py rename to modelzoo/CAN/data/script/utils.py index 4590754b054..641402b140c 100644 --- a/modelzoo/CAN/script/utils.py +++ b/modelzoo/CAN/data/script/utils.py @@ -2,12 +2,10 @@ from tensorflow.python.ops.rnn_cell import * from tensorflow.contrib.rnn.python.ops.core_rnn_cell import _linear -#from tensorflow import keras from tensorflow.python.ops import math_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import variable_scope as vs -#from keras import backend as K class QAAttGRUCell(RNNCell): """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078). 
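After this patch every data file is addressed with a relative path (`../CAN/data/...`, `../../DIEN/data/...`), so the scripts are meant to be launched from inside `modelzoo/CAN`. A minimal sketch of driving the iterator under that assumption (`batch_size=128` and `maxlen=100` mirror the defaults in train.py; the exact positional signature of `DataIterator` beyond the vocabulary paths is assumed here):

```python
from data.script.data_iterator import DataIterator

# Paths mirror the defaults in train.py
train_file = "../DIEN/data/local_train_splitByUser"
uid_voc = "../CAN/data/uid_voc.pkl"
mid_voc = "../CAN/data/mid_voc.pkl"
cat_voc = "../CAN/data/cat_voc.pkl"

train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, 128, 100)
for src, tgt in train_data:
    break  # each iteration yields one (source, target) mini-batch
```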
diff --git a/modelzoo/CAN/script/test.py b/modelzoo/CAN/script/test.py deleted file mode 100644 index 64b9a7f3337..00000000000 --- a/modelzoo/CAN/script/test.py +++ /dev/null @@ -1,10 +0,0 @@ -import os -import pandas as pd - -file = '/home/test/modelzoo/DIEN/data/local_train_splitByUser' -# if os.path.exists(file+'_neg') is True: -# print('YES') -# else: -# print('NOT') -data = pd.read_csv(file) -print(data.head()) \ No newline at end of file diff --git a/modelzoo/CAN/script/train.py b/modelzoo/CAN/train.py similarity index 88% rename from modelzoo/CAN/script/train.py rename to modelzoo/CAN/train.py index bc1c8a8d97d..7ef1a6cda18 100644 --- a/modelzoo/CAN/script/train.py +++ b/modelzoo/CAN/train.py @@ -1,11 +1,11 @@ import numpy -from data_iterator import DataIterator +from data.script.data_iterator import DataIterator import tensorflow as tf -from model import * +from data.script.model import * import time import random import sys -from utils import * +from data.script.utils import * from tqdm import tqdm EMBEDDING_DIM = 18 @@ -120,11 +120,11 @@ def eval(sess, test_data, model, model_path): return test_auc, loss_sum, accuracy_sum, aux_loss_sum def train( - train_file = "/home/test/modelzoo/DIEN/data/local_train_splitByUser", - test_file = "/home/test/modelzoo/DIEN/data/local_test_splitByUser", - uid_voc = "/home/test/modelzoo/CAN/data/uid_voc.pkl", - mid_voc = "/home/test/modelzoo/CAN/data/mid_voc.pkl", - cat_voc = "/home/test/modelzoo/CAN/data/cat_voc.pkl", + train_file = "../DIEN/data/local_train_splitByUser", + test_file = "../DIEN/data/local_test_splitByUser", + uid_voc = "../CAN/data/uid_voc.pkl", + mid_voc = "../CAN/data/mid_voc.pkl", + cat_voc = "../CAN/data/cat_voc.pkl", batch_size = 128, maxlen = 100, test_iter = 8400, @@ -183,7 +183,7 @@ def train( sys.stdout.flush() count() - start_time = time.time() + iter = 0 lr = 0.001 @@ -191,7 +191,6 @@ def train( loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. 
- print('train_data:',train_data) for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data(src, tgt, maxlen, return_neg=True) loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats, carte]) @@ -200,21 +199,21 @@ def train( aux_loss_sum += aux_loss iter += 1 sys.stdout.flush() - #if (iter % 100) == 0: - print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f' % (iter, loss_sum / 100, accuracy_sum / 100, aux_loss_sum / 100)) - loss_sum = 0.0 - accuracy_sum = 0.0 - aux_loss_sum = 0.0 - #if (iter % test_iter) == 0: - auc_, loss_, acc_, aux_ = eval(sess, test_data, model, best_model_path) - print('iter: %d --- test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % (iter, auc_, loss_, acc_, aux_)) - loss_sum = 0.0 - accuracy_sum = 0.0 - aux_loss_sum = 0.0 + if (iter % 100) == 0: + print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f' % (iter, loss_sum / 100, accuracy_sum / 100, aux_loss_sum / 100)) + loss_sum = 0.0 + accuracy_sum = 0.0 + aux_loss_sum = 0.0 + if (iter % test_iter) == 0: + auc_, loss_, acc_, aux_ = eval(sess, test_data, model, best_model_path) + print('iter: %d --- test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % (iter, auc_, loss_, acc_, aux_)) + loss_sum = 0.0 + accuracy_sum = 0.0 + aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' %(iter)) model.save(sess, model_path+"--"+str(iter)) - print('time:%f',(time.time()-start_time)) + lr *= 0.5 def count_flops(graph): @@ -233,11 +232,11 @@ def count(): print("Prameter: ", total_parameters) def test( - train_file = "/home/test/modelzoo/DIEN/data/local_train_splitByUser", - test_file = "/home/test/modelzoo/DIEN/data/local_test_splitByUser", - uid_voc = "/home/test/modelzoo/CAN/data/uid_voc.pkl", - mid_voc = "/home/test/modelzoo/CAN/data/mid_voc.pkl", - cat_voc = "/home/test/modelzoo/CAN/data/cat_voc.pkl", + train_file = "../DIEN/data/local_train_splitByUser", + test_file = "../DIEN/data/local_test_splitByUser", + uid_voc = "../CAN/data/uid_voc.pkl", + mid_voc = "../CAN/data/mid_voc.pkl", + cat_voc = "../CAN/data/cat_voc.pkl", batch_size = 128, maxlen = 100, model_type = 'DNN', From b438c64ffcb2ca56d2736dc4dec5bb49b465b837 Mon Sep 17 00:00:00 2001 From: lihangtian <936971274@qq.com> Date: Fri, 19 Aug 2022 21:27:43 +0800 Subject: [PATCH 3/8] [ModelZoo] Support Co_Action Net --- modelzoo/CAN/README.md | 83 ++++++++++++++++--- modelzoo/CAN/data/README.md | 27 ++---- modelzoo/CAN/data/script/calc_ckpt.py | 12 --- modelzoo/CAN/data/script/data_iterator.py | 6 +- modelzoo/CAN/{data => }/script/Dice.py | 0 modelzoo/CAN/{data => }/script/model.py | 6 +- modelzoo/CAN/{data => }/script/model_avazu.py | 0 modelzoo/CAN/{data => }/script/rnn.py | 0 modelzoo/CAN/{data => }/script/utils.py | 0 modelzoo/CAN/train.py | 4 +- 10 files changed, 88 insertions(+), 50 deletions(-) delete mode 100644 modelzoo/CAN/data/script/calc_ckpt.py rename modelzoo/CAN/{data => }/script/Dice.py (100%) rename modelzoo/CAN/{data => }/script/model.py (99%) rename modelzoo/CAN/{data => }/script/model_avazu.py (100%) rename modelzoo/CAN/{data => }/script/rnn.py (100%) rename modelzoo/CAN/{data => }/script/utils.py (100%) diff --git a/modelzoo/CAN/README.md b/modelzoo/CAN/README.md index 02d8b396649..c26f3f8eace 100644 --- a/modelzoo/CAN/README.md +++ 
b/modelzoo/CAN/README.md
@@ -1,25 +1,86 @@
 # Co-Action Network
+The following is a brief directory structure and description for this example:
+
+```
+├── data                         # Dataset directory
+│   ├── prepare_data.sh          # Shell script to download and process the dataset
+│   ├── README.md                # Documentation describing how to prepare the dataset
+│   └── script                   # Scripts to process the dataset
+│       ├── data_iterator.py
+│       ├── generate_voc.py
+│       ├── local_aggretor.py
+│       ├── shuffle.py
+│       └── split_by_user.py
+├── script                       # Scripts of the CAN model
+│   ├── Dice.py
+│   ├── model.py
+│   ├── model_avazu.py
+│   ├── rnn.py
+│   └── utils.py
+├── README.md                    # Documentation
+└── train.py                     # Training script
+```
+
+## Content
+
+[TOC]
+
+## Model Structure
+
 Implementation of paper "CAN: Revisiting Feature Co-Action for Click Through Rate Prediction".
 
 paper: [arxiv (to be released)]()
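+
+A minimal sketch of the co-action unit at the heart of the model (the layer
+shapes follow `weight_emb_w = [[16, 8], [8, 4]]` in `script/model.py`; the
+function name and the `tanh` activation are illustrative, see `gen_coaction`
+in `script/model.py` for the actual implementation): the ad-side embedding is
+reshaped into the weights of a tiny MLP, which is then applied to each
+behavior embedding.
+
+```python
+import numpy as np
+
+def co_action(ad_emb, his_emb, layers=((16, 8), (8, 4))):
+    # ad_emb parameterizes a small MLP; his_emb holds behavior embeddings [T, 16]
+    h, offset = his_emb, 0
+    for d_in, d_out in layers:
+        w = ad_emb[offset:offset + d_in * d_out].reshape(d_in, d_out)
+        offset += d_in * d_out
+        h = np.tanh(h @ w)                 # [T, d_out]
+    return h.sum(axis=0)                   # pool over the behavior sequence
+
+ad = np.random.randn(16 * 8 + 8 * 4)       # ad-side embedding (160 dims)
+his = np.random.randn(100, 16)             # 100 behaviors, 16 dims each
+print(co_action(ad, his).shape)            # (4,)
+```
+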
-## Installation
-dependencies:
-
-tensorflow: 1.4.1
-
-python: 2.7
+
+## Usage
+
+### Stand-alone Training
+
+1. Please prepare the dataset and the DeepRec environment.
 
-Higher versions of TensorFlow and Python 3 will be supported soon.
+   1. Manually
 
-## Getting Started
-training:
+      - Follow [dataset preparation](https://github.com/alibaba/DeepRec/tree/main/modelzoo/DIEN#prepare) to prepare the dataset.
+      - Download the code with `git clone https://github.com/alibaba/DeepRec`.
+      - Follow [How to Build](https://github.com/alibaba/DeepRec#how-to-build) to build the DeepRec whl package and install it with `pip install $DEEPREC_WHL`.
 
-CUDA_VISIBLE_DEVICES=0 python train.py train {model}
+   2. Docker (recommended)
+
+      ```
+      docker pull alideeprec/deeprec-release-modelzoo:latest
+      docker run -it alideeprec/deeprec-release-modelzoo:latest /bin/bash
+
+      # In docker container
+      cd /root/modelzoo/CAN
+      ```
+
+2. Train:
+
+```
+CUDA_VISIBLE_DEVICES=0 python train.py train {model}
 
 model: CAN,Cartesion,PNN, etc. (check the train.py)
+```
+
+## Dataset
+
+The Amazon, Taobao and Avazu datasets are used as benchmark datasets.
+
+### Prepare
+
+For details on downloading the data, see `./data`.
+
-## Citation
-## Contact
-## License
diff --git a/modelzoo/CAN/data/README.md b/modelzoo/CAN/data/README.md
index 02d8b396649..1f4b135adae 100644
--- a/modelzoo/CAN/data/README.md
+++ b/modelzoo/CAN/data/README.md
@@ -1,25 +1,14 @@
-# Co-Action Network
+# Dataset
 
-Implementation of paper "CAN: Revisiting Feature Co-Action for Click Through Rate Prediction".
+## Prepare dataset
 
-paper: [arxiv (to be released)]()
+Prepare the DIEN data first.
 
-## Installation
-dependencies:
+Run `prepare_data.sh` to download and process the data:
 
-tensorflow: 1.4.1
+```
+sh prepare_data.sh
+```
 
-python: 2.7
+Then put the generated data into this folder.
 
-Higher versions of TensorFlow and Python 3 will be supported soon.
-
-## Getting Started
-training:
-
-CUDA_VISIBLE_DEVICES=0 python train.py train {model}
-
-model: CAN,Cartesion,PNN, etc. (check the train.py)
-
-## Citation
-## Contact
-## License
diff --git a/modelzoo/CAN/data/script/calc_ckpt.py b/modelzoo/CAN/data/script/calc_ckpt.py
deleted file mode 100644
index 97d30de8a73..00000000000
--- a/modelzoo/CAN/data/script/calc_ckpt.py
+++ /dev/null
@@ -1,12 +0,0 @@
-
-ckpt = tf.train.get_checkpoint_state("./ckpt_path/").model_checkpoint_path
-saver = tf.train.import_meta_graph(ckpt+'.meta')
-variables = tf.trainable_variables()
-total_parameters = 0
-for variable in variables:
-    shape = variable.get_shape()
-    variable_parameters = 1
-    for dim in shape:
-        variable_parameters *= dim.value
-    total_parameters += variable_parameters
-
diff --git a/modelzoo/CAN/data/script/data_iterator.py b/modelzoo/CAN/data/script/data_iterator.py
index b49e5f8c2c9..75c53c46919 100644
--- a/modelzoo/CAN/data/script/data_iterator.py
+++ b/modelzoo/CAN/data/script/data_iterator.py
@@ -51,10 +51,10 @@ def __init__(self, source,
         else:
             self.source = fopen(source, 'r')
         self.source_dicts = []
-        for source_dict in [uid_voc, mid_voc, cat_voc, '../CAN/data/item_carte_voc.pkl', '../CAN/data/cate_carte_voc.pkl']:
+        for source_dict in [uid_voc, mid_voc, cat_voc, './data/item_carte_voc.pkl', './data/cate_carte_voc.pkl']:
             self.source_dicts.append(load_dict(source_dict))
 
-        f_meta = open("../CAN/data/item-info", "r")
+        f_meta = open("./data/item-info", "r")
         meta_map = {}
         for line in f_meta:
             arr = line.strip().split("\t")
@@ -73,7 +73,7 @@ def __init__(self, source,
                 cat_idx = 0
             self.meta_id_map[mid_idx] = cat_idx
 
-        f_review = open("../CAN/data/reviews-info", "r")
+        f_review = open("./data/reviews-info", "r")
         self.mid_list_for_random = []
         for line in f_review:
             arr = line.strip().split("\t")
diff --git a/modelzoo/CAN/data/script/Dice.py b/modelzoo/CAN/script/Dice.py
similarity index 100%
rename from modelzoo/CAN/data/script/Dice.py
rename to modelzoo/CAN/script/Dice.py
diff --git a/modelzoo/CAN/data/script/model.py b/modelzoo/CAN/script/model.py
similarity index 99%
rename from modelzoo/CAN/data/script/model.py
rename to modelzoo/CAN/script/model.py
index aba37138e0e..e968c382f55 100644
--- a/modelzoo/CAN/data/script/model.py
+++ b/modelzoo/CAN/script/model.py
@@ -2,9 +2,9 @@
 from tensorflow.python.ops.rnn_cell import GRUCell
 from tensorflow.python.ops.rnn_cell import LSTMCell
 from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn
-from data.script.rnn import dynamic_rnn
-from data.script.utils import *
-from data.script.Dice import dice
+from script.rnn import dynamic_rnn
+from script.utils import *
+from script.Dice import dice
 
 #### CAN config #####
 weight_emb_w = [[16, 8], [8,4]]
diff --git a/modelzoo/CAN/data/script/model_avazu.py b/modelzoo/CAN/script/model_avazu.py
similarity index 100%
rename from modelzoo/CAN/data/script/model_avazu.py
rename to modelzoo/CAN/script/model_avazu.py
diff --git a/modelzoo/CAN/data/script/rnn.py b/modelzoo/CAN/script/rnn.py
similarity index 100%
rename from modelzoo/CAN/data/script/rnn.py
rename to modelzoo/CAN/script/rnn.py
diff --git a/modelzoo/CAN/data/script/utils.py b/modelzoo/CAN/script/utils.py
similarity index 100%
rename from modelzoo/CAN/data/script/utils.py
rename to modelzoo/CAN/script/utils.py
diff --git a/modelzoo/CAN/train.py b/modelzoo/CAN/train.py
index 7ef1a6cda18..dd54677aecc 100644
--- a/modelzoo/CAN/train.py
+++ b/modelzoo/CAN/train.py
@@ -1,11 +1,11 @@
 import numpy
 from data.script.data_iterator import DataIterator
 import tensorflow as tf
-from data.script.model import *
+from script.model import *
 import time
 import random
 import sys
-from data.script.utils import *
+from script.utils import *
 from tqdm import tqdm
 
 EMBEDDING_DIM = 18
From c5df688db708d3a890309b6d5655fbb90aa1fd90 Mon Sep 17 00:00:00 2001
From: lihangtian <936971274@qq.com>
Date: Fri, 19 Aug 2022 21:35:51 +0800
Subject: [PATCH 4/8] [ModelZoo] Support FNN

---
 modelzoo/FNN/README.md | 87 +
 modelzoo/FNN/data/README.md | 4 +
 modelzoo/FNN/result/README.md | 2 +
 modelzoo/FNN/script/__init__.py | 0
 modelzoo/FNN/script/contrib/__init__.py | 0
 modelzoo/FNN/script/contrib/rnn.py | 1153 +++++++++++++
 modelzoo/FNN/script/contrib/rnn_v2.py | 1452 ++++++++++++++++
 modelzoo/FNN/script/contrib/utils.py | 378 +++++
 modelzoo/FNN/script/estimator/__init__.py | 1 +
 .../FNN/script/estimator/feature_column.py | 52 +
 modelzoo/FNN/script/estimator/inputs.py | 52 +
 modelzoo/FNN/script/estimator/utils.py | 217 +++
 modelzoo/FNN/script/feature_column.py | 220 +++
 modelzoo/FNN/script/inputs.py | 155 ++
 modelzoo/FNN/script/layers/__init__.py | 52 +
 modelzoo/FNN/script/layers/activation.py | 85 +
 modelzoo/FNN/script/layers/core.py | 267 +++
 modelzoo/FNN/script/layers/interaction.py | 1492 +++++++++++++++++
 modelzoo/FNN/script/layers/normalization.py | 51 +
 modelzoo/FNN/script/layers/sequence.py | 901 ++++++++++
 modelzoo/FNN/script/layers/utils.py | 302 ++++
 modelzoo/FNN/script/models/__init__.py | 4 +
 modelzoo/FNN/script/models/fnn.py | 53 +
 modelzoo/FNN/script/utils.py | 46 +
 modelzoo/FNN/train.py | 139 ++
 25 files changed, 7165 insertions(+)
 create mode 100644 modelzoo/FNN/README.md
 create mode 100644 modelzoo/FNN/data/README.md
 create mode 100644 modelzoo/FNN/result/README.md
 create mode 100644 modelzoo/FNN/script/__init__.py
 create mode 100644 modelzoo/FNN/script/contrib/__init__.py
 create mode 100644 modelzoo/FNN/script/contrib/rnn.py
 create mode 100644 modelzoo/FNN/script/contrib/rnn_v2.py
 create mode 100644 modelzoo/FNN/script/contrib/utils.py
 create mode 100644 modelzoo/FNN/script/estimator/__init__.py
 create mode 100644 modelzoo/FNN/script/estimator/feature_column.py
 create mode 100644 modelzoo/FNN/script/estimator/inputs.py
 create mode 100644 modelzoo/FNN/script/estimator/utils.py
 create mode 100644 modelzoo/FNN/script/feature_column.py
 create mode 100644 modelzoo/FNN/script/inputs.py
 create mode 100644 modelzoo/FNN/script/layers/__init__.py
 create mode 100644 modelzoo/FNN/script/layers/activation.py
 create mode 100644 modelzoo/FNN/script/layers/core.py
 create mode 100644 modelzoo/FNN/script/layers/interaction.py
 create mode 100644 modelzoo/FNN/script/layers/normalization.py
 create mode 100644 modelzoo/FNN/script/layers/sequence.py
 create mode 100644 modelzoo/FNN/script/layers/utils.py
 create mode 100644 modelzoo/FNN/script/models/__init__.py
 create mode 100644 modelzoo/FNN/script/models/fnn.py
 create mode 100644 modelzoo/FNN/script/utils.py
 create mode 100644 modelzoo/FNN/train.py
diff --git a/modelzoo/FNN/README.md b/modelzoo/FNN/README.md
new file mode 100644
index 00000000000..a2f9e721921
--- /dev/null
+++ b/modelzoo/FNN/README.md
@@ -0,0 +1,87 @@
+# FNN
+
+The following is a brief directory structure and description for this example:
+
+```
+├── data                        # Dataset directory
+│   └── README.md               # Documentation describing how to prepare the dataset
+├── result                      # Evaluation metrics are saved here by default
+├── script                      # Model code directory
+│   ├── contrib                 # RNN implementations
+│   ├── estimator               # Estimator-style feature columns and input helpers
+│   ├── layers                  # Layers of the model
+│   ├── models                  # The FNN model itself
+│   ├── feature_column.py       # Feature column definitions
+│   ├── inputs.py               # Construction of the input layer
+│   └── utils.py
+├── train.py                    # Training script
+└── README.md                   # Documentation
+```
+
+## Content
+
+[TOC]
+
+## Model Structure
+
+Implementation of paper "Deep Learning over Multi-field Categorical Data – A Case Study on User Response Prediction".
+
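+In the paper the field embeddings are pre-trained with a factorization
+machine and the stacked network is then fine-tuned end to end. A minimal
+sketch of the forward pass (sizes and names here are illustrative; the
+actual model lives in `script/models/fnn.py`):
+
+```python
+import numpy as np
+
+rng = np.random.default_rng(0)
+n_fields, vocab, emb_dim = 3, 1000, 4
+emb = rng.normal(size=(vocab, emb_dim))      # embedding table (FM-pretrained in the paper)
+W1 = rng.normal(size=(n_fields * emb_dim, 32))
+b1 = np.zeros(32)
+W2 = rng.normal(size=(32, 1))
+b2 = np.zeros(1)
+
+def fnn_forward(feat_ids):
+    # feat_ids: one active feature index per field, shape [n_fields]
+    x = emb[feat_ids].reshape(-1)            # concatenate field embeddings
+    h = np.tanh(x @ W1 + b1)                 # hidden layer
+    logit = h @ W2 + b2
+    return 1.0 / (1.0 + np.exp(-logit))      # predicted CTR
+
+print(fnn_forward(np.array([7, 42, 305])))
+```
+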
+## Usage
+
+### Stand-alone Training
+
+1. Please prepare the dataset and the DeepRec environment.
+
+   1. Manually
+
+      - Follow [dataset preparation](https://github.com/alibaba/DeepRec/tree/main/modelzoo/DIEN#prepare) to prepare the dataset.
+      - Download the code with `git clone https://github.com/alibaba/DeepRec`.
+      - Follow [How to Build](https://github.com/alibaba/DeepRec#how-to-build) to build the DeepRec whl package and install it with `pip install $DEEPREC_WHL`.
+
+   2. Docker (recommended)
+
+      ```
+      docker pull alideeprec/deeprec-release-modelzoo:latest
+      docker run -it alideeprec/deeprec-release-modelzoo:latest /bin/bash
+
+      # In docker container
+      cd /root/modelzoo/FNN
+      ```
+
+2. Train:
+
+```
+python train.py
+```
+
+## Dataset
+
+The iPinYou dataset is used as the benchmark dataset.
+
+### Prepare
+
+For details on downloading the data, see [Data Preparation](https://github.com/Atomu2014/make-ipinyou-data).
+
+### Campaigns
+
+We use campaign 1458 as an example here.
+
+```
+make-ipinyou-data/1458$ ls
+featindex.txt test.log.txt test.txt train.log.txt train.txt
+```
+
+- `train.log.txt` and `test.log.txt` are the formalised string data for each row (record) in train and test. The first column is whether the user clicked the ad or not.
+- `featindex.txt` maps the features to their indexes. For example, `8:1.1.174.* 76` means that the 8th column in `train.log.txt` with the string `1.1.174.*` maps to feature index `76`.
+- `train.txt` and `test.txt` are the mapped vector data for `train.log.txt` and `test.log.txt`. Each line holds the click label `y` followed by the active feature indexes `x`. Such data is in the standard form as introduced in [iPinYou Benchmarking](http://arxiv.org/abs/1407.7073).
diff --git a/modelzoo/FNN/data/README.md b/modelzoo/FNN/data/README.md
new file mode 100644
index 00000000000..15a0bc61c8d
--- /dev/null
+++ b/modelzoo/FNN/data/README.md
@@ -0,0 +1,4 @@
+make-ipinyou-data
+=================
+
+For details on downloading the data, see [Data Preparation](https://github.com/Atomu2014/make-ipinyou-data).
diff --git a/modelzoo/FNN/result/README.md b/modelzoo/FNN/result/README.md
new file mode 100644
index 00000000000..6f962fb1716
--- /dev/null
+++ b/modelzoo/FNN/result/README.md
@@ -0,0 +1,2 @@
+# Result
+Evaluation metrics files are saved in this folder by default.
diff --git a/modelzoo/FNN/script/__init__.py b/modelzoo/FNN/script/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/modelzoo/FNN/script/contrib/__init__.py b/modelzoo/FNN/script/contrib/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/modelzoo/FNN/script/contrib/rnn.py b/modelzoo/FNN/script/contrib/rnn.py
new file mode 100644
index 00000000000..b3554993063
--- /dev/null
+++ b/modelzoo/FNN/script/contrib/rnn.py
@@ -0,0 +1,1153 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+#
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+
+# you may not use this file except in compliance with the License.
+
+# You may obtain a copy of the License at
+
+#
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+#
+
+# Unless required by applicable law or agreed to in writing, software
+
+# distributed under the License is distributed on an "AS IS" BASIS,
+
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ +# See the License for the specific language governing permissions and + +# limitations under the License. + +# ============================================================================== + + +"""RNN helpers for TensorFlow models. +@@bidirectional_dynamic_rnn +@@dynamic_rnn +@@raw_rnn +@@static_rnn +@@static_state_saving_rnn +@@static_bidirectional_rnn +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util import nest +import tensorflow as tf + + +def _like_rnncell_(cell): + """Checks that a given object is an RNNCell by using duck typing.""" + + conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"), + + hasattr(cell, "zero_state"), callable(cell)] + + return all(conditions) + + +# pylint: disable=protected-access + +_concat = rnn_cell_impl._concat +try: + _like_rnncell = rnn_cell_impl._like_rnncell +except Exception as e: + _like_rnncell = _like_rnncell_ + + +# pylint: enable=protected-access + + +def _transpose_batch_time(x): + """Transpose the batch and time dimensions of a Tensor. + Retains as much of the static shape information as possible. + Args: + x: A tensor of rank 2 or higher. + Returns: + x transposed along the first two dimensions. + Raises: + ValueError: if `x` is rank 1 or lower. + """ + + x_static_shape = x.get_shape() + + if x_static_shape.ndims is not None and x_static_shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2, but saw shape: %s" % + + (x, x_static_shape)) + + x_rank = array_ops.rank(x) + + x_t = array_ops.transpose( + + x, array_ops.concat( + + ([1, 0], math_ops.range(2, x_rank)), axis=0)) + + x_t.set_shape( + + tensor_shape.TensorShape([ + + x_static_shape[1].value, x_static_shape[0].value + + ]).concatenate(x_static_shape[2:])) + + return x_t + + +def _best_effort_input_batch_size(flat_input): + """Get static input batch size if available, with fallback to the dynamic one. + Args: + flat_input: An iterable of time major input Tensors of shape [max_time, + batch_size, ...]. All inputs should have compatible batch sizes. + Returns: + The batch size in Python integer if available, or a scalar Tensor otherwise. + Raises: + ValueError: if there is any input with an invalid shape. + """ + + for input_ in flat_input: + + shape = input_.shape + + if shape.ndims is None: + continue + + if shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2" % input_) + + batch_size = shape[1].value + + if batch_size is not None: + return batch_size + + # Fallback to the dynamic batch size of the first input. + + return array_ops.shape(flat_input[0])[1] + + +def _infer_state_dtype(explicit_dtype, state): + """Infer the dtype of an RNN state. + Args: + explicit_dtype: explicitly declared dtype or None. + state: RNN's hidden state. Must be a Tensor or a nested iterable containing + Tensors. + Returns: + dtype: inferred dtype of hidden state. + Raises: + ValueError: if `state` has heterogeneous dtypes or is empty. 
+ """ + + if explicit_dtype is not None: + + return explicit_dtype + + elif nest.is_sequence(state): + + inferred_dtypes = [element.dtype for element in nest.flatten(state)] + + if not inferred_dtypes: + raise ValueError("Unable to infer dtype from empty state.") + + all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) + + if not all_same: + raise ValueError( + + "State has tensors of different inferred_dtypes. Unable to infer a " + + "single representative dtype.") + + return inferred_dtypes[0] + + else: + + return state.dtype + + +# pylint: disable=unused-argument + +def _rnn_step( + + time, sequence_length, min_sequence_length, max_sequence_length, + + zero_output, state, call_cell, state_size, skip_conditionals=False): + """Calculate one step of a dynamic RNN minibatch. + Returns an (output, state) pair conditioned on the sequence_lengths. + When skip_conditionals=False, the pseudocode is something like: + if t >= max_sequence_length: + return (zero_output, state) + if t < min_sequence_length: + return call_cell() + # Selectively output zeros or output, old state or new state depending + # on if we've finished calculating each row. + new_output, new_state = call_cell() + final_output = np.vstack([ + zero_output if time >= sequence_lengths[r] else new_output_r + for r, new_output_r in enumerate(new_output) + ]) + final_state = np.vstack([ + state[r] if time >= sequence_lengths[r] else new_state_r + for r, new_state_r in enumerate(new_state) + ]) + return (final_output, final_state) + Args: + time: Python int, the current time step + sequence_length: int32 `Tensor` vector of size [batch_size] + min_sequence_length: int32 `Tensor` scalar, min of sequence_length + max_sequence_length: int32 `Tensor` scalar, max of sequence_length + zero_output: `Tensor` vector of shape [output_size] + state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`, + or a list/tuple of such tensors. + call_cell: lambda returning tuple of (new_output, new_state) where + new_output is a `Tensor` matrix of shape `[batch_size, output_size]`. + new_state is a `Tensor` matrix of shape `[batch_size, state_size]`. + state_size: The `cell.state_size` associated with the state. + skip_conditionals: Python bool, whether to skip using the conditional + calculations. This is useful for `dynamic_rnn`, where the input tensor + matches `max_sequence_length`, and using conditionals just slows + everything down. + Returns: + A tuple of (`final_output`, `final_state`) as given by the pseudocode above: + final_output is a `Tensor` matrix of shape [batch_size, output_size] + final_state is either a single `Tensor` matrix, or a tuple of such + matrices (matching length and shapes of input `state`). + Raises: + ValueError: If the cell returns a state tuple whose length does not match + that returned by `state_size`. + """ + + # Convert state to a list for ease of use + + flat_state = nest.flatten(state) + + flat_zero_output = nest.flatten(zero_output) + + def _copy_one_through(output, new_output): + + # If the state contains a scalar value we simply pass it through. + + if output.shape.ndims == 0: + return new_output + + copy_cond = (time >= sequence_length) + + with ops.colocate_with(new_output): + return array_ops.where(copy_cond, output, new_output) + + def _copy_some_through(flat_new_output, flat_new_state): + + # Use broadcasting select to determine which values should get + + # the previous state & zero output, and which values should get + + # a calculated state & output. 
+ + flat_new_output = [ + + _copy_one_through(zero_output, new_output) + + for zero_output, new_output in zip(flat_zero_output, flat_new_output)] + + flat_new_state = [ + + _copy_one_through(state, new_state) + + for state, new_state in zip(flat_state, flat_new_state)] + + return flat_new_output + flat_new_state + + def _maybe_copy_some_through(): + + """Run RNN step. Pass through either no or some past state.""" + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + flat_new_state = nest.flatten(new_state) + + flat_new_output = nest.flatten(new_output) + + return control_flow_ops.cond( + + # if t < min_seq_len: calculate and return everything + + time < min_sequence_length, lambda: flat_new_output + flat_new_state, + + # else copy some of it through + + lambda: _copy_some_through(flat_new_output, flat_new_state)) + + # TODO(ebrevdo): skipping these conditionals may cause a slowdown, + + # but benefits from removing cond() and its gradient. We should + + # profile with and without this switch here. + + if skip_conditionals: + + # Instead of using conditionals, perform the selective copy at all time + + # steps. This is faster when max_seq_len is equal to the number of unrolls + + # (which is typical for dynamic_rnn). + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + new_state = nest.flatten(new_state) + + new_output = nest.flatten(new_output) + + final_output_and_state = _copy_some_through(new_output, new_state) + + else: + + empty_update = lambda: flat_zero_output + flat_state + + final_output_and_state = control_flow_ops.cond( + + # if t >= max_seq_len: copy all state through, output zeros + + time >= max_sequence_length, empty_update, + + # otherwise calculation is required: copy some or all of it through + + _maybe_copy_some_through) + + if len(final_output_and_state) != len(flat_zero_output) + len(flat_state): + raise ValueError("Internal error: state and output were not concatenated " + + "correctly.") + + final_output = final_output_and_state[:len(flat_zero_output)] + + final_state = final_output_and_state[len(flat_zero_output):] + + for output, flat_output in zip(final_output, flat_zero_output): + output.set_shape(flat_output.get_shape()) + + for substate, flat_substate in zip(final_state, flat_state): + substate.set_shape(flat_substate.get_shape()) + + final_output = nest.pack_sequence_as( + + structure=zero_output, flat_sequence=final_output) + + final_state = nest.pack_sequence_as( + + structure=state, flat_sequence=final_state) + + return final_output, final_state + + +def _reverse_seq(input_seq, lengths): + """Reverse a list of Tensors up to specified lengths. + Args: + input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) + or nested tuples of tensors. + lengths: A `Tensor` of dimension batch_size, containing lengths for each + sequence in the batch. If "None" is specified, simply reverses + the list. 
+ Returns: + time-reversed sequence + """ + + if lengths is None: + return list(reversed(input_seq)) + + flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) + + flat_results = [[] for _ in range(len(input_seq))] + + for sequence in zip(*flat_input_seq): + + input_shape = tensor_shape.unknown_shape( + + ndims=sequence[0].get_shape().ndims) + + for input_ in sequence: + input_shape.merge_with(input_.get_shape()) + + input_.set_shape(input_shape) + + # Join into (time, batch_size, depth) + + s_joined = array_ops.stack(sequence) + + # Reverse along dimension 0 + + s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) + + # Split again into list + + result = array_ops.unstack(s_reversed) + + for r, flat_result in zip(result, flat_results): + r.set_shape(input_shape) + + flat_result.append(r) + + results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) + + for input_, flat_result in zip(input_seq, flat_results)] + + return results + + +# +# def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, +# +# initial_state_fw=None, initial_state_bw=None, +# +# dtype=None, parallel_iterations=None, +# +# swap_memory=False, time_major=False, scope=None): +# +# """Creates a dynamic version of bidirectional recurrent neural network. +# +# +# +# Takes input and builds independent forward and backward RNNs. The input_size +# +# of forward and backward cell must match. The initial state for both directions +# +# is zero by default (but can be set optionally) and no intermediate states are +# +# ever returned -- the network is fully unrolled for the given (passed in) +# +# length(s) of the sequence(s) or completely unrolled if length(s) is not +# +# given. +# +# +# +# Args: +# +# cell_fw: An instance of RNNCell, to be used for forward direction. +# +# cell_bw: An instance of RNNCell, to be used for backward direction. +# +# inputs: The RNN inputs. +# +# If time_major == False (default), this must be a tensor of shape: +# +# `[batch_size, max_time, ...]`, or a nested tuple of such elements. +# +# If time_major == True, this must be a tensor of shape: +# +# `[max_time, batch_size, ...]`, or a nested tuple of such elements. +# +# sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, +# +# containing the actual lengths for each of the sequences in the batch. +# +# If not provided, all batch entries are assumed to be full sequences; and +# +# time reversal is applied from time `0` to `max_time` for each sequence. +# +# initial_state_fw: (optional) An initial state for the forward RNN. +# +# This must be a tensor of appropriate type and shape +# +# `[batch_size, cell_fw.state_size]`. +# +# If `cell_fw.state_size` is a tuple, this should be a tuple of +# +# tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. +# +# initial_state_bw: (optional) Same as for `initial_state_fw`, but using +# +# the corresponding properties of `cell_bw`. +# +# dtype: (optional) The data type for the initial states and expected output. +# +# Required if initial_states are not provided or RNN states have a +# +# heterogeneous dtype. +# +# parallel_iterations: (Default: 32). The number of iterations to run in +# +# parallel. Those operations which do not have any temporal dependency +# +# and can be run in parallel, will be. This parameter trades off +# +# time for space. Values >> 1 use more memory but take less time, +# +# while smaller values use less memory but computations take longer. 
+# +# swap_memory: Transparently swap the tensors produced in forward inference +# +# but needed for back prop from GPU to CPU. This allows training RNNs +# +# which would typically not fit on a single GPU, with very minimal (or no) +# +# performance penalty. +# +# time_major: The shape format of the `inputs` and `outputs` Tensors. +# +# If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. +# +# If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. +# +# Using `time_major = True` is a bit more efficient because it avoids +# +# transposes at the beginning and end of the RNN calculation. However, +# +# most TensorFlow data is batch-major, so by default this function +# +# accepts input and emits output in batch-major form. +# +# scope: VariableScope for the created subgraph; defaults to +# +# "bidirectional_rnn" +# +# +# +# Returns: +# +# A tuple (outputs, output_states) where: +# +# outputs: A tuple (output_fw, output_bw) containing the forward and +# +# the backward rnn output `Tensor`. +# +# If time_major == False (default), +# +# output_fw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_bw.output_size]`. +# +# If time_major == True, +# +# output_fw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_bw.output_size]`. +# +# It returns a tuple instead of a single concatenated `Tensor`, unlike +# +# in the `bidirectional_rnn`. If the concatenated one is preferred, +# +# the forward and backward outputs can be concatenated as +# +# `tf.concat(outputs, 2)`. +# +# output_states: A tuple (output_state_fw, output_state_bw) containing +# +# the forward and the backward final states of bidirectional rnn. +# +# +# +# Raises: +# +# TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. 
+# +# """ +# +# +# +# if not _like_rnncell(cell_fw): +# +# raise TypeError("cell_fw must be an instance of RNNCell") +# +# if not _like_rnncell(cell_bw): +# +# raise TypeError("cell_bw must be an instance of RNNCell") +# +# +# +# with vs.variable_scope(scope or "bidirectional_rnn"): +# +# # Forward direction +# +# with vs.variable_scope("fw") as fw_scope: +# +# output_fw, output_state_fw = dynamic_rnn( +# +# cell=cell_fw, inputs=inputs, sequence_length=sequence_length, +# +# initial_state=initial_state_fw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=fw_scope) +# +# +# +# # Backward direction +# +# if not time_major: +# +# time_dim = 1 +# +# batch_dim = 0 +# +# else: +# +# time_dim = 0 +# +# batch_dim = 1 +# +# +# +# def _reverse(input_, seq_lengths, seq_dim, batch_dim): +# +# if seq_lengths is not None: +# +# return array_ops.reverse_sequence( +# +# input=input_, seq_lengths=seq_lengths, +# +# seq_dim=seq_dim, batch_dim=batch_dim) +# +# else: +# +# return array_ops.reverse(input_, axis=[seq_dim]) +# +# +# +# with vs.variable_scope("bw") as bw_scope: +# +# inputs_reverse = _reverse( +# +# inputs, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# tmp, output_state_bw = dynamic_rnn( +# +# cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, +# +# initial_state=initial_state_bw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=bw_scope) +# +# +# +# output_bw = _reverse( +# +# tmp, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# +# +# outputs = (output_fw, output_bw) +# +# output_states = (output_state_fw, output_state_bw) +# +# +# +# return (outputs, output_states) +# + + +def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None, + + dtype=None, parallel_iterations=None, swap_memory=False, + + time_major=False, scope=None): + """Creates a recurrent neural network specified by RNNCell `cell`. + Performs fully dynamic unrolling of `inputs`. + Example: + ```python + # create a BasicRNNCell + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size] + # defining initial state + initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) + # 'state' is a tensor of shape [batch_size, cell_state_size] + outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data, + initial_state=initial_state, + dtype=tf.float32) + ``` + ```python + # create 2 LSTMCells + rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]] + # create a RNN cell composed sequentially of a number of RNNCells + multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers) + # 'outputs' is a tensor of shape [batch_size, max_time, 256] + # 'state' is a N-tuple where N is the number of LSTMCells containing a + # tf.contrib.rnn.LSTMStateTuple for each cell + outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, + inputs=data, + dtype=tf.float32) + ``` + Args: + cell: An instance of RNNCell. + inputs: The RNN inputs. + If `time_major == False` (default), this must be a `Tensor` of shape: + `[batch_size, max_time, ...]`, or a nested tuple of such + elements. + If `time_major == True`, this must be a `Tensor` of shape: + `[max_time, batch_size, ...]`, or a nested tuple of such + elements. + This may also be a (possibly nested) tuple of Tensors satisfying + this property. 
The first two dimensions must match across all the inputs, + but otherwise the ranks and other shape components may differ. + In this case, input to `cell` at each time-step will replicate the + structure of these tuples, except for the time dimension (from which the + time is taken). + The input to `cell` at each time step will be a `Tensor` or (possibly + nested) tuple of Tensors each with dimensions `[batch_size, ...]`. + sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. + Used to copy-through state and zero-out outputs when past a batch + element's sequence length. So it's more for correctness than performance. + initial_state: (optional) An initial state for the RNN. + If `cell.state_size` is an integer, this must be + a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. + If `cell.state_size` is a tuple, this should be a tuple of + tensors having shapes `[batch_size, s] for s in cell.state_size`. + dtype: (optional) The data type for the initial state and expected output. + Required if initial_state is not provided or RNN state has a heterogeneous + dtype. + parallel_iterations: (Default: 32). The number of iterations to run in + parallel. Those operations which do not have any temporal dependency + and can be run in parallel, will be. This parameter trades off + time for space. Values >> 1 use more memory but take less time, + while smaller values use less memory but computations take longer. + swap_memory: Transparently swap the tensors produced in forward inference + but needed for back prop from GPU to CPU. This allows training RNNs + which would typically not fit on a single GPU, with very minimal (or no) + performance penalty. + time_major: The shape format of the `inputs` and `outputs` Tensors. + If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. + If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. + Using `time_major = True` is a bit more efficient because it avoids + transposes at the beginning and end of the RNN calculation. However, + most TensorFlow data is batch-major, so by default this function + accepts input and emits output in batch-major form. + scope: VariableScope for the created subgraph; defaults to "rnn". + Returns: + A pair (outputs, state) where: + outputs: The RNN output `Tensor`. + If time_major == False (default), this will be a `Tensor` shaped: + `[batch_size, max_time, cell.output_size]`. + If time_major == True, this will be a `Tensor` shaped: + `[max_time, batch_size, cell.output_size]`. + Note, if `cell.output_size` is a (possibly nested) tuple of integers + or `TensorShape` objects, then `outputs` will be a tuple having the + same structure as `cell.output_size`, containing Tensors having shapes + corresponding to the shape data in `cell.output_size`. + state: The final state. If `cell.state_size` is an int, this + will be shaped `[batch_size, cell.state_size]`. If it is a + `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. + If it is a (possibly nested) tuple of ints or `TensorShape`, this will + be a tuple having the corresponding shapes. If cells are `LSTMCells` + `state` will be a tuple containing a `LSTMStateTuple` for each cell. + Raises: + TypeError: If `cell` is not an instance of RNNCell. + ValueError: If inputs is None or an empty list. 
+ """ + + if not _like_rnncell(cell): + raise TypeError("cell must be an instance of RNNCell") + + # By default, time_major==False and inputs are batch-major: shaped + + # [batch, time, depth] + + # For internal calculations, we transpose to [time, batch, depth] + + flat_input = nest.flatten(inputs) + + if not time_major: + # (B,T,D) => (T,B,D) + + flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input] + + flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input) + + parallel_iterations = parallel_iterations or 32 + + if sequence_length is not None: + + sequence_length = math_ops.to_int32(sequence_length) + + if sequence_length.get_shape().ndims not in (None, 1): + raise ValueError( + + "sequence_length must be a vector of length batch_size, " + + "but saw shape: %s" % sequence_length.get_shape()) + + sequence_length = array_ops.identity( # Just to find it in the graph. + + sequence_length, name="sequence_length") + + # Create a new scope in which the caching device is either + + # determined by the parent scope, or is set to place the cached + + # Variable using the same placement as for the rest of the RNN. + + with vs.variable_scope(scope or "rnn",reuse=tf.AUTO_REUSE) as varscope:#TODO:user defined reuse + + if varscope.caching_device is None: + varscope.set_caching_device(lambda op: op.device) + + batch_size = _best_effort_input_batch_size(flat_input) + + if initial_state is not None: + + state = initial_state + + else: + + if not dtype: + raise ValueError("If there is no initial_state, you must give a dtype.") + + state = cell.zero_state(batch_size, dtype) + + def _assert_has_shape(x, shape): + + x_shape = array_ops.shape(x) + + packed_shape = array_ops.stack(shape) + + return control_flow_ops.Assert( + + math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), + + ["Expected shape for Tensor %s is " % x.name, + + packed_shape, " but saw shape: ", x_shape]) + + if sequence_length is not None: + # Perform some shape validation + + with ops.control_dependencies( + + [_assert_has_shape(sequence_length, [batch_size])]): + sequence_length = array_ops.identity( + + sequence_length, name="CheckSeqLen") + + inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input) + + (outputs, final_state) = _dynamic_rnn_loop( + + cell, + + inputs, + + state, + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory, + + att_scores=att_scores, + + sequence_length=sequence_length, + + dtype=dtype) + + # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth]. + + # If we are performing batch-major calculations, transpose output back + + # to shape [batch, time, depth] + + if not time_major: + # (T,B,D) => (B,T,D) + + outputs = nest.map_structure(_transpose_batch_time, outputs) + + return (outputs, final_state) + + +def _dynamic_rnn_loop(cell, + + inputs, + + initial_state, + + parallel_iterations, + + swap_memory, + + att_scores=None, + + sequence_length=None, + + dtype=None): + """Internal implementation of Dynamic RNN. + Args: + cell: An instance of RNNCell. + inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested + tuple of such elements. + initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if + `cell.state_size` is a tuple, then this should be a tuple of + tensors having shapes `[batch_size, s] for s in cell.state_size`. + parallel_iterations: Positive Python int. + swap_memory: A Python boolean + sequence_length: (optional) An `int32` `Tensor` of shape [batch_size]. 
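+      att_scores: (optional) A `Tensor` of shape `[batch_size, time, 1]` with
+        per-step attention scores; sliced along the time axis and passed to
+        `cell` as a third positional argument (non-standard extension).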
+ dtype: (optional) Expected dtype of output. If not specified, inferred from + initial_state. + Returns: + Tuple `(final_outputs, final_state)`. + final_outputs: + A `Tensor` of shape `[time, batch_size, cell.output_size]`. If + `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape` + objects, then this returns a (possibly nsted) tuple of Tensors matching + the corresponding shapes. + final_state: + A `Tensor`, or possibly nested tuple of Tensors, matching in length + and shapes to `initial_state`. + Raises: + ValueError: If the input depth cannot be inferred via shape inference + from the inputs. + """ + + state = initial_state + + assert isinstance(parallel_iterations, int), "parallel_iterations must be int" + + state_size = cell.state_size + + flat_input = nest.flatten(inputs) + + flat_output_size = nest.flatten(cell.output_size) + + # Construct an initial output + + input_shape = array_ops.shape(flat_input[0]) + + time_steps = input_shape[0] + + batch_size = _best_effort_input_batch_size(flat_input) + + inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3) + + for input_ in flat_input) + + const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2] + + for shape in inputs_got_shape: + + if not shape[2:].is_fully_defined(): + raise ValueError( + + "Input size (depth of inputs) must be accessible via shape inference," + + " but saw value None.") + + got_time_steps = shape[0].value + + got_batch_size = shape[1].value + + if const_time_steps != got_time_steps: + raise ValueError( + + "Time steps is not the same for all the elements in the input in a " + + "batch.") + + if const_batch_size != got_batch_size: + raise ValueError( + + "Batch_size is not the same for all the elements in the input.") + + # Prepare dynamic conditional copying of state & output + + def _create_zero_arrays(size): + + size = _concat(batch_size, size) + + return array_ops.zeros( + + array_ops.stack(size), _infer_state_dtype(dtype, state)) + + flat_zero_output = tuple(_create_zero_arrays(output) + + for output in flat_output_size) + + zero_output = nest.pack_sequence_as(structure=cell.output_size, + + flat_sequence=flat_zero_output) + + if sequence_length is not None: + min_sequence_length = math_ops.reduce_min(sequence_length) + + max_sequence_length = math_ops.reduce_max(sequence_length) + + time = array_ops.constant(0, dtype=dtypes.int32, name="time") + + with ops.name_scope("dynamic_rnn") as scope: + + base_name = scope + + def _create_ta(name, dtype): + + return tensor_array_ops.TensorArray(dtype=dtype, + + size=time_steps, + + tensor_array_name=base_name + name) + + output_ta = tuple(_create_ta("output_%d" % i, + + _infer_state_dtype(dtype, state)) + + for i in range(len(flat_output_size))) + + input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype) + + for i in range(len(flat_input))) + + input_ta = tuple(ta.unstack(input_) + + for ta, input_ in zip(input_ta, flat_input)) + + def _time_step(time, output_ta_t, state, att_scores=None): + + """Take a time step of the dynamic RNN. + Args: + time: int32 scalar Tensor. + output_ta_t: List of `TensorArray`s that represent the output. + state: nested tuple of vector tensors that represent the state. + Returns: + The tuple (time + 1, output_ta_t with updated flow, new_state). 
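+        When `att_scores` is not None, the cell is called as
+        `cell(input_t, state, att_score)` and the returned tuple carries
+        `att_scores` through unchanged as a fourth element.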
+ """ + + input_t = tuple(ta.read(time) for ta in input_ta) + + # Restore some shape information + + for input_, shape in zip(input_t, inputs_got_shape): + input_.set_shape(shape[1:]) + + input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) + + if att_scores is not None: + + att_score = att_scores[:, time, :] + + call_cell = lambda: cell(input_t, state, att_score) + + else: + + call_cell = lambda: cell(input_t, state) + + if sequence_length is not None: + + (output, new_state) = _rnn_step( + + time=time, + + sequence_length=sequence_length, + + min_sequence_length=min_sequence_length, + + max_sequence_length=max_sequence_length, + + zero_output=zero_output, + + state=state, + + call_cell=call_cell, + + state_size=state_size, + + skip_conditionals=True) + + else: + + (output, new_state) = call_cell() + + # Pack state if using state tuples + + output = nest.flatten(output) + + output_ta_t = tuple( + + ta.write(time, out) for ta, out in zip(output_ta_t, output)) + + if att_scores is not None: + + return (time + 1, output_ta_t, new_state, att_scores) + + else: + + return (time + 1, output_ta_t, new_state) + + if att_scores is not None: + + _, output_final_ta, final_state, _ = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state, att_scores), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + else: + + _, output_final_ta, final_state = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + # Unpack final output if not using output tuples. + + final_outputs = tuple(ta.stack() for ta in output_final_ta) + + # Restore some shape information + + for output, output_size in zip(final_outputs, flat_output_size): + shape = _concat( + + [const_time_steps, const_batch_size], output_size, static=True) + + output.set_shape(shape) + + final_outputs = nest.pack_sequence_as( + + structure=cell.output_size, flat_sequence=final_outputs) + + return (final_outputs, final_state) \ No newline at end of file diff --git a/modelzoo/FNN/script/contrib/rnn_v2.py b/modelzoo/FNN/script/contrib/rnn_v2.py new file mode 100644 index 00000000000..a2bd625cd8b --- /dev/null +++ b/modelzoo/FNN/script/contrib/rnn_v2.py @@ -0,0 +1,1452 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +# + +# Licensed under the Apache License, Version 2.0 (the "License"); + +# you may not use this file except in compliance with the License. + +# You may obtain a copy of the License at + +# + +# http://www.apache.org/licenses/LICENSE-2.0 + +# + +# Unless required by applicable law or agreed to in writing, software + +# distributed under the License is distributed on an "AS IS" BASIS, + +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +# See the License for the specific language governing permissions and + +# limitations under the License. + +# ============================================================================== + + +"""RNN helpers for TensorFlow models. 
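+
+This is a vendored copy of the stock TensorFlow RNN helpers, extended with
+an optional `att_scores` argument on `dynamic_rnn` so that attention-aware
+cells (e.g. `QAAttGRUCell` / `VecAttGRUCell` in `contrib/utils.py`) receive
+the current step's attention score.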
+ + + + + +@@bidirectional_dynamic_rnn + +@@dynamic_rnn + +@@raw_rnn + +@@static_rnn + +@@static_state_saving_rnn + +@@static_bidirectional_rnn + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util import nest +import tensorflow as tf + + +def _like_rnncell_(cell): + """Checks that a given object is an RNNCell by using duck typing.""" + + conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"), + + hasattr(cell, "zero_state"), callable(cell)] + + return all(conditions) + + +# pylint: disable=protected-access + +_concat = rnn_cell_impl._concat +try: + _like_rnncell = rnn_cell_impl._like_rnncell +except: + _like_rnncell = _like_rnncell_ + + +# pylint: enable=protected-access + + +def _transpose_batch_time(x): + """Transpose the batch and time dimensions of a Tensor. + + + + Retains as much of the static shape information as possible. + + + + Args: + + x: A tensor of rank 2 or higher. + + + + Returns: + + x transposed along the first two dimensions. + + + + Raises: + + ValueError: if `x` is rank 1 or lower. + + """ + + x_static_shape = x.get_shape() + + if x_static_shape.ndims is not None and x_static_shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2, but saw shape: %s" % + + (x, x_static_shape)) + + x_rank = array_ops.rank(x) + + x_t = array_ops.transpose( + + x, array_ops.concat( + + ([1, 0], math_ops.range(2, x_rank)), axis=0)) + + x_t.set_shape( + + tensor_shape.TensorShape([ + + x_static_shape[1], x_static_shape[0] + + ]).concatenate(x_static_shape[2:])) + + return x_t + + +def _best_effort_input_batch_size(flat_input): + """Get static input batch size if available, with fallback to the dynamic one. + + + + Args: + + flat_input: An iterable of time major input Tensors of shape [max_time, + + batch_size, ...]. All inputs should have compatible batch sizes. + + + + Returns: + + The batch size in Python integer if available, or a scalar Tensor otherwise. + + + + Raises: + + ValueError: if there is any input with an invalid shape. + + """ + + for input_ in flat_input: + + shape = input_.shape + + if shape.ndims is None: + continue + + if shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2" % input_) + + batch_size = shape[1] + + if batch_size is not None: + return batch_size + + # Fallback to the dynamic batch size of the first input. + + return array_ops.shape(flat_input[0])[1] + + +def _infer_state_dtype(explicit_dtype, state): + """Infer the dtype of an RNN state. + + + + Args: + + explicit_dtype: explicitly declared dtype or None. + + state: RNN's hidden state. Must be a Tensor or a nested iterable containing + + Tensors. + + + + Returns: + + dtype: inferred dtype of hidden state. + + + + Raises: + + ValueError: if `state` has heterogeneous dtypes or is empty. 
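+
+  For example, `_infer_state_dtype(None, (c, h))` returns `c.dtype` provided
+  every tensor in the state shares that dtype.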
+ + """ + + if explicit_dtype is not None: + + return explicit_dtype + + elif nest.is_sequence(state): + + inferred_dtypes = [element.dtype for element in nest.flatten(state)] + + if not inferred_dtypes: + raise ValueError("Unable to infer dtype from empty state.") + + all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) + + if not all_same: + raise ValueError( + + "State has tensors of different inferred_dtypes. Unable to infer a " + + "single representative dtype.") + + return inferred_dtypes[0] + + else: + + return state.dtype + + +# pylint: disable=unused-argument + +def _rnn_step( + + time, sequence_length, min_sequence_length, max_sequence_length, + + zero_output, state, call_cell, state_size, skip_conditionals=False): + """Calculate one step of a dynamic RNN minibatch. + + + + Returns an (output, state) pair conditioned on the sequence_lengths. + + When skip_conditionals=False, the pseudocode is something like: + + + + if t >= max_sequence_length: + + return (zero_output, state) + + if t < min_sequence_length: + + return call_cell() + + + + # Selectively output zeros or output, old state or new state depending + + # on if we've finished calculating each row. + + new_output, new_state = call_cell() + + final_output = np.vstack([ + + zero_output if time >= sequence_lengths[r] else new_output_r + + for r, new_output_r in enumerate(new_output) + + ]) + + final_state = np.vstack([ + + state[r] if time >= sequence_lengths[r] else new_state_r + + for r, new_state_r in enumerate(new_state) + + ]) + + return (final_output, final_state) + + + + Args: + + time: Python int, the current time step + + sequence_length: int32 `Tensor` vector of size [batch_size] + + min_sequence_length: int32 `Tensor` scalar, min of sequence_length + + max_sequence_length: int32 `Tensor` scalar, max of sequence_length + + zero_output: `Tensor` vector of shape [output_size] + + state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`, + + or a list/tuple of such tensors. + + call_cell: lambda returning tuple of (new_output, new_state) where + + new_output is a `Tensor` matrix of shape `[batch_size, output_size]`. + + new_state is a `Tensor` matrix of shape `[batch_size, state_size]`. + + state_size: The `cell.state_size` associated with the state. + + skip_conditionals: Python bool, whether to skip using the conditional + + calculations. This is useful for `dynamic_rnn`, where the input tensor + + matches `max_sequence_length`, and using conditionals just slows + + everything down. + + + + Returns: + + A tuple of (`final_output`, `final_state`) as given by the pseudocode above: + + final_output is a `Tensor` matrix of shape [batch_size, output_size] + + final_state is either a single `Tensor` matrix, or a tuple of such + + matrices (matching length and shapes of input `state`). + + + + Raises: + + ValueError: If the cell returns a state tuple whose length does not match + + that returned by `state_size`. + + """ + + # Convert state to a list for ease of use + + flat_state = nest.flatten(state) + + flat_zero_output = nest.flatten(zero_output) + + def _copy_one_through(output, new_output): + + # If the state contains a scalar value we simply pass it through. 
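+
+    # For higher-rank entries, `array_ops.where` below selects per batch
+    # row: once `time >= sequence_length[r]`, row r keeps its previous
+    # value rather than the newly computed one.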
+ + if output.shape.ndims == 0: + return new_output + + copy_cond = (time >= sequence_length) + + with ops.colocate_with(new_output): + return array_ops.where(copy_cond, output, new_output) + + def _copy_some_through(flat_new_output, flat_new_state): + + # Use broadcasting select to determine which values should get + + # the previous state & zero output, and which values should get + + # a calculated state & output. + + flat_new_output = [ + + _copy_one_through(zero_output, new_output) + + for zero_output, new_output in zip(flat_zero_output, flat_new_output)] + + flat_new_state = [ + + _copy_one_through(state, new_state) + + for state, new_state in zip(flat_state, flat_new_state)] + + return flat_new_output + flat_new_state + + def _maybe_copy_some_through(): + + """Run RNN step. Pass through either no or some past state.""" + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + flat_new_state = nest.flatten(new_state) + + flat_new_output = nest.flatten(new_output) + + return control_flow_ops.cond( + + # if t < min_seq_len: calculate and return everything + + time < min_sequence_length, lambda: flat_new_output + flat_new_state, + + # else copy some of it through + + lambda: _copy_some_through(flat_new_output, flat_new_state)) + + # TODO(ebrevdo): skipping these conditionals may cause a slowdown, + + # but benefits from removing cond() and its gradient. We should + + # profile with and without this switch here. + + if skip_conditionals: + + # Instead of using conditionals, perform the selective copy at all time + + # steps. This is faster when max_seq_len is equal to the number of unrolls + + # (which is typical for dynamic_rnn). + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + new_state = nest.flatten(new_state) + + new_output = nest.flatten(new_output) + + final_output_and_state = _copy_some_through(new_output, new_state) + + else: + + empty_update = lambda: flat_zero_output + flat_state + + final_output_and_state = control_flow_ops.cond( + + # if t >= max_seq_len: copy all state through, output zeros + + time >= max_sequence_length, empty_update, + + # otherwise calculation is required: copy some or all of it through + + _maybe_copy_some_through) + + if len(final_output_and_state) != len(flat_zero_output) + len(flat_state): + raise ValueError("Internal error: state and output were not concatenated " + + "correctly.") + + final_output = final_output_and_state[:len(flat_zero_output)] + + final_state = final_output_and_state[len(flat_zero_output):] + + for output, flat_output in zip(final_output, flat_zero_output): + output.set_shape(flat_output.get_shape()) + + for substate, flat_substate in zip(final_state, flat_state): + substate.set_shape(flat_substate.get_shape()) + + final_output = nest.pack_sequence_as( + + structure=zero_output, flat_sequence=final_output) + + final_state = nest.pack_sequence_as( + + structure=state, flat_sequence=final_state) + + return final_output, final_state + + +def _reverse_seq(input_seq, lengths): + """Reverse a list of Tensors up to specified lengths. + + + + Args: + + input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) + + or nested tuples of tensors. + + lengths: A `Tensor` of dimension batch_size, containing lengths for each + + sequence in the batch. If "None" is specified, simply reverses + + the list. 
+ + + + Returns: + + time-reversed sequence + + """ + + if lengths is None: + return list(reversed(input_seq)) + + flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) + + flat_results = [[] for _ in range(len(input_seq))] + + for sequence in zip(*flat_input_seq): + + input_shape = tensor_shape.unknown_shape( + + ndims=sequence[0].get_shape().ndims) + + for input_ in sequence: + input_shape.merge_with(input_.get_shape()) + + input_.set_shape(input_shape) + + # Join into (time, batch_size, depth) + + s_joined = array_ops.stack(sequence) + + # Reverse along dimension 0 + + s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) + + # Split again into list + + result = array_ops.unstack(s_reversed) + + for r, flat_result in zip(result, flat_results): + r.set_shape(input_shape) + + flat_result.append(r) + + results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) + + for input_, flat_result in zip(input_seq, flat_results)] + + return results + + +# +# def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, +# +# initial_state_fw=None, initial_state_bw=None, +# +# dtype=None, parallel_iterations=None, +# +# swap_memory=False, time_major=False, scope=None): +# +# """Creates a dynamic version of bidirectional recurrent neural network. +# +# +# +# Takes input and builds independent forward and backward RNNs. The input_size +# +# of forward and backward cell must match. The initial state for both directions +# +# is zero by default (but can be set optionally) and no intermediate states are +# +# ever returned -- the network is fully unrolled for the given (passed in) +# +# length(s) of the sequence(s) or completely unrolled if length(s) is not +# +# given. +# +# +# +# Args: +# +# cell_fw: An instance of RNNCell, to be used for forward direction. +# +# cell_bw: An instance of RNNCell, to be used for backward direction. +# +# inputs: The RNN inputs. +# +# If time_major == False (default), this must be a tensor of shape: +# +# `[batch_size, max_time, ...]`, or a nested tuple of such elements. +# +# If time_major == True, this must be a tensor of shape: +# +# `[max_time, batch_size, ...]`, or a nested tuple of such elements. +# +# sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, +# +# containing the actual lengths for each of the sequences in the batch. +# +# If not provided, all batch entries are assumed to be full sequences; and +# +# time reversal is applied from time `0` to `max_time` for each sequence. +# +# initial_state_fw: (optional) An initial state for the forward RNN. +# +# This must be a tensor of appropriate type and shape +# +# `[batch_size, cell_fw.state_size]`. +# +# If `cell_fw.state_size` is a tuple, this should be a tuple of +# +# tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. +# +# initial_state_bw: (optional) Same as for `initial_state_fw`, but using +# +# the corresponding properties of `cell_bw`. +# +# dtype: (optional) The data type for the initial states and expected output. +# +# Required if initial_states are not provided or RNN states have a +# +# heterogeneous dtype. +# +# parallel_iterations: (Default: 32). The number of iterations to run in +# +# parallel. Those operations which do not have any temporal dependency +# +# and can be run in parallel, will be. This parameter trades off +# +# time for space. Values >> 1 use more memory but take less time, +# +# while smaller values use less memory but computations take longer. 
+# +# swap_memory: Transparently swap the tensors produced in forward inference +# +# but needed for back prop from GPU to CPU. This allows training RNNs +# +# which would typically not fit on a single GPU, with very minimal (or no) +# +# performance penalty. +# +# time_major: The shape format of the `inputs` and `outputs` Tensors. +# +# If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. +# +# If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. +# +# Using `time_major = True` is a bit more efficient because it avoids +# +# transposes at the beginning and end of the RNN calculation. However, +# +# most TensorFlow data is batch-major, so by default this function +# +# accepts input and emits output in batch-major form. +# +# scope: VariableScope for the created subgraph; defaults to +# +# "bidirectional_rnn" +# +# +# +# Returns: +# +# A tuple (outputs, output_states) where: +# +# outputs: A tuple (output_fw, output_bw) containing the forward and +# +# the backward rnn output `Tensor`. +# +# If time_major == False (default), +# +# output_fw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_bw.output_size]`. +# +# If time_major == True, +# +# output_fw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_bw.output_size]`. +# +# It returns a tuple instead of a single concatenated `Tensor`, unlike +# +# in the `bidirectional_rnn`. If the concatenated one is preferred, +# +# the forward and backward outputs can be concatenated as +# +# `tf.concat(outputs, 2)`. +# +# output_states: A tuple (output_state_fw, output_state_bw) containing +# +# the forward and the backward final states of bidirectional rnn. +# +# +# +# Raises: +# +# TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. 
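+#
+# NOTE: this bidirectional wrapper is intentionally left commented out;
+# only the att_scores-aware `dynamic_rnn` below is active in this module.
+#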
+# +# """ +# +# +# +# if not _like_rnncell(cell_fw): +# +# raise TypeError("cell_fw must be an instance of RNNCell") +# +# if not _like_rnncell(cell_bw): +# +# raise TypeError("cell_bw must be an instance of RNNCell") +# +# +# +# with vs.variable_scope(scope or "bidirectional_rnn"): +# +# # Forward direction +# +# with vs.variable_scope("fw") as fw_scope: +# +# output_fw, output_state_fw = dynamic_rnn( +# +# cell=cell_fw, inputs=inputs, sequence_length=sequence_length, +# +# initial_state=initial_state_fw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=fw_scope) +# +# +# +# # Backward direction +# +# if not time_major: +# +# time_dim = 1 +# +# batch_dim = 0 +# +# else: +# +# time_dim = 0 +# +# batch_dim = 1 +# +# +# +# def _reverse(input_, seq_lengths, seq_dim, batch_dim): +# +# if seq_lengths is not None: +# +# return array_ops.reverse_sequence( +# +# input=input_, seq_lengths=seq_lengths, +# +# seq_dim=seq_dim, batch_dim=batch_dim) +# +# else: +# +# return array_ops.reverse(input_, axis=[seq_dim]) +# +# +# +# with vs.variable_scope("bw") as bw_scope: +# +# inputs_reverse = _reverse( +# +# inputs, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# tmp, output_state_bw = dynamic_rnn( +# +# cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, +# +# initial_state=initial_state_bw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=bw_scope) +# +# +# +# output_bw = _reverse( +# +# tmp, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# +# +# outputs = (output_fw, output_bw) +# +# output_states = (output_state_fw, output_state_bw) +# +# +# +# return (outputs, output_states) +# + + +def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None, + + dtype=None, parallel_iterations=None, swap_memory=False, + + time_major=False, scope=None): + """Creates a recurrent neural network specified by RNNCell `cell`. + + + + Performs fully dynamic unrolling of `inputs`. + + + + Example: + + + + ```python + + # create a BasicRNNCell + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + + + + # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size] + + + + # defining initial state + + initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) + + + + # 'state' is a tensor of shape [batch_size, cell_state_size] + + outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data, + + initial_state=initial_state, + + dtype=tf.float32) + + ``` + + + + ```python + + # create 2 LSTMCells + + rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]] + + + + # create a RNN cell composed sequentially of a number of RNNCells + + multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers) + + + + # 'outputs' is a tensor of shape [batch_size, max_time, 256] + + # 'state' is a N-tuple where N is the number of LSTMCells containing a + + # tf.contrib.rnn.LSTMStateTuple for each cell + + outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, + + inputs=data, + + dtype=tf.float32) + + ``` + + + + + + Args: + + cell: An instance of RNNCell. + + inputs: The RNN inputs. + + If `time_major == False` (default), this must be a `Tensor` of shape: + + `[batch_size, max_time, ...]`, or a nested tuple of such + + elements. + + If `time_major == True`, this must be a `Tensor` of shape: + + `[max_time, batch_size, ...]`, or a nested tuple of such + + elements. 
+ + This may also be a (possibly nested) tuple of Tensors satisfying + + this property. The first two dimensions must match across all the inputs, + + but otherwise the ranks and other shape components may differ. + + In this case, input to `cell` at each time-step will replicate the + + structure of these tuples, except for the time dimension (from which the + + time is taken). + + The input to `cell` at each time step will be a `Tensor` or (possibly + + nested) tuple of Tensors each with dimensions `[batch_size, ...]`. + + sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. + + Used to copy-through state and zero-out outputs when past a batch + + element's sequence length. So it's more for correctness than performance. + + initial_state: (optional) An initial state for the RNN. + + If `cell.state_size` is an integer, this must be + + a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. + + If `cell.state_size` is a tuple, this should be a tuple of + + tensors having shapes `[batch_size, s] for s in cell.state_size`. + + dtype: (optional) The data type for the initial state and expected output. + + Required if initial_state is not provided or RNN state has a heterogeneous + + dtype. + + parallel_iterations: (Default: 32). The number of iterations to run in + + parallel. Those operations which do not have any temporal dependency + + and can be run in parallel, will be. This parameter trades off + + time for space. Values >> 1 use more memory but take less time, + + while smaller values use less memory but computations take longer. + + swap_memory: Transparently swap the tensors produced in forward inference + + but needed for back prop from GPU to CPU. This allows training RNNs + + which would typically not fit on a single GPU, with very minimal (or no) + + performance penalty. + + time_major: The shape format of the `inputs` and `outputs` Tensors. + + If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. + + If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. + + Using `time_major = True` is a bit more efficient because it avoids + + transposes at the beginning and end of the RNN calculation. However, + + most TensorFlow data is batch-major, so by default this function + + accepts input and emits output in batch-major form. + + scope: VariableScope for the created subgraph; defaults to "rnn". + + + + Returns: + + A pair (outputs, state) where: + + + + outputs: The RNN output `Tensor`. + + + + If time_major == False (default), this will be a `Tensor` shaped: + + `[batch_size, max_time, cell.output_size]`. + + + + If time_major == True, this will be a `Tensor` shaped: + + `[max_time, batch_size, cell.output_size]`. + + + + Note, if `cell.output_size` is a (possibly nested) tuple of integers + + or `TensorShape` objects, then `outputs` will be a tuple having the + + same structure as `cell.output_size`, containing Tensors having shapes + + corresponding to the shape data in `cell.output_size`. + + + + state: The final state. If `cell.state_size` is an int, this + + will be shaped `[batch_size, cell.state_size]`. If it is a + + `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. + + If it is a (possibly nested) tuple of ints or `TensorShape`, this will + + be a tuple having the corresponding shapes. If cells are `LSTMCells` + + `state` will be a tuple containing a `LSTMStateTuple` for each cell. + + + + Raises: + + TypeError: If `cell` is not an instance of RNNCell. 
+ + ValueError: If inputs is None or an empty list. + + """ + + if not _like_rnncell(cell): + raise TypeError("cell must be an instance of RNNCell") + + # By default, time_major==False and inputs are batch-major: shaped + + # [batch, time, depth] + + # For internal calculations, we transpose to [time, batch, depth] + + flat_input = nest.flatten(inputs) + + if not time_major: + # (B,T,D) => (T,B,D) + + flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input] + + flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input) + + parallel_iterations = parallel_iterations or 32 + + if sequence_length is not None: + + sequence_length = math_ops.to_int32(sequence_length) + + if sequence_length.get_shape().ndims not in (None, 1): + raise ValueError( + + "sequence_length must be a vector of length batch_size, " + + "but saw shape: %s" % sequence_length.get_shape()) + + sequence_length = array_ops.identity( # Just to find it in the graph. + + sequence_length, name="sequence_length") + + # Create a new scope in which the caching device is either + + # determined by the parent scope, or is set to place the cached + + # Variable using the same placement as for the rest of the RNN. + + try: + resue = tf.AUTO_REUSE + except: + resue = tf.compat.v1.AUTO_REUSE + + with vs.variable_scope(scope or "rnn",reuse=resue) as varscope:#TODO:user defined reuse + + if varscope.caching_device is None: + varscope.set_caching_device(lambda op: op.device) + + batch_size = _best_effort_input_batch_size(flat_input) + + if initial_state is not None: + + state = initial_state + + else: + + if not dtype: + raise ValueError("If there is no initial_state, you must give a dtype.") + + state = cell.zero_state(batch_size, dtype) + + def _assert_has_shape(x, shape): + + x_shape = array_ops.shape(x) + + packed_shape = array_ops.stack(shape) + + return control_flow_ops.Assert( + + math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), + + ["Expected shape for Tensor %s is " % x.name, + + packed_shape, " but saw shape: ", x_shape]) + + if sequence_length is not None: + # Perform some shape validation + + with ops.control_dependencies( + + [_assert_has_shape(sequence_length, [batch_size])]): + sequence_length = array_ops.identity( + + sequence_length, name="CheckSeqLen") + + inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input) + + (outputs, final_state) = _dynamic_rnn_loop( + + cell, + + inputs, + + state, + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory, + + att_scores=att_scores, + + sequence_length=sequence_length, + + dtype=dtype) + + # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth]. + + # If we are performing batch-major calculations, transpose output back + + # to shape [batch, time, depth] + + if not time_major: + # (T,B,D) => (B,T,D) + + outputs = nest.map_structure(_transpose_batch_time, outputs) + + return (outputs, final_state) + + +def _dynamic_rnn_loop(cell, + + inputs, + + initial_state, + + parallel_iterations, + + swap_memory, + + att_scores=None, + + sequence_length=None, + + dtype=None): + """Internal implementation of Dynamic RNN. + + + + Args: + + cell: An instance of RNNCell. + + inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested + + tuple of such elements. + + initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if + + `cell.state_size` is a tuple, then this should be a tuple of + + tensors having shapes `[batch_size, s] for s in cell.state_size`. 
+ + parallel_iterations: Positive Python int. + + swap_memory: A Python boolean + + sequence_length: (optional) An `int32` `Tensor` of shape [batch_size]. + + dtype: (optional) Expected dtype of output. If not specified, inferred from + + initial_state. + + + + Returns: + + Tuple `(final_outputs, final_state)`. + + final_outputs: + + A `Tensor` of shape `[time, batch_size, cell.output_size]`. If + + `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape` + + objects, then this returns a (possibly nsted) tuple of Tensors matching + + the corresponding shapes. + + final_state: + + A `Tensor`, or possibly nested tuple of Tensors, matching in length + + and shapes to `initial_state`. + + + + Raises: + + ValueError: If the input depth cannot be inferred via shape inference + + from the inputs. + + """ + + state = initial_state + + assert isinstance(parallel_iterations, int), "parallel_iterations must be int" + + state_size = cell.state_size + + flat_input = nest.flatten(inputs) + + flat_output_size = nest.flatten(cell.output_size) + + # Construct an initial output + + input_shape = array_ops.shape(flat_input[0]) + + time_steps = input_shape[0] + + batch_size = _best_effort_input_batch_size(flat_input) + + inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3) + + for input_ in flat_input) + + const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2] + + for shape in inputs_got_shape: + + if not shape[2:].is_fully_defined(): + raise ValueError( + + "Input size (depth of inputs) must be accessible via shape inference," + + " but saw value None.") + + got_time_steps = shape[0] + + got_batch_size = shape[1] + + if const_time_steps != got_time_steps: + raise ValueError( + + "Time steps is not the same for all the elements in the input in a " + + "batch.") + + if const_batch_size != got_batch_size: + raise ValueError( + + "Batch_size is not the same for all the elements in the input.") + + # Prepare dynamic conditional copying of state & output + + def _create_zero_arrays(size): + + size = _concat(batch_size, size) + + return array_ops.zeros( + + array_ops.stack(size), _infer_state_dtype(dtype, state)) + + flat_zero_output = tuple(_create_zero_arrays(output) + + for output in flat_output_size) + + zero_output = nest.pack_sequence_as(structure=cell.output_size, + + flat_sequence=flat_zero_output) + + if sequence_length is not None: + min_sequence_length = math_ops.reduce_min(sequence_length) + + max_sequence_length = math_ops.reduce_max(sequence_length) + + time = array_ops.constant(0, dtype=dtypes.int32, name="time") + + with ops.name_scope("dynamic_rnn") as scope: + + base_name = scope + + def _create_ta(name, dtype): + + return tensor_array_ops.TensorArray(dtype=dtype, + + size=time_steps, + + tensor_array_name=base_name + name) + + output_ta = tuple(_create_ta("output_%d" % i, + + _infer_state_dtype(dtype, state)) + + for i in range(len(flat_output_size))) + + input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype) + + for i in range(len(flat_input))) + + input_ta = tuple(ta.unstack(input_) + + for ta, input_ in zip(input_ta, flat_input)) + + def _time_step(time, output_ta_t, state, att_scores=None): + + """Take a time step of the dynamic RNN. + + + + Args: + + time: int32 scalar Tensor. + + output_ta_t: List of `TensorArray`s that represent the output. + + state: nested tuple of vector tensors that represent the state. + + + + Returns: + + The tuple (time + 1, output_ta_t with updated flow, new_state). 
+ + """ + + input_t = tuple(ta.read(time) for ta in input_ta) + + # Restore some shape information + + for input_, shape in zip(input_t, inputs_got_shape): + input_.set_shape(shape[1:]) + + input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) + + if att_scores is not None: + + att_score = att_scores[:, time, :] + + call_cell = lambda: cell(input_t, state, att_score) + + else: + + call_cell = lambda: cell(input_t, state) + + if sequence_length is not None: + + (output, new_state) = _rnn_step( + + time=time, + + sequence_length=sequence_length, + + min_sequence_length=min_sequence_length, + + max_sequence_length=max_sequence_length, + + zero_output=zero_output, + + state=state, + + call_cell=call_cell, + + state_size=state_size, + + skip_conditionals=True) + + else: + + (output, new_state) = call_cell() + + # Pack state if using state tuples + + output = nest.flatten(output) + + output_ta_t = tuple( + + ta.write(time, out) for ta, out in zip(output_ta_t, output)) + + if att_scores is not None: + + return (time + 1, output_ta_t, new_state, att_scores) + + else: + + return (time + 1, output_ta_t, new_state) + + if att_scores is not None: + + _, output_final_ta, final_state, _ = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state, att_scores), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + else: + + _, output_final_ta, final_state = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + # Unpack final output if not using output tuples. + + final_outputs = tuple(ta.stack() for ta in output_final_ta) + + # Restore some shape information + + for output, output_size in zip(final_outputs, flat_output_size): + shape = _concat( + + [const_time_steps, const_batch_size], output_size, static=True) + + output.set_shape(shape) + + final_outputs = nest.pack_sequence_as( + + structure=cell.output_size, flat_sequence=final_outputs) + + return (final_outputs, final_state) diff --git a/modelzoo/FNN/script/contrib/utils.py b/modelzoo/FNN/script/contrib/utils.py new file mode 100644 index 00000000000..692f4ef6e89 --- /dev/null +++ b/modelzoo/FNN/script/contrib/utils.py @@ -0,0 +1,378 @@ +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.ops.rnn_cell import * +from tensorflow.python.util import nest + +_BIAS_VARIABLE_NAME = "bias" + +_WEIGHTS_VARIABLE_NAME = "kernel" + + +class _Linear_(object): + """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. + + + + Args: + + args: a 2D Tensor or a list of 2D, batch x n, Tensors. + + output_size: int, second dimension of weight variable. + + dtype: data type for variables. + + build_bias: boolean, whether to build a bias variable. + + bias_initializer: starting value to initialize the bias + + (default is all zeros). + + kernel_initializer: starting value to initialize the weight. + + + + Raises: + + ValueError: if inputs_shape is wrong. 
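+
+  Usage sketch (illustrative; `x` and `h` stand for any two `batch x n`
+  tensors): map their concatenation onto `output_size` units:
+
+    linear = _Linear([x, h], output_size, build_bias=True)
+    y = linear([x, h])  # [batch, output_size]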
+ + """ + + def __init__(self, + + args, + + output_size, + + build_bias, + + bias_initializer=None, + + kernel_initializer=None): + + self._build_bias = build_bias + + if args is None or (nest.is_sequence(args) and not args): + raise ValueError("`args` must be specified") + + if not nest.is_sequence(args): + + args = [args] + + self._is_sequence = False + + else: + + self._is_sequence = True + + # Calculate the total size of arguments on dimension 1. + + total_arg_size = 0 + + shapes = [a.get_shape() for a in args] + + for shape in shapes: + + if shape.ndims != 2: + raise ValueError( + "linear is expecting 2D arguments: %s" % shapes) + + if shape[1] is None: + + raise ValueError("linear expects shape[1] to be provided for shape %s, " + + "but saw %s" % (shape, shape[1])) + + else: + + total_arg_size += int(shape[1])#.value + + dtype = [a.dtype for a in args][0] + + scope = vs.get_variable_scope() + + with vs.variable_scope(scope) as outer_scope: + + self._weights = vs.get_variable( + + _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size], + + dtype=dtype, + + initializer=kernel_initializer) + + if build_bias: + + with vs.variable_scope(outer_scope) as inner_scope: + + inner_scope.set_partitioner(None) + + if bias_initializer is None: + bias_initializer = init_ops.constant_initializer( + 0.0, dtype=dtype) + + self._biases = vs.get_variable( + + _BIAS_VARIABLE_NAME, [output_size], + + dtype=dtype, + + initializer=bias_initializer) + + def __call__(self, args): + + if not self._is_sequence: + args = [args] + + if len(args) == 1: + + res = math_ops.matmul(args[0], self._weights) + + else: + + res = math_ops.matmul(array_ops.concat(args, 1), self._weights) + + if self._build_bias: + res = nn_ops.bias_add(res, self._biases) + + return res + + +try: + from tensorflow.python.ops.rnn_cell_impl import _Linear +except: + _Linear = _Linear_ + + +class QAAttGRUCell(RNNCell): + """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078). + + Args: + + num_units: int, The number of units in the GRU cell. + + activation: Nonlinearity to use. Default: `tanh`. + + reuse: (optional) Python boolean describing whether to reuse variables + + in an existing scope. If not `True`, and the existing scope already has + + the given variables, an error is raised. + + kernel_initializer: (optional) The initializer to use for the weight and + + projection matrices. + + bias_initializer: (optional) The initializer to use for the bias. + + """ + + def __init__(self, + + num_units, + + activation=None, + + reuse=None, + + kernel_initializer=None, + + bias_initializer=None): + + super(QAAttGRUCell, self).__init__(_reuse=reuse) + + self._num_units = num_units + + self._activation = activation or math_ops.tanh + + self._kernel_initializer = kernel_initializer + + self._bias_initializer = bias_initializer + + self._gate_linear = None + + self._candidate_linear = None + + @property + def state_size(self): + + return self._num_units + + @property + def output_size(self): + + return self._num_units + + def __call__(self, inputs, state, att_score): + + return self.call(inputs, state, att_score) + + def call(self, inputs, state, att_score=None): + """Gated recurrent unit (GRU) with nunits cells.""" + + if self._gate_linear is None: + + bias_ones = self._bias_initializer + + if self._bias_initializer is None: + bias_ones = init_ops.constant_initializer( + 1.0, dtype=inputs.dtype) + + with vs.variable_scope("gates"): # Reset gate and update gate. 
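+
+                # A single [inputs, state] -> 2*num_units linear map yields
+                # both the reset gate r and the update gate u; QAAttGRU uses
+                # only r below and lets the external attention score replace
+                # u in the final blend:
+                #   new_h = (1 - att_score) * state + att_score * c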
+ + self._gate_linear = _Linear( + + [inputs, state], + + 2 * self._num_units, + + True, + + bias_initializer=bias_ones, + + kernel_initializer=self._kernel_initializer) + + value = math_ops.sigmoid(self._gate_linear([inputs, state])) + + r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) + + r_state = r * state + + if self._candidate_linear is None: + with vs.variable_scope("candidate"): + self._candidate_linear = _Linear( + + [inputs, r_state], + + self._num_units, + + True, + + bias_initializer=self._bias_initializer, + + kernel_initializer=self._kernel_initializer) + + c = self._activation(self._candidate_linear([inputs, r_state])) + + new_h = (1. - att_score) * state + att_score * c + + return new_h, new_h + + +class VecAttGRUCell(RNNCell): + """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078). + + Args: + + num_units: int, The number of units in the GRU cell. + + activation: Nonlinearity to use. Default: `tanh`. + + reuse: (optional) Python boolean describing whether to reuse variables + + in an existing scope. If not `True`, and the existing scope already has + + the given variables, an error is raised. + + kernel_initializer: (optional) The initializer to use for the weight and + + projection matrices. + + bias_initializer: (optional) The initializer to use for the bias. + + """ + + def __init__(self, + + num_units, + + activation=None, + + reuse=None, + + kernel_initializer=None, + + bias_initializer=None): + + super(VecAttGRUCell, self).__init__(_reuse=reuse) + + self._num_units = num_units + + self._activation = activation or math_ops.tanh + + self._kernel_initializer = kernel_initializer + + self._bias_initializer = bias_initializer + + self._gate_linear = None + + self._candidate_linear = None + + @property + def state_size(self): + + return self._num_units + + @property + def output_size(self): + + return self._num_units + + def __call__(self, inputs, state, att_score): + + return self.call(inputs, state, att_score) + + def call(self, inputs, state, att_score=None): + """Gated recurrent unit (GRU) with nunits cells.""" + + if self._gate_linear is None: + + bias_ones = self._bias_initializer + + if self._bias_initializer is None: + bias_ones = init_ops.constant_initializer( + 1.0, dtype=inputs.dtype) + + with vs.variable_scope("gates"): # Reset gate and update gate. 
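+
+                # Standard GRU gate computation; VecAttGRU keeps the learned
+                # update gate u but rescales it by the attention score,
+                # u = (1 - att_score) * u, before interpolating between the
+                # previous state and the candidate.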
+ + self._gate_linear = _Linear( + + [inputs, state], + + 2 * self._num_units, + + True, + + bias_initializer=bias_ones, + + kernel_initializer=self._kernel_initializer) + + value = math_ops.sigmoid(self._gate_linear([inputs, state])) + + r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) + + r_state = r * state + + if self._candidate_linear is None: + with vs.variable_scope("candidate"): + self._candidate_linear = _Linear( + + [inputs, r_state], + + self._num_units, + + True, + + bias_initializer=self._bias_initializer, + + kernel_initializer=self._kernel_initializer) + + c = self._activation(self._candidate_linear([inputs, r_state])) + + u = (1.0 - att_score) * u + + new_h = u * state + (1 - u) * c + + return new_h, new_h diff --git a/modelzoo/FNN/script/estimator/__init__.py b/modelzoo/FNN/script/estimator/__init__.py new file mode 100644 index 00000000000..cf4f59d6c09 --- /dev/null +++ b/modelzoo/FNN/script/estimator/__init__.py @@ -0,0 +1 @@ +from .models import * \ No newline at end of file diff --git a/modelzoo/FNN/script/estimator/feature_column.py b/modelzoo/FNN/script/estimator/feature_column.py new file mode 100644 index 00000000000..c8d7a6cd013 --- /dev/null +++ b/modelzoo/FNN/script/estimator/feature_column.py @@ -0,0 +1,52 @@ +import tensorflow as tf +from tensorflow.python.feature_column.feature_column import _EmbeddingColumn + +from .utils import LINEAR_SCOPE_NAME, variable_scope, get_collection, get_GraphKeys, input_layer, get_losses + + +def linear_model(features, linear_feature_columns): + if tf.__version__ >= '2.0.0': + linear_logits = tf.compat.v1.feature_column.linear_model(features, linear_feature_columns) + else: + linear_logits = tf.feature_column.linear_model(features, linear_feature_columns) + return linear_logits + + +def get_linear_logit(features, linear_feature_columns, l2_reg_linear=0): + with variable_scope(LINEAR_SCOPE_NAME): + if not linear_feature_columns: + linear_logits = tf.Variable([[0.0]], name='bias_weights') + else: + + linear_logits = linear_model(features, linear_feature_columns) + + if l2_reg_linear > 0: + for var in get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME)[:-1]: + get_losses().add_loss(l2_reg_linear * tf.nn.l2_loss(var, name=var.name.split(":")[0] + "_l2loss"), + get_GraphKeys().REGULARIZATION_LOSSES) + return linear_logits + + +def input_from_feature_columns(features, feature_columns, l2_reg_embedding=0.0): + dense_value_list = [] + sparse_emb_list = [] + for feat in feature_columns: + if is_embedding(feat): + sparse_emb = tf.expand_dims(input_layer(features, [feat]), axis=1) + sparse_emb_list.append(sparse_emb) + if l2_reg_embedding > 0: + get_losses().add_loss(l2_reg_embedding * tf.nn.l2_loss(sparse_emb, name=feat.name + "_l2loss"), + get_GraphKeys().REGULARIZATION_LOSSES) + + else: + dense_value_list.append(input_layer(features, [feat])) + + return sparse_emb_list, dense_value_list + + +def is_embedding(feature_column): + try: + from tensorflow.python.feature_column.feature_column_v2 import EmbeddingColumn + except ImportError: + EmbeddingColumn = _EmbeddingColumn + return isinstance(feature_column, (_EmbeddingColumn, EmbeddingColumn)) diff --git a/modelzoo/FNN/script/estimator/inputs.py b/modelzoo/FNN/script/estimator/inputs.py new file mode 100644 index 00000000000..2c175a9934e --- /dev/null +++ b/modelzoo/FNN/script/estimator/inputs.py @@ -0,0 +1,52 @@ +import tensorflow as tf + + +def input_fn_pandas(df, features, label=None, batch_size=256, num_epochs=1, shuffle=False, 
queue_capacity_factor=10, + num_threads=1): + if label is not None: + y = df[label] + else: + y = None + if tf.__version__ >= "2.0.0": + return tf.compat.v1.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size, + num_epochs=num_epochs, + shuffle=shuffle, + queue_capacity=batch_size * queue_capacity_factor, + num_threads=num_threads) + + return tf.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size, num_epochs=num_epochs, + shuffle=shuffle, queue_capacity=batch_size * queue_capacity_factor, + num_threads=num_threads) + + +def input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, num_parallel_calls=8, + shuffle_factor=10, prefetch_factor=1, + ): + def _parse_examples(serial_exmp): + try: + features = tf.parse_single_example(serial_exmp, features=feature_description) + except AttributeError: + features = tf.io.parse_single_example(serial_exmp, features=feature_description) + if label is not None: + labels = features.pop(label) + return features, labels + return features + + def input_fn(): + dataset = tf.data.TFRecordDataset(filenames) + dataset = dataset.map(_parse_examples, num_parallel_calls=num_parallel_calls) + if shuffle_factor > 0: + dataset = dataset.shuffle(buffer_size=batch_size * shuffle_factor) + + dataset = dataset.repeat(num_epochs).batch(batch_size) + + if prefetch_factor > 0: + dataset = dataset.prefetch(buffer_size=batch_size * prefetch_factor) + try: + iterator = dataset.make_one_shot_iterator() + except AttributeError: + iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + + return iterator.get_next() + + return input_fn diff --git a/modelzoo/FNN/script/estimator/utils.py b/modelzoo/FNN/script/estimator/utils.py new file mode 100644 index 00000000000..5d722515f6b --- /dev/null +++ b/modelzoo/FNN/script/estimator/utils.py @@ -0,0 +1,217 @@ +import tensorflow as tf +from tensorflow.python.estimator.canned.head import _Head +from tensorflow.python.estimator.canned.optimizers import get_optimizer_instance + +LINEAR_SCOPE_NAME = 'linear' +DNN_SCOPE_NAME = 'dnn' + + +def _summary_key(head_name, val): + return '%s/%s' % (val, head_name) if head_name else val + + +class Head(_Head): + + def __init__(self, task, + name=None): + self._task = task + self._name = name + + @property + def name(self): + return self._name + + @property + def logits_dimension(self): + return 1 + + def _eval_metric_ops(self, + labels, + logits, + predictions, + unweighted_loss, + weights=None): + + labels = to_float(labels) + predictions = to_float(predictions) + + # with name_scope(None, 'metrics', (labels, logits, predictions, + # unweighted_loss, weights)): + metrics = get_metrics() + losses = get_losses() + + metric_ops = { + _summary_key(self._name, "prediction/mean"): metrics.mean(predictions, weights=weights), + _summary_key(self._name, "label/mean"): metrics.mean(labels, weights=weights), + } + + summary_scalar("prediction/mean", metric_ops[_summary_key(self._name, "prediction/mean")][1]) + summary_scalar("label/mean", metric_ops[_summary_key(self._name, "label/mean")][1]) + + + mean_loss = losses.compute_weighted_loss( + unweighted_loss, weights=1.0, reduction=losses.Reduction.MEAN) + + if self._task == "binary": + metric_ops[_summary_key(self._name, "LogLoss")] = metrics.mean(mean_loss, weights=weights, ) + summary_scalar("LogLoss", mean_loss) + + metric_ops[_summary_key(self._name, "AUC")] = metrics.auc(labels, predictions, weights=weights) + summary_scalar("AUC", metric_ops[_summary_key(self._name, "AUC")][1]) 
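+        # Regression task: report MSE and MAE instead of LogLoss/AUC.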
+ else: + + metric_ops[_summary_key(self._name, "MSE")] = metrics.mean_squared_error(labels, predictions, + weights=weights) + summary_scalar("MSE", mean_loss) + + metric_ops[_summary_key(self._name, "MAE")] = metrics.mean_absolute_error(labels, predictions, + weights=weights) + summary_scalar("MAE", metric_ops[_summary_key(self._name, "MAE")][1]) + + return metric_ops + + def create_loss(self, features, mode, logits, labels): + del mode, features # Unused for this head. + losses = get_losses() + if self._task == "binary": + loss = losses.sigmoid_cross_entropy(labels, logits, reduction=losses.Reduction.NONE) + else: + loss = losses.mean_squared_error(labels, logits, reduction=losses.Reduction.NONE) + return loss + + def create_estimator_spec( + self, features, mode, logits, labels=None, train_op_fn=None, training_chief_hooks=None): + # with name_scope('head'): + logits = tf.reshape(logits, [-1, 1]) + if self._task == 'binary': + pred = tf.sigmoid(logits) + else: + pred = logits + + predictions = {"pred": pred, "logits": logits} + export_outputs = {"predict": tf.estimator.export.PredictOutput(predictions)} + if mode == tf.estimator.ModeKeys.PREDICT: + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + export_outputs=export_outputs) + + labels = tf.reshape(labels, [-1, 1]) + + unweighted_loss = self.create_loss(features, mode, logits, labels) + + losses = get_losses() + loss = losses.compute_weighted_loss( + unweighted_loss, weights=1.0, reduction=losses.Reduction.SUM) + reg_loss = losses.get_regularization_loss() + + training_loss = loss + reg_loss + + eval_metric_ops = self._eval_metric_ops(labels, logits, pred, unweighted_loss) + + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + loss=training_loss, + train_op=train_op_fn(training_loss), + eval_metric_ops=eval_metric_ops, + training_chief_hooks=training_chief_hooks) + + +def deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer, training_chief_hooks): + linear_optimizer = get_optimizer_instance(linear_optimizer, 0.005) + dnn_optimizer = get_optimizer_instance(dnn_optimizer, 0.01) + train_op_fn = get_train_op_fn(linear_optimizer, dnn_optimizer) + + head = Head(task) + return head.create_estimator_spec(features=features, + mode=mode, + labels=labels, + train_op_fn=train_op_fn, + logits=logits, training_chief_hooks=training_chief_hooks) + + +def get_train_op_fn(linear_optimizer, dnn_optimizer): + def _train_op_fn(loss): + train_ops = [] + try: + global_step = tf.train.get_global_step() + except AttributeError: + global_step = tf.compat.v1.train.get_global_step() + linear_var_list = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME) + dnn_var_list = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, DNN_SCOPE_NAME) + + if len(dnn_var_list) > 0: + train_ops.append( + dnn_optimizer.minimize( + loss, + var_list=dnn_var_list)) + if len(linear_var_list) > 0: + train_ops.append( + linear_optimizer.minimize( + loss, + var_list=linear_var_list)) + + train_op = tf.group(*train_ops) + with tf.control_dependencies([train_op]): + try: + return tf.assign_add(global_step, 1).op + except AttributeError: + return tf.compat.v1.assign_add(global_step, 1).op + + return _train_op_fn + + +def variable_scope(name_or_scope): + try: + return tf.variable_scope(name_or_scope) + except AttributeError: + return tf.compat.v1.variable_scope(name_or_scope) + +def get_collection(key, scope=None): + try: + return tf.get_collection(key, scope=scope) + except 
AttributeError: + return tf.compat.v1.get_collection(key, scope=scope) + + +def get_GraphKeys(): + try: + return tf.GraphKeys + except AttributeError: + return tf.compat.v1.GraphKeys + + +def get_losses(): + try: + return tf.compat.v1.losses + except AttributeError: + return tf.losses + + +def input_layer(features, feature_columns): + try: + return tf.feature_column.input_layer(features, feature_columns) + except AttributeError: + return tf.compat.v1.feature_column.input_layer(features, feature_columns) + + +def get_metrics(): + try: + return tf.compat.v1.metrics + except AttributeError: + return tf.metrics + + +def to_float(x, name="ToFloat"): + try: + return tf.to_float(x, name) + except AttributeError: + return tf.compat.v1.to_float(x, name) + + +def summary_scalar(name, data): + try: + tf.summary.scalar(name, data) + except AttributeError: # tf version 2.5.0+:AttributeError: module 'tensorflow._api.v2.summary' has no attribute 'scalar' + tf.compat.v1.summary.scalar(name, data) \ No newline at end of file diff --git a/modelzoo/FNN/script/feature_column.py b/modelzoo/FNN/script/feature_column.py new file mode 100644 index 00000000000..0569e32d3c3 --- /dev/null +++ b/modelzoo/FNN/script/feature_column.py @@ -0,0 +1,220 @@ +import tensorflow as tf +from collections import namedtuple, OrderedDict +from copy import copy +from itertools import chain + +from tensorflow.python.keras.initializers import RandomNormal, Zeros +from tensorflow.python.keras.layers import Input, Lambda + +from .inputs import create_embedding_matrix, embedding_lookup, get_dense_input, varlen_embedding_lookup, \ + get_varlen_pooling_list, mergeDict +from .layers import Linear +from .layers.utils import concat_func +#from keras import backend as K +import pandas as pd +import numpy as np + + +DEFAULT_GROUP_NAME = "default_group" + + +class SparseFeat(namedtuple('SparseFeat', + ['name', 'vocabulary_size', 'embedding_dim', 'use_hash', 'vocabulary_path', 'dtype', 'embeddings_initializer', + 'embedding_name', + 'group_name', 'trainable'])): + __slots__ = () + + def __new__(cls, name, vocabulary_size, embedding_dim=4, use_hash=False, vocabulary_path=None, dtype="int32", embeddings_initializer=None, + embedding_name=None, + group_name=DEFAULT_GROUP_NAME, trainable=True): + + if embedding_dim == "auto": + embedding_dim = 6 * int(pow(vocabulary_size, 0.25)) + if embeddings_initializer is None: + embeddings_initializer = RandomNormal(mean=0.0, stddev=0.0001, seed=2020) + + + + if embedding_name is None: + embedding_name = name + + return super(SparseFeat, cls).__new__(cls, name, vocabulary_size, embedding_dim, use_hash, vocabulary_path, dtype, + embeddings_initializer, + embedding_name, group_name, trainable) + + def __hash__(self): + return self.name.__hash__() + + +class VarLenSparseFeat(namedtuple('VarLenSparseFeat', + ['sparsefeat', 'maxlen', 'combiner', 'length_name', 'weight_name', 'weight_norm'])): + __slots__ = () + + def __new__(cls, sparsefeat, maxlen, combiner="mean", length_name=None, weight_name=None, weight_norm=True): + return super(VarLenSparseFeat, cls).__new__(cls, sparsefeat, maxlen, combiner, length_name, weight_name, + weight_norm) + + @property + def name(self): + return self.sparsefeat.name + + @property + def vocabulary_size(self): + return self.sparsefeat.vocabulary_size + + @property + def embedding_dim(self): + return self.sparsefeat.embedding_dim + + @property + def use_hash(self): + return self.sparsefeat.use_hash + + @property + def vocabulary_path(self): + return self.sparsefeat.vocabulary_path + + 
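+    # The properties below simply delegate to the wrapped SparseFeat, so a
+    # VarLenSparseFeat can be used anywhere the SparseFeat attributes are read.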
@property + def dtype(self): + return self.sparsefeat.dtype + + @property + def embeddings_initializer(self): + return self.sparsefeat.embeddings_initializer + + @property + def embedding_name(self): + return self.sparsefeat.embedding_name + + @property + def group_name(self): + return self.sparsefeat.group_name + + @property + def trainable(self): + return self.sparsefeat.trainable + + def __hash__(self): + return self.name.__hash__() + + +class DenseFeat(namedtuple('DenseFeat', ['name', 'dimension', 'dtype', 'transform_fn'])): + """ Dense feature + Args: + name: feature name, + dimension: dimension of the feature, default = 1. + dtype: dtype of the feature, default="float32". + transform_fn: If not `None` , a function that can be used to transform + values of the feature. the function takes the input Tensor as its + argument, and returns the output Tensor. + (e.g. lambda x: (x - 3.0) / 4.2). + """ + __slots__ = () + + def __new__(cls, name, dimension=1, dtype="float32", transform_fn=None): + return super(DenseFeat, cls).__new__(cls, name, dimension, dtype, transform_fn) + + def __hash__(self): + return self.name.__hash__() + + # def __eq__(self, other): + # if self.name == other.name: + # return True + # return False + + # def __repr__(self): + # return 'DenseFeat:'+self.name + + +def get_feature_names(feature_columns): + features = build_input_features(feature_columns) + return list(features.keys()) + + +def build_input_features(feature_columns, prefix=''): + input_features = OrderedDict() + for fc in feature_columns: + if isinstance(fc, SparseFeat): + input_features[fc.name] = Input( + shape=(1,), name=prefix + fc.name, dtype=fc.dtype) + elif isinstance(fc, DenseFeat): + input_features[fc.name] = Input( + shape=(fc.dimension,), name=prefix + fc.name, dtype=fc.dtype) + elif isinstance(fc, VarLenSparseFeat): + input_features[fc.name] = Input(shape=(fc.maxlen,), name=prefix + fc.name, + dtype=fc.dtype) + if fc.weight_name is not None: + input_features[fc.weight_name] = Input(shape=(fc.maxlen, 1), name=prefix + fc.weight_name, + dtype="float32") + if fc.length_name is not None: + input_features[fc.length_name] = Input((1,), name=prefix + fc.length_name, dtype='int32') + + else: + raise TypeError("Invalid feature column type,got", type(fc)) + + return input_features + + +def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=1024, prefix='linear', + l2_reg=0, sparse_feat_refine_weight=None): + linear_feature_columns = copy(feature_columns) + for i in range(len(linear_feature_columns)): + if isinstance(linear_feature_columns[i], SparseFeat): + linear_feature_columns[i] = linear_feature_columns[i]._replace(embedding_dim=1, + embeddings_initializer=Zeros()) + if isinstance(linear_feature_columns[i], VarLenSparseFeat): + linear_feature_columns[i] = linear_feature_columns[i]._replace( + sparsefeat=linear_feature_columns[i].sparsefeat._replace(embedding_dim=1, + embeddings_initializer=Zeros())) + + linear_emb_list = [input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, + prefix=prefix + str(i))[0] for i in range(units)] + _, dense_input_list = input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, prefix=prefix) + + linear_logit_list = [] + for i in range(units): + + if len(linear_emb_list[i]) > 0 and len(dense_input_list) > 0: + sparse_input = concat_func(linear_emb_list[i]) + dense_input = concat_func(dense_input_list) + if sparse_feat_refine_weight is not None: + sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], 
axis=1))( + [sparse_input, sparse_feat_refine_weight]) + linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias, seed=seed)([sparse_input, dense_input]) + elif len(linear_emb_list[i]) > 0: + sparse_input = concat_func(linear_emb_list[i]) + if sparse_feat_refine_weight is not None: + sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))( + [sparse_input, sparse_feat_refine_weight]) + linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(sparse_input) + elif len(dense_input_list) > 0: + dense_input = concat_func(dense_input_list) + linear_logit = Linear(l2_reg, mode=1, use_bias=use_bias, seed=seed)(dense_input) + else: #empty feature_columns + return Lambda(lambda x: tf.constant([[0.0]]))(list(features.values())[0]) + linear_logit_list.append(linear_logit) + + return concat_func(linear_logit_list) + + +def input_from_feature_columns(features, feature_columns, l2_reg, seed, prefix='', seq_mask_zero=True, + support_dense=True, support_group=False): + sparse_feature_columns = list( + filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if feature_columns else [] + varlen_sparse_feature_columns = list( + filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if feature_columns else [] + + embedding_matrix_dict = create_embedding_matrix(feature_columns, l2_reg, seed, prefix=prefix, + seq_mask_zero=seq_mask_zero) + group_sparse_embedding_dict = embedding_lookup(embedding_matrix_dict, features, sparse_feature_columns) + dense_value_list = get_dense_input(features, feature_columns) + if not support_dense and len(dense_value_list) > 0: + raise ValueError("DenseFeat is not supported in dnn_feature_columns") + + sequence_embed_dict = varlen_embedding_lookup(embedding_matrix_dict, features, varlen_sparse_feature_columns) + group_varlen_sparse_embedding_dict = get_varlen_pooling_list(sequence_embed_dict, features, + varlen_sparse_feature_columns) + group_embedding_dict = mergeDict(group_sparse_embedding_dict, group_varlen_sparse_embedding_dict) + if not support_group: + group_embedding_dict = list(chain.from_iterable(group_embedding_dict.values())) + return group_embedding_dict, dense_value_list diff --git a/modelzoo/FNN/script/inputs.py b/modelzoo/FNN/script/inputs.py new file mode 100644 index 00000000000..d567f846265 --- /dev/null +++ b/modelzoo/FNN/script/inputs.py @@ -0,0 +1,155 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +from collections import defaultdict +from itertools import chain + +from tensorflow.python.keras.layers import Embedding, Lambda +from tensorflow.python.keras.regularizers import l2 + +from .layers.sequence import SequencePoolingLayer, WeightedSequenceLayer +from .layers.utils import Hash + + +def get_inputs_list(inputs): + return list(chain(*list(map(lambda x: x.values(), filter(lambda x: x is not None, inputs))))) + + +def create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, l2_reg, + prefix='sparse_', seq_mask_zero=True): + sparse_embedding = {} + for feat in sparse_feature_columns: + emb = Embedding(feat.vocabulary_size, feat.embedding_dim, + embeddings_initializer=feat.embeddings_initializer, + embeddings_regularizer=l2(l2_reg), + name=prefix + '_emb_' + feat.embedding_name) + emb.trainable = feat.trainable + sparse_embedding[feat.embedding_name] = emb + + if varlen_sparse_feature_columns and len(varlen_sparse_feature_columns) > 0: + for feat in varlen_sparse_feature_columns: + # if feat.name not in sparse_embedding: + emb = 
Embedding(feat.vocabulary_size, feat.embedding_dim, + embeddings_initializer=feat.embeddings_initializer, + embeddings_regularizer=l2( + l2_reg), + name=prefix + '_seq_emb_' + feat.name, + mask_zero=seq_mask_zero) + emb.trainable = feat.trainable + sparse_embedding[feat.embedding_name] = emb + return sparse_embedding + + +def get_embedding_vec_list(embedding_dict, input_dict, sparse_feature_columns, return_feat_list=(), mask_feat_list=()): + embedding_vec_list = [] + for fg in sparse_feature_columns: + feat_name = fg.name + if len(return_feat_list) == 0 or feat_name in return_feat_list: + if fg.use_hash: + lookup_idx = Hash(fg.vocabulary_size, mask_zero=(feat_name in mask_feat_list), vocabulary_path=fg.vocabulary_path)(input_dict[feat_name]) + else: + lookup_idx = input_dict[feat_name] + + embedding_vec_list.append(embedding_dict[feat_name](lookup_idx)) + + return embedding_vec_list + + +def create_embedding_matrix(feature_columns, l2_reg, seed, prefix="", seq_mask_zero=True): + from . import feature_column as fc_lib + + sparse_feature_columns = list( + filter(lambda x: isinstance(x, fc_lib.SparseFeat), feature_columns)) if feature_columns else [] + varlen_sparse_feature_columns = list( + filter(lambda x: isinstance(x, fc_lib.VarLenSparseFeat), feature_columns)) if feature_columns else [] + sparse_emb_dict = create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, + l2_reg, prefix=prefix + 'sparse', seq_mask_zero=seq_mask_zero) + return sparse_emb_dict + + +def embedding_lookup(sparse_embedding_dict, sparse_input_dict, sparse_feature_columns, return_feat_list=(), + mask_feat_list=(), to_list=False): + group_embedding_dict = defaultdict(list) + for fc in sparse_feature_columns: + feature_name = fc.name + embedding_name = fc.embedding_name + if (len(return_feat_list) == 0 or feature_name in return_feat_list): + if fc.use_hash: + lookup_idx = Hash(fc.vocabulary_size, mask_zero=(feature_name in mask_feat_list), vocabulary_path=fc.vocabulary_path)( + sparse_input_dict[feature_name]) + else: + lookup_idx = sparse_input_dict[feature_name] + + group_embedding_dict[fc.group_name].append(sparse_embedding_dict[embedding_name](lookup_idx)) + if to_list: + return list(chain.from_iterable(group_embedding_dict.values())) + return group_embedding_dict + + +def varlen_embedding_lookup(embedding_dict, sequence_input_dict, varlen_sparse_feature_columns): + varlen_embedding_vec_dict = {} + for fc in varlen_sparse_feature_columns: + feature_name = fc.name + embedding_name = fc.embedding_name + if fc.use_hash: + lookup_idx = Hash(fc.vocabulary_size, mask_zero=True, vocabulary_path=fc.vocabulary_path)(sequence_input_dict[feature_name]) + else: + lookup_idx = sequence_input_dict[feature_name] + varlen_embedding_vec_dict[feature_name] = embedding_dict[embedding_name](lookup_idx) + return varlen_embedding_vec_dict + + +def get_varlen_pooling_list(embedding_dict, features, varlen_sparse_feature_columns, to_list=False): + pooling_vec_list = defaultdict(list) + for fc in varlen_sparse_feature_columns: + feature_name = fc.name + combiner = fc.combiner + feature_length_name = fc.length_name + if feature_length_name is not None: + if fc.weight_name is not None: + seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm)( + [embedding_dict[feature_name], features[feature_length_name], features[fc.weight_name]]) + else: + seq_input = embedding_dict[feature_name] + vec = SequencePoolingLayer(combiner, supports_masking=False)( + [seq_input, features[feature_length_name]]) + else: + if 
fc.weight_name is not None: + seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm, supports_masking=True)( + [embedding_dict[feature_name], features[fc.weight_name]]) + else: + seq_input = embedding_dict[feature_name] + vec = SequencePoolingLayer(combiner, supports_masking=True)( + seq_input) + pooling_vec_list[fc.group_name].append(vec) + if to_list: + return chain.from_iterable(pooling_vec_list.values()) + return pooling_vec_list + + +def get_dense_input(features, feature_columns): + from . import feature_column as fc_lib + dense_feature_columns = list( + filter(lambda x: isinstance(x, fc_lib.DenseFeat), feature_columns)) if feature_columns else [] + dense_input_list = [] + for fc in dense_feature_columns: + if fc.transform_fn is None: + dense_input_list.append(features[fc.name]) + else: + transform_result = Lambda(fc.transform_fn)(features[fc.name]) + dense_input_list.append(transform_result) + return dense_input_list + + +def mergeDict(a, b): + c = defaultdict(list) + for k, v in a.items(): + c[k].extend(v) + for k, v in b.items(): + c[k].extend(v) + return c diff --git a/modelzoo/FNN/script/layers/__init__.py b/modelzoo/FNN/script/layers/__init__.py new file mode 100644 index 00000000000..1bfd40effe7 --- /dev/null +++ b/modelzoo/FNN/script/layers/__init__.py @@ -0,0 +1,52 @@ +import tensorflow as tf + +from .activation import Dice +from .core import DNN, LocalActivationUnit, PredictionLayer +from .interaction import (CIN, FM, AFMLayer, BiInteractionPooling, CrossNet, CrossNetMix, + InnerProductLayer, InteractingLayer, + OutterProductLayer, FGCNNLayer, SENETLayer, BilinearInteraction, + FieldWiseBiInteraction, FwFMLayer, FEFMLayer) +from .normalization import LayerNormalization +from .sequence import (AttentionSequencePoolingLayer, BiasEncoding, BiLSTM, + KMaxPooling, SequencePoolingLayer, WeightedSequenceLayer, + Transformer, DynamicGRU,PositionEncoding) + +from .utils import NoMask, Hash, Linear, _Add, combined_dnn_input, softmax, reduce_sum + +custom_objects = {'tf': tf, + 'InnerProductLayer': InnerProductLayer, + 'OutterProductLayer': OutterProductLayer, + 'DNN': DNN, + 'PredictionLayer': PredictionLayer, + 'FM': FM, + 'AFMLayer': AFMLayer, + 'CrossNet': CrossNet, + 'CrossNetMix': CrossNetMix, + 'BiInteractionPooling': BiInteractionPooling, + 'LocalActivationUnit': LocalActivationUnit, + 'Dice': Dice, + 'SequencePoolingLayer': SequencePoolingLayer, + 'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer, + 'CIN': CIN, + 'InteractingLayer': InteractingLayer, + 'LayerNormalization': LayerNormalization, + 'BiLSTM': BiLSTM, + 'Transformer': Transformer, + 'NoMask': NoMask, + 'BiasEncoding': BiasEncoding, + 'KMaxPooling': KMaxPooling, + 'FGCNNLayer': FGCNNLayer, + 'Hash': Hash, + 'Linear': Linear, + 'DynamicGRU': DynamicGRU, + 'SENETLayer': SENETLayer, + 'BilinearInteraction': BilinearInteraction, + 'WeightedSequenceLayer': WeightedSequenceLayer, + '_Add': _Add, + 'FieldWiseBiInteraction': FieldWiseBiInteraction, + 'FwFMLayer': FwFMLayer, + 'softmax': softmax, + 'FEFMLayer': FEFMLayer, + 'reduce_sum': reduce_sum, + 'PositionEncoding':PositionEncoding + } diff --git a/modelzoo/FNN/script/layers/activation.py b/modelzoo/FNN/script/layers/activation.py new file mode 100644 index 00000000000..1b953bff8bc --- /dev/null +++ b/modelzoo/FNN/script/layers/activation.py @@ -0,0 +1,85 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import tensorflow as tf + +try: + from tensorflow.python.ops.init_ops import Zeros +except 
ImportError:
+    from tensorflow.python.ops.init_ops_v2 import Zeros
+from tensorflow.python.keras.layers import Layer, Activation
+
+try:
+    from tensorflow.python.keras.layers import BatchNormalization
+except ImportError:
+    BatchNormalization = tf.keras.layers.BatchNormalization
+
+try:
+    unicode
+except NameError:
+    unicode = str
+
+
+class Dice(Layer):
+    """The Data Adaptive Activation Function in DIN, which can be viewed as a generalization of PReLU and can adaptively adjust the rectified point according to the distribution of input data.
+
+      Input shape
+        - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
+
+      Output shape
+        - Same shape as the input.
+
+      Arguments
+        - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis).
+
+        - **epsilon** : Small float added to variance to avoid dividing by zero.
+
+      References
+        - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf)
+    """
+
+    def __init__(self, axis=-1, epsilon=1e-9, **kwargs):
+        self.axis = axis
+        self.epsilon = epsilon
+        super(Dice, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        self.bn = BatchNormalization(
+            axis=self.axis, epsilon=self.epsilon, center=False, scale=False)
+        self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros(
+        ), dtype=tf.float32, name='dice_alpha')  # name='alpha_'+self.name
+        super(Dice, self).build(input_shape)  # Be sure to call this somewhere!
+        self.uses_learning_phase = True
+
+    def call(self, inputs, training=None, **kwargs):
+        inputs_normed = self.bn(inputs, training=training)
+        # tf.layers.batch_normalization(
+        #     inputs, axis=self.axis, epsilon=self.epsilon, center=False, scale=False)
+        x_p = tf.sigmoid(inputs_normed)
+        return self.alphas * (1.0 - x_p) * inputs + x_p * inputs
+
+    def compute_output_shape(self, input_shape):
+        return input_shape
+
+    def get_config(self, ):
+        config = {'axis': self.axis, 'epsilon': self.epsilon}
+        base_config = super(Dice, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+
+def activation_layer(activation):
+    if activation in ("dice", "Dice"):
+        act_layer = Dice()
+    elif isinstance(activation, (str, unicode)):
+        act_layer = Activation(activation)
+    elif issubclass(activation, Layer):
+        act_layer = activation()
+    else:
+        raise ValueError(
+            "Invalid activation, found %s. You should use a str or an Activation layer class."
% (activation)) + return act_layer diff --git a/modelzoo/FNN/script/layers/core.py b/modelzoo/FNN/script/layers/core.py new file mode 100644 index 00000000000..668348d2eb7 --- /dev/null +++ b/modelzoo/FNN/script/layers/core.py @@ -0,0 +1,267 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import tensorflow as tf +from tensorflow.python.keras import backend as K + +try: + from tensorflow.python.ops.init_ops_v2 import Zeros, glorot_normal +except ImportError: + from tensorflow.python.ops.init_ops import Zeros, glorot_normal_initializer as glorot_normal + +from tensorflow.python.keras.layers import Layer, Dropout + +try: + from tensorflow.python.keras.layers import BatchNormalization +except ImportError: + BatchNormalization = tf.keras.layers.BatchNormalization +from tensorflow.python.keras.regularizers import l2 + +from .activation import activation_layer + + +class LocalActivationUnit(Layer): + """The LocalActivationUnit used in DIN with which the representation of + user interests varies adaptively given different candidate items. + + Input shape + - A list of two 3D tensor with shape: ``(batch_size, 1, embedding_size)`` and ``(batch_size, T, embedding_size)`` + + Output shape + - 3D tensor with shape: ``(batch_size, T, 1)``. + + Arguments + - **hidden_units**:list of positive integer, the attention net layer number and units in each layer. + + - **activation**: Activation function to use in attention net. + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix of attention net. + + - **dropout_rate**: float in [0,1). Fraction of the units to dropout in attention net. + + - **use_bn**: bool. Whether use BatchNormalization before activation or not in attention net. + + - **seed**: A Python integer to use as random seed. + + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """ + + def __init__(self, hidden_units=(64, 32), activation='sigmoid', l2_reg=0, dropout_rate=0, use_bn=False, seed=1024, + **kwargs): + self.hidden_units = hidden_units + self.activation = activation + self.l2_reg = l2_reg + self.dropout_rate = dropout_rate + self.use_bn = use_bn + self.seed = seed + super(LocalActivationUnit, self).__init__(**kwargs) + self.supports_masking = True + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) != 2: + raise ValueError('A `LocalActivationUnit` layer should be called ' + 'on a list of 2 inputs') + + if len(input_shape[0]) != 3 or len(input_shape[1]) != 3: + raise ValueError("Unexpected inputs dimensions %d and %d, expect to be 3 dimensions" % ( + len(input_shape[0]), len(input_shape[1]))) + + if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1: + raise ValueError('A `LocalActivationUnit` layer requires ' + 'inputs of a two inputs with shape (None,1,embedding_size) and (None,T,embedding_size)' + 'Got different shapes: %s,%s' % (input_shape[0], input_shape[1])) + size = 4 * \ + int(input_shape[0][-1] + ) if len(self.hidden_units) == 0 else self.hidden_units[-1] + self.kernel = self.add_weight(shape=(size, 1), + initializer=glorot_normal( + seed=self.seed), + name="kernel") + self.bias = self.add_weight( + shape=(1,), initializer=Zeros(), name="bias") + self.dnn = DNN(self.hidden_units, self.activation, self.l2_reg, self.dropout_rate, self.use_bn, seed=self.seed) + + super(LocalActivationUnit, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, training=None, **kwargs): + + query, keys = inputs + + keys_len = keys.get_shape()[1] + queries = K.repeat_elements(query, keys_len, 1) + + att_input = tf.concat( + [queries, keys, queries - keys, queries * keys], axis=-1) + + att_out = self.dnn(att_input, training=training) + + attention_score = tf.nn.bias_add(tf.tensordot(att_out, self.kernel, axes=(-1, 0)), self.bias) + + return attention_score + + def compute_output_shape(self, input_shape): + return input_shape[1][:2] + (1,) + + def compute_mask(self, inputs, mask): + return mask + + def get_config(self, ): + config = {'activation': self.activation, 'hidden_units': self.hidden_units, + 'l2_reg': self.l2_reg, 'dropout_rate': self.dropout_rate, 'use_bn': self.use_bn, 'seed': self.seed} + base_config = super(LocalActivationUnit, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class DNN(Layer): + """The Multi Layer Percetron + + Input shape + - nD tensor with shape: ``(batch_size, ..., input_dim)``. The most common situation would be a 2D input with shape ``(batch_size, input_dim)``. + + Output shape + - nD tensor with shape: ``(batch_size, ..., hidden_size[-1])``. For instance, for a 2D input with shape ``(batch_size, input_dim)``, the output would have shape ``(batch_size, hidden_size[-1])``. + + Arguments + - **hidden_units**:list of positive integer, the layer number and units in each layer. + + - **activation**: Activation function to use. + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix. + + - **dropout_rate**: float in [0,1). Fraction of the units to dropout. + + - **use_bn**: bool. Whether use BatchNormalization before activation or not. + + - **output_activation**: Activation function to use in the last layer.If ``None``,it will be same as ``activation``. 
+ + - **seed**: A Python integer to use as random seed. + """ + + def __init__(self, hidden_units, activation='relu', l2_reg=0, dropout_rate=0, use_bn=False, output_activation=None, + seed=1024, **kwargs): + self.hidden_units = hidden_units + self.activation = activation + self.l2_reg = l2_reg + self.dropout_rate = dropout_rate + self.use_bn = use_bn + self.output_activation = output_activation + self.seed = seed + + super(DNN, self).__init__(**kwargs) + + def build(self, input_shape): + # if len(self.hidden_units) == 0: + # raise ValueError("hidden_units is empty") + input_size = input_shape[-1] + hidden_units = [int(input_size)] + list(self.hidden_units) + self.kernels = [self.add_weight(name='kernel' + str(i), + shape=( + hidden_units[i], hidden_units[i + 1]), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(len(self.hidden_units))] + self.bias = [self.add_weight(name='bias' + str(i), + shape=(self.hidden_units[i],), + initializer=Zeros(), + trainable=True) for i in range(len(self.hidden_units))] + if self.use_bn: + self.bn_layers = [BatchNormalization() for _ in range(len(self.hidden_units))] + + self.dropout_layers = [Dropout(self.dropout_rate, seed=self.seed + i) for i in + range(len(self.hidden_units))] + + self.activation_layers = [activation_layer(self.activation) for _ in range(len(self.hidden_units))] + + if self.output_activation: + self.activation_layers[-1] = activation_layer(self.output_activation) + + super(DNN, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, training=None, **kwargs): + + deep_input = inputs + + for i in range(len(self.hidden_units)): + fc = tf.nn.bias_add(tf.tensordot( + deep_input, self.kernels[i], axes=(-1, 0)), self.bias[i]) + + if self.use_bn: + fc = self.bn_layers[i](fc, training=training) + try: + fc = self.activation_layers[i](fc, training=training) + except TypeError as e: # TypeError: call() got an unexpected keyword argument 'training' + print("make sure the activation function use training flag properly", e) + fc = self.activation_layers[i](fc) + + fc = self.dropout_layers[i](fc, training=training) + deep_input = fc + + return deep_input + + def compute_output_shape(self, input_shape): + if len(self.hidden_units) > 0: + shape = input_shape[:-1] + (self.hidden_units[-1],) + else: + shape = input_shape + + return tuple(shape) + + def get_config(self, ): + config = {'activation': self.activation, 'hidden_units': self.hidden_units, + 'l2_reg': self.l2_reg, 'use_bn': self.use_bn, 'dropout_rate': self.dropout_rate, + 'output_activation': self.output_activation, 'seed': self.seed} + base_config = super(DNN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class PredictionLayer(Layer): + """ + Arguments + - **task**: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss + + - **use_bias**: bool.Whether add bias term or not. + """ + + def __init__(self, task='binary', use_bias=True, **kwargs): + if task not in ["binary", "multiclass", "regression"]: + raise ValueError("task must be binary,multiclass or regression") + self.task = task + self.use_bias = use_bias + super(PredictionLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if self.use_bias: + self.global_bias = self.add_weight( + shape=(1,), initializer=Zeros(), name="global_bias") + + # Be sure to call this somewhere! 
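+        # global_bias is a single learnable scalar added to every logit in
+        # call(); for task="binary" a sigmoid then maps the logit to a
+        # probability, and the output is reshaped to (batch_size, 1).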
+        super(PredictionLayer, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        x = inputs
+        if self.use_bias:
+            x = tf.nn.bias_add(x, self.global_bias, data_format='NHWC')
+        if self.task == "binary":
+            x = tf.sigmoid(x)
+
+        output = tf.reshape(x, (-1, 1))
+
+        return output
+
+    def compute_output_shape(self, input_shape):
+        return (None, 1)
+
+    def get_config(self, ):
+        config = {'task': self.task, 'use_bias': self.use_bias}
+        base_config = super(PredictionLayer, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/modelzoo/FNN/script/layers/interaction.py b/modelzoo/FNN/script/layers/interaction.py
new file mode 100644
index 00000000000..f19be14be9c
--- /dev/null
+++ b/modelzoo/FNN/script/layers/interaction.py
@@ -0,0 +1,1492 @@
+# -*- coding:utf-8 -*-
+"""
+
+Authors:
+    Weichen Shen,weichenswc@163.com,
+    Harshit Pande
+
+"""
+
+import itertools
+
+import tensorflow as tf
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras.backend import batch_dot
+
+try:
+    from tensorflow.python.ops.init_ops import Zeros, Ones, Constant, TruncatedNormal, \
+        glorot_normal_initializer as glorot_normal, \
+        glorot_uniform_initializer as glorot_uniform
+except ImportError:
+    from tensorflow.python.ops.init_ops_v2 import Zeros, Ones, Constant, TruncatedNormal, glorot_normal, glorot_uniform
+
+from tensorflow.python.keras.layers import Layer, MaxPooling2D, Conv2D, Dropout, Lambda, Dense, Flatten
+from tensorflow.python.keras.regularizers import l2
+from tensorflow.python.layers import utils
+
+from .activation import activation_layer
+from .utils import concat_func, reduce_sum, softmax, reduce_mean
+
+
+class AFMLayer(Layer):
+    """Attentional Factorization Machine models pairwise (order-2) feature
+    interactions without linear term and bias.
+
+      Input shape
+        - A list of 3D tensors with shape: ``(batch_size,1,embedding_size)``.
+
+      Output shape
+        - 2D tensor with shape: ``(batch_size, 1)``.
+
+      Arguments
+        - **attention_factor** : Positive integer, dimensionality of the
+         attention network output space.
+
+        - **l2_reg_w** : float between 0 and 1. L2 regularizer strength
+         applied to the attention network.
+
+        - **dropout_rate** : float in [0,1). Fraction of the attention net output units to dropout.
+
+        - **seed** : A Python integer to use as random seed.
+ + References + - [Attentional Factorization Machines : Learning the Weight of Feature + Interactions via Attention Networks](https://arxiv.org/pdf/1708.04617.pdf) + """ + + def __init__(self, attention_factor=4, l2_reg_w=0, dropout_rate=0, seed=1024, **kwargs): + self.attention_factor = attention_factor + self.l2_reg_w = l2_reg_w + self.dropout_rate = dropout_rate + self.seed = seed + super(AFMLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + # input_shape = input_shape[0] + # if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `AttentionalFM` layer should be called ' + 'on a list of at least 2 inputs') + + shape_set = set() + reduced_input_shape = [shape.as_list() for shape in input_shape] + for i in range(len(input_shape)): + shape_set.add(tuple(reduced_input_shape[i])) + + if len(shape_set) > 1: + raise ValueError('A `AttentionalFM` layer requires ' + 'inputs with same shapes ' + 'Got different shapes: %s' % (shape_set)) + + if len(input_shape[0]) != 3 or input_shape[0][1] != 1: + raise ValueError('A `AttentionalFM` layer requires ' + 'inputs of a list with same shape tensor like\ + (None, 1, embedding_size)' + 'Got different shapes: %s' % (input_shape[0])) + + embedding_size = int(input_shape[0][-1]) + + self.attention_W = self.add_weight(shape=(embedding_size, + self.attention_factor), initializer=glorot_normal(seed=self.seed), + regularizer=l2(self.l2_reg_w), name="attention_W") + self.attention_b = self.add_weight( + shape=(self.attention_factor,), initializer=Zeros(), name="attention_b") + self.projection_h = self.add_weight(shape=(self.attention_factor, 1), + initializer=glorot_normal(seed=self.seed), name="projection_h") + self.projection_p = self.add_weight(shape=( + embedding_size, 1), initializer=glorot_normal(seed=self.seed), name="projection_p") + self.dropout = Dropout( + self.dropout_rate, seed=self.seed) + + self.tensordot = Lambda( + lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0))) + + # Be sure to call this somewhere! 
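+        # Shapes of the parameters created above: attention_W
+        # (embedding_size, attention_factor) and attention_b
+        # (attention_factor,) form the attention net, projection_h
+        # (attention_factor, 1) scores each pairwise interaction, and
+        # projection_p (embedding_size, 1) maps the attended vector to the
+        # final logit.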
+ super(AFMLayer, self).build(input_shape) + + def call(self, inputs, training=None, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embeds_vec_list = inputs + row = [] + col = [] + + for r, c in itertools.combinations(embeds_vec_list, 2): + row.append(r) + col.append(c) + + p = tf.concat(row, axis=1) + q = tf.concat(col, axis=1) + inner_product = p * q + + bi_interaction = inner_product + attention_temp = tf.nn.relu(tf.nn.bias_add(tf.tensordot( + bi_interaction, self.attention_W, axes=(-1, 0)), self.attention_b)) + # Dense(self.attention_factor,'relu',kernel_regularizer=l2(self.l2_reg_w))(bi_interaction) + self.normalized_att_score = softmax(tf.tensordot( + attention_temp, self.projection_h, axes=(-1, 0)), dim=1) + attention_output = reduce_sum( + self.normalized_att_score * bi_interaction, axis=1) + + attention_output = self.dropout(attention_output, training=training) # training + + afm_out = self.tensordot([attention_output, self.projection_p]) + return afm_out + + def compute_output_shape(self, input_shape): + + if not isinstance(input_shape, list): + raise ValueError('A `AFMLayer` layer should be called ' + 'on a list of inputs.') + return (None, 1) + + def get_config(self, ): + config = {'attention_factor': self.attention_factor, + 'l2_reg_w': self.l2_reg_w, 'dropout_rate': self.dropout_rate, 'seed': self.seed} + base_config = super(AFMLayer, self).get_config() + base_config.update(config) + return base_config + + +class BiInteractionPooling(Layer): + """Bi-Interaction Layer used in Neural FM,compress the + pairwise element-wise product of features into one single vector. + + Input shape + - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``. + + Output shape + - 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + References + - [He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364.](http://arxiv.org/abs/1708.05027) + """ + + def __init__(self, **kwargs): + + super(BiInteractionPooling, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + + super(BiInteractionPooling, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + concated_embeds_value = inputs + square_of_sum = tf.square(reduce_sum( + concated_embeds_value, axis=1, keep_dims=True)) + sum_of_square = reduce_sum( + concated_embeds_value * concated_embeds_value, axis=1, keep_dims=True) + cross_term = 0.5 * (square_of_sum - sum_of_square) + + return cross_term + + def compute_output_shape(self, input_shape): + return (None, 1, input_shape[-1]) + + +class CIN(Layer): + """Compressed Interaction Network used in xDeepFM.This implemention is + adapted from code that the author of the paper published on https://github.com/Leavingseason/xDeepFM. + + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size, featuremap_num)`` ``featuremap_num = sum(self.layer_size[:-1]) // 2 + self.layer_size[-1]`` if ``split_half=True``,else ``sum(layer_size)`` . 
+ + Arguments + - **layer_size** : list of int.Feature maps in each layer. + + - **activation** : activation function used on feature maps. + + - **split_half** : bool.if set to False, half of the feature maps in each hidden will connect to output unit. + + - **seed** : A Python integer to use as random seed. + + References + - [Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.] (https://arxiv.org/pdf/1803.05170.pdf) + """ + + def __init__(self, layer_size=(128, 128), activation='relu', split_half=True, l2_reg=1e-5, seed=1024, **kwargs): + if len(layer_size) == 0: + raise ValueError( + "layer_size must be a list(tuple) of length greater than 1") + self.layer_size = layer_size + self.split_half = split_half + self.activation = activation + self.l2_reg = l2_reg + self.seed = seed + super(CIN, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + + self.field_nums = [int(input_shape[1])] + self.filters = [] + self.bias = [] + for i, size in enumerate(self.layer_size): + + self.filters.append(self.add_weight(name='filter' + str(i), + shape=[1, self.field_nums[-1] + * self.field_nums[0], size], + dtype=tf.float32, initializer=glorot_uniform( + seed=self.seed + i), + regularizer=l2(self.l2_reg))) + + self.bias.append(self.add_weight(name='bias' + str(i), shape=[size], dtype=tf.float32, + initializer=Zeros())) + + if self.split_half: + if i != len(self.layer_size) - 1 and size % 2 > 0: + raise ValueError( + "layer_size must be even number except for the last layer when split_half=True") + + self.field_nums.append(size // 2) + else: + self.field_nums.append(size) + + self.activation_layers = [activation_layer( + self.activation) for _ in self.layer_size] + + super(CIN, self).build(input_shape) # Be sure to call this somewhere! 
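+    # Reading aid for call() below: at layer k it (1) forms the outer
+    # products of the base field maps and the previous layer's maps via
+    # per-dimension tf.split + tf.matmul, (2) compresses the resulting
+    # field_nums[0] * field_nums[k] interaction maps down to layer_size[k]
+    # maps with a 1-D convolution, and (3) with split_half=True routes half
+    # of the maps to the next layer and half directly to the output (the
+    # last layer connects entirely to the output), which is finally summed
+    # over the embedding axis.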
+ + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + dim = int(inputs.get_shape()[-1]) + hidden_nn_layers = [inputs] + final_result = [] + + split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2) + for idx, layer_size in enumerate(self.layer_size): + split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2) + + dot_result_m = tf.matmul( + split_tensor0, split_tensor, transpose_b=True) + + dot_result_o = tf.reshape( + dot_result_m, shape=[dim, -1, self.field_nums[0] * self.field_nums[idx]]) + + dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2]) + + curr_out = tf.nn.conv1d( + dot_result, filters=self.filters[idx], stride=1, padding='VALID') + + curr_out = tf.nn.bias_add(curr_out, self.bias[idx]) + + curr_out = self.activation_layers[idx](curr_out) + + curr_out = tf.transpose(curr_out, perm=[0, 2, 1]) + + if self.split_half: + if idx != len(self.layer_size) - 1: + next_hidden, direct_connect = tf.split( + curr_out, 2 * [layer_size // 2], 1) + else: + direct_connect = curr_out + next_hidden = 0 + else: + direct_connect = curr_out + next_hidden = curr_out + + final_result.append(direct_connect) + hidden_nn_layers.append(next_hidden) + + result = tf.concat(final_result, axis=1) + result = reduce_sum(result, -1, keep_dims=False) + + return result + + def compute_output_shape(self, input_shape): + if self.split_half: + featuremap_num = sum( + self.layer_size[:-1]) // 2 + self.layer_size[-1] + else: + featuremap_num = sum(self.layer_size) + return (None, featuremap_num) + + def get_config(self, ): + + config = {'layer_size': self.layer_size, 'split_half': self.split_half, 'activation': self.activation, + 'seed': self.seed} + base_config = super(CIN, self).get_config() + base_config.update(config) + return base_config + + +class CrossNet(Layer): + """The Cross Network part of Deep&Cross Network model, + which leans both low and high degree cross feature. + + Input shape + - 2D tensor with shape: ``(batch_size, units)``. + + Output shape + - 2D tensor with shape: ``(batch_size, units)``. + + Arguments + - **layer_num**: Positive integer, the cross layer number + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix + + - **parameterization**: string, ``"vector"`` or ``"matrix"`` , way to parameterize the cross network. + + - **seed**: A Python integer to use as random seed. + + References + - [Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. 
ACM, 2017: 12.](https://arxiv.org/abs/1708.05123) + """ + + def __init__(self, layer_num=2, parameterization='vector', l2_reg=0, seed=1024, **kwargs): + self.layer_num = layer_num + self.parameterization = parameterization + self.l2_reg = l2_reg + self.seed = seed + print('CrossNet parameterization:', self.parameterization) + super(CrossNet, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),)) + + dim = int(input_shape[-1]) + if self.parameterization == 'vector': + self.kernels = [self.add_weight(name='kernel' + str(i), + shape=(dim, 1), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + elif self.parameterization == 'matrix': + self.kernels = [self.add_weight(name='kernel' + str(i), + shape=(dim, dim), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + else: # error + raise ValueError("parameterization should be 'vector' or 'matrix'") + self.bias = [self.add_weight(name='bias' + str(i), + shape=(dim, 1), + initializer=Zeros(), + trainable=True) for i in range(self.layer_num)] + # Be sure to call this somewhere! + super(CrossNet, self).build(input_shape) + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs))) + + x_0 = tf.expand_dims(inputs, axis=2) + x_l = x_0 + for i in range(self.layer_num): + if self.parameterization == 'vector': + xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0)) + dot_ = tf.matmul(x_0, xl_w) + x_l = dot_ + self.bias[i] + x_l + elif self.parameterization == 'matrix': + xl_w = tf.einsum('ij,bjk->bik', self.kernels[i], x_l) # W * xi (bs, dim, 1) + dot_ = xl_w + self.bias[i] # W * xi + b + x_l = x_0 * dot_ + x_l # x0 · (W * xi + b) +xl Hadamard-product + else: # error + raise ValueError("parameterization should be 'vector' or 'matrix'") + x_l = tf.squeeze(x_l, axis=2) + return x_l + + def get_config(self, ): + + config = {'layer_num': self.layer_num, 'parameterization': self.parameterization, + 'l2_reg': self.l2_reg, 'seed': self.seed} + base_config = super(CrossNet, self).get_config() + base_config.update(config) + return base_config + + def compute_output_shape(self, input_shape): + return input_shape + + +class CrossNetMix(Layer): + """The Cross Network part of DCN-Mix model, which improves DCN-M by: + 1 add MOE to learn feature interactions in different subspaces + 2 add nonlinear transformations in low-dimensional space + + Input shape + - 2D tensor with shape: ``(batch_size, units)``. + + Output shape + - 2D tensor with shape: ``(batch_size, units)``. + + Arguments + - **low_rank** : Positive integer, dimensionality of low-rank sapce. + + - **num_experts** : Positive integer, number of experts. + + - **layer_num**: Positive integer, the cross layer number + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix + + - **seed**: A Python integer to use as random seed. + + References + - [Wang R, Shivanna R, Cheng D Z, et al. DCN-M: Improved Deep & Cross Network for Feature Cross Learning in Web-scale Learning to Rank Systems[J]. 
2020.](https://arxiv.org/abs/2008.13535) + """ + + def __init__(self, low_rank=32, num_experts=4, layer_num=2, l2_reg=0, seed=1024, **kwargs): + self.low_rank = low_rank + self.num_experts = num_experts + self.layer_num = layer_num + self.l2_reg = l2_reg + self.seed = seed + super(CrossNetMix, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),)) + + dim = int(input_shape[-1]) + + # U: (dim, low_rank) + self.U_list = [self.add_weight(name='U_list' + str(i), + shape=(self.num_experts, dim, self.low_rank), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + # V: (dim, low_rank) + self.V_list = [self.add_weight(name='V_list' + str(i), + shape=(self.num_experts, dim, self.low_rank), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + # C: (low_rank, low_rank) + self.C_list = [self.add_weight(name='C_list' + str(i), + shape=(self.num_experts, self.low_rank, self.low_rank), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + + self.gating = [Dense(1, use_bias=False) for i in range(self.num_experts)] + + self.bias = [self.add_weight(name='bias' + str(i), + shape=(dim, 1), + initializer=Zeros(), + trainable=True) for i in range(self.layer_num)] + # Be sure to call this somewhere! + super(CrossNetMix, self).build(input_shape) + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs))) + + x_0 = tf.expand_dims(inputs, axis=2) + x_l = x_0 + for i in range(self.layer_num): + output_of_experts = [] + gating_score_of_experts = [] + for expert_id in range(self.num_experts): + # (1) G(x_l) + # compute the gating score by x_l + gating_score_of_experts.append(self.gating[expert_id](tf.squeeze(x_l, axis=2))) + + # (2) E(x_l) + # project the input x_l to $\mathbb{R}^{r}$ + v_x = tf.einsum('ij,bjk->bik', tf.transpose(self.V_list[i][expert_id]), x_l) # (bs, low_rank, 1) + + # nonlinear activation in low rank space + v_x = tf.nn.tanh(v_x) + v_x = tf.einsum('ij,bjk->bik', self.C_list[i][expert_id], v_x) # (bs, low_rank, 1) + v_x = tf.nn.tanh(v_x) + + # project back to $\mathbb{R}^{d}$ + uv_x = tf.einsum('ij,bjk->bik', self.U_list[i][expert_id], v_x) # (bs, dim, 1) + + dot_ = uv_x + self.bias[i] + dot_ = x_0 * dot_ # Hadamard-product + + output_of_experts.append(tf.squeeze(dot_, axis=2)) + + # (3) mixture of low-rank experts + output_of_experts = tf.stack(output_of_experts, 2) # (bs, dim, num_experts) + gating_score_of_experts = tf.stack(gating_score_of_experts, 1) # (bs, num_experts, 1) + moe_out = tf.matmul(output_of_experts, tf.nn.softmax(gating_score_of_experts, 1)) + x_l = moe_out + x_l # (bs, dim, 1) + x_l = tf.squeeze(x_l, axis=2) + return x_l + + def get_config(self, ): + + config = {'low_rank': self.low_rank, 'num_experts': self.num_experts, 'layer_num': self.layer_num, + 'l2_reg': self.l2_reg, 'seed': self.seed} + base_config = super(CrossNetMix, self).get_config() + base_config.update(config) + return base_config + + def compute_output_shape(self, input_shape): + return input_shape + + +class FM(Layer): + """Factorization Machine models pairwise (order-2) feature interactions + without linear term and bias. 
+ + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size, 1)``. + + References + - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) + """ + + def __init__(self, **kwargs): + + super(FM, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError("Unexpected inputs dimensions % d,\ + expect to be 3 dimensions" % (len(input_shape))) + + super(FM, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" + % (K.ndim(inputs))) + + concated_embeds_value = inputs + + square_of_sum = tf.square(reduce_sum( + concated_embeds_value, axis=1, keep_dims=True)) + sum_of_square = reduce_sum( + concated_embeds_value * concated_embeds_value, axis=1, keep_dims=True) + cross_term = square_of_sum - sum_of_square + cross_term = 0.5 * reduce_sum(cross_term, axis=2, keep_dims=False) + + return cross_term + + def compute_output_shape(self, input_shape): + return (None, 1) + + + +class InnerProductLayer(Layer): + """InnerProduct Layer used in PNN that compute the element-wise + product or inner product between feature vectors. + + Input shape + - a list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Output shape + - 3D tensor with shape: ``(batch_size, N*(N-1)/2 ,1)`` if use reduce_sum. or 3D tensor with shape: ``(batch_size, N*(N-1)/2, embedding_size )`` if not use reduce_sum. + + Arguments + - **reduce_sum**: bool. Whether return inner product or element-wise product + + References + - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf) + """ + + def __init__(self, reduce_sum=True, **kwargs): + self.reduce_sum = reduce_sum + super(InnerProductLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `InnerProductLayer` layer should be called ' + 'on a list of at least 2 inputs') + + reduced_inputs_shapes = [shape.as_list() for shape in input_shape] + shape_set = set() + + for i in range(len(input_shape)): + shape_set.add(tuple(reduced_inputs_shapes[i])) + + if len(shape_set) > 1: + raise ValueError('A `InnerProductLayer` layer requires ' + 'inputs with same shapes ' + 'Got different shapes: %s' % (shape_set)) + + if len(input_shape[0]) != 3 or input_shape[0][1] != 1: + raise ValueError('A `InnerProductLayer` layer requires ' + 'inputs of a list with same shape tensor like (None,1,embedding_size)' + 'Got different shapes: %s' % (input_shape[0])) + super(InnerProductLayer, self).build( + input_shape) # Be sure to call this somewhere! 
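+    # call() below enumerates all num_inputs * (num_inputs - 1) / 2 field
+    # pairs: row/col hold the pair indices, p and q gather the paired
+    # embeddings, and the element-wise product p * q is summed over the
+    # embedding axis when self.reduce_sum is True.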
+ + def call(self, inputs, **kwargs): + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embed_list = inputs + row = [] + col = [] + num_inputs = len(embed_list) + + for i in range(num_inputs - 1): + for j in range(i + 1, num_inputs): + row.append(i) + col.append(j) + p = tf.concat([embed_list[idx] + for idx in row], axis=1) # batch num_pairs k + q = tf.concat([embed_list[idx] + for idx in col], axis=1) + + inner_product = p * q + if self.reduce_sum: + inner_product = reduce_sum( + inner_product, axis=2, keep_dims=True) + return inner_product + + def compute_output_shape(self, input_shape): + num_inputs = len(input_shape) + num_pairs = int(num_inputs * (num_inputs - 1) / 2) + input_shape = input_shape[0] + embed_size = input_shape[-1] + if self.reduce_sum: + return (input_shape[0], num_pairs, 1) + else: + return (input_shape[0], num_pairs, embed_size) + + def get_config(self, ): + config = {'reduce_sum': self.reduce_sum, } + base_config = super(InnerProductLayer, self).get_config() + base_config.update(config) + return base_config + + +class InteractingLayer(Layer): + """A Layer used in AutoInt that model the correlations between different feature fields by multi-head self-attention mechanism. + + Input shape + - A 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 3D tensor with shape:``(batch_size,field_size,att_embedding_size * head_num)``. + + + Arguments + - **att_embedding_size**: int.The embedding size in multi-head self-attention network. + - **head_num**: int.The head number in multi-head self-attention network. + - **use_res**: bool.Whether or not use standard residual connections before output. + - **seed**: A Python integer to use as random seed. + + References + - [Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.](https://arxiv.org/abs/1810.11921) + """ + + def __init__(self, att_embedding_size=8, head_num=2, use_res=True, scaling=False, seed=1024, **kwargs): + if head_num <= 0: + raise ValueError('head_num must be a int > 0') + self.att_embedding_size = att_embedding_size + self.head_num = head_num + self.use_res = use_res + self.seed = seed + self.scaling = scaling + super(InteractingLayer, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + embedding_size = int(input_shape[-1]) + self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed)) + self.W_key = self.add_weight(name='key', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 1)) + self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 2)) + if self.use_res: + self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed)) + + # Be sure to call this somewhere! 
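+        # W_Query / W_key / W_Value each project the embedding into head_num
+        # attention subspaces of size att_embedding_size (concatenated along
+        # the last axis); W_Res is the optional residual projection added
+        # before the final ReLU in call().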
+ super(InteractingLayer, self).build(input_shape) + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + querys = tf.tensordot(inputs, self.W_Query, + axes=(-1, 0)) # None F D*head_num + keys = tf.tensordot(inputs, self.W_key, axes=(-1, 0)) + values = tf.tensordot(inputs, self.W_Value, axes=(-1, 0)) + + # head_num None F D + querys = tf.stack(tf.split(querys, self.head_num, axis=2)) + keys = tf.stack(tf.split(keys, self.head_num, axis=2)) + values = tf.stack(tf.split(values, self.head_num, axis=2)) + + inner_product = tf.matmul( + querys, keys, transpose_b=True) # head_num None F F + if self.scaling: + inner_product /= self.att_embedding_size ** 0.5 + self.normalized_att_scores = softmax(inner_product) + + result = tf.matmul(self.normalized_att_scores, + values) # head_num None F D + result = tf.concat(tf.split(result, self.head_num, ), axis=-1) + result = tf.squeeze(result, axis=0) # None F D*head_num + + if self.use_res: + result += tf.tensordot(inputs, self.W_Res, axes=(-1, 0)) + result = tf.nn.relu(result) + + return result + + def compute_output_shape(self, input_shape): + + return (None, input_shape[1], self.att_embedding_size * self.head_num) + + def get_config(self, ): + config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num, 'use_res': self.use_res, + 'seed': self.seed} + base_config = super(InteractingLayer, self).get_config() + base_config.update(config) + return base_config + + +class OutterProductLayer(Layer): + """OutterProduct Layer used in PNN.This implemention is + adapted from code that the author of the paper published on https://github.com/Atomu2014/product-nets. + + Input shape + - A list of N 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Output shape + - 2D tensor with shape:``(batch_size,N*(N-1)/2 )``. + + Arguments + - **kernel_type**: str. The kernel weight matrix type to use,can be mat,vec or num + + - **seed**: A Python integer to use as random seed. + + References + - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. 
IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf)
+ """
+
+ def __init__(self, kernel_type='mat', seed=1024, **kwargs):
+ if kernel_type not in ['mat', 'vec', 'num']:
+ raise ValueError("kernel_type must be mat, vec or num")
+ self.kernel_type = kernel_type
+ self.seed = seed
+ super(OutterProductLayer, self).__init__(**kwargs)
+
+ def build(self, input_shape):
+
+ if not isinstance(input_shape, list) or len(input_shape) < 2:
+ raise ValueError('A `OutterProductLayer` layer should be called '
+ 'on a list of at least 2 inputs')
+
+ reduced_inputs_shapes = [shape.as_list() for shape in input_shape]
+ shape_set = set()
+
+ for i in range(len(input_shape)):
+ shape_set.add(tuple(reduced_inputs_shapes[i]))
+
+ if len(shape_set) > 1:
+ raise ValueError('A `OutterProductLayer` layer requires '
+ 'inputs with the same shape. '
+ 'Got different shapes: %s' % (shape_set))
+
+ if len(input_shape[0]) != 3 or input_shape[0][1] != 1:
+ raise ValueError('A `OutterProductLayer` layer requires '
+ 'a list of inputs, each shaped like (None,1,embedding_size). '
+ 'Got shape: %s' % (input_shape[0]))
+ num_inputs = len(input_shape)
+ num_pairs = int(num_inputs * (num_inputs - 1) / 2)
+ input_shape = input_shape[0]
+ embed_size = int(input_shape[-1])
+ if self.kernel_type == 'mat':
+ self.kernel = self.add_weight(shape=(embed_size, num_pairs, embed_size),
+ initializer=glorot_uniform(
+ seed=self.seed),
+ name='kernel')
+ elif self.kernel_type == 'vec':
+ self.kernel = self.add_weight(shape=(num_pairs, embed_size,), initializer=glorot_uniform(self.seed),
+ name='kernel'
+ )
+ elif self.kernel_type == 'num':
+ self.kernel = self.add_weight(
+ shape=(num_pairs, 1), initializer=glorot_uniform(self.seed), name='kernel')
+
+ super(OutterProductLayer, self).build(
+ input_shape) # Be sure to call this somewhere!
+
+ def call(self, inputs, **kwargs):
+
+ if K.ndim(inputs[0]) != 3:
+ raise ValueError(
+ "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))
+
+ embed_list = inputs
+ row = []
+ col = []
+ num_inputs = len(embed_list)
+ for i in range(num_inputs - 1):
+ for j in range(i + 1, num_inputs):
+ row.append(i)
+ col.append(j)
+ p = tf.concat([embed_list[idx]
+ for idx in row], axis=1) # batch num_pairs k
+ q = tf.concat([embed_list[idx] for idx in col], axis=1)
+
+ if self.kernel_type == 'mat':
+ p = tf.expand_dims(p, 1) # batch * 1 * pair * k
+ # p * kernel -> batch * k * pair * k; reduce last axis -> batch * k * pair;
+ # transpose -> batch * pair * k; multiply by q and reduce -> batch * pair
+ kp = reduce_sum(
+ tf.multiply(
+ tf.transpose(
+ reduce_sum(tf.multiply(p, self.kernel), -1),
+ [0, 2, 1]),
+ q),
+ -1)
+ else:
+ # 1 * pair * (k or 1)
+ k = tf.expand_dims(self.kernel, 0)
+ # batch * pair
+ kp = reduce_sum(p * q * k, -1)
+
+ return kp
+
+ def compute_output_shape(self, input_shape):
+ num_inputs = len(input_shape)
+ num_pairs = int(num_inputs * (num_inputs - 1) / 2)
+ return (None, num_pairs)
+
+ def get_config(self, ):
+ config = {'kernel_type': self.kernel_type, 'seed': self.seed}
+ base_config = super(OutterProductLayer, self).get_config()
+ base_config.update(config)
+ return base_config
+
+
+class FGCNNLayer(Layer):
+ """Feature Generation Layer used in FGCNN, including Convolution, MaxPooling and Recombination.
+
+ Input shape
+ - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``.
+ + Output shape + - 3D tensor with shape: ``(batch_size,new_feture_num,embedding_size)``. + + References + - [Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019.](https://arxiv.org/pdf/1904.04447) + + """ + + def __init__(self, filters=(14, 16,), kernel_width=(7, 7,), new_maps=(3, 3,), pooling_width=(2, 2), + **kwargs): + if not (len(filters) == len(kernel_width) == len(new_maps) == len(pooling_width)): + raise ValueError("length of argument must be equal") + self.filters = filters + self.kernel_width = kernel_width + self.new_maps = new_maps + self.pooling_width = pooling_width + + super(FGCNNLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + self.conv_layers = [] + self.pooling_layers = [] + self.dense_layers = [] + pooling_shape = input_shape.as_list() + [1, ] + embedding_size = int(input_shape[-1]) + for i in range(1, len(self.filters) + 1): + filters = self.filters[i - 1] + width = self.kernel_width[i - 1] + new_filters = self.new_maps[i - 1] + pooling_width = self.pooling_width[i - 1] + conv_output_shape = self._conv_output_shape( + pooling_shape, (width, 1)) + pooling_shape = self._pooling_output_shape( + conv_output_shape, (pooling_width, 1)) + self.conv_layers.append(Conv2D(filters=filters, kernel_size=(width, 1), strides=(1, 1), + padding='same', + activation='tanh', use_bias=True, )) + self.pooling_layers.append( + MaxPooling2D(pool_size=(pooling_width, 1))) + self.dense_layers.append(Dense(pooling_shape[1] * embedding_size * new_filters, + activation='tanh', use_bias=True)) + + self.flatten = Flatten() + + super(FGCNNLayer, self).build( + input_shape) # Be sure to call this somewhere! 
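The stage-by-stage bookkeeping in `build` above (convolution keeps the field axis, pooling divides it by `pooling_width[i]`, and the recombination `Dense` emits `new_maps[i]` maps per surviving field) can be checked offline. A minimal sketch with hypothetical values, mirroring the arithmetic in `compute_output_shape` below:

```python
# Standalone check of FGCNN's generated-feature count (hypothetical config).
new_maps, pooling_width = (3, 3), (2, 2)

features_num = 26          # e.g. 26 input fields
new_features_num = 0
for i in range(len(pooling_width)):
    pooled = features_num // pooling_width[i]    # field axis after MaxPooling2D
    new_features_num += new_maps[i] * pooled     # maps emitted by the Dense recombination
    features_num = pooled

print(new_features_num)    # 3*13 + 3*6 = 57 generated features
```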
+ + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embedding_size = int(inputs.shape[-1]) + pooling_result = tf.expand_dims(inputs, axis=3) + + new_feature_list = [] + + for i in range(1, len(self.filters) + 1): + new_filters = self.new_maps[i - 1] + + conv_result = self.conv_layers[i - 1](pooling_result) + + pooling_result = self.pooling_layers[i - 1](conv_result) + + flatten_result = self.flatten(pooling_result) + + new_result = self.dense_layers[i - 1](flatten_result) + + new_feature_list.append( + tf.reshape(new_result, (-1, int(pooling_result.shape[1]) * new_filters, embedding_size))) + + new_features = concat_func(new_feature_list, axis=1) + return new_features + + def compute_output_shape(self, input_shape): + + new_features_num = 0 + features_num = input_shape[1] + + for i in range(0, len(self.kernel_width)): + pooled_features_num = features_num // self.pooling_width[i] + new_features_num += self.new_maps[i] * pooled_features_num + features_num = pooled_features_num + + return (None, new_features_num, input_shape[-1]) + + def get_config(self, ): + config = {'kernel_width': self.kernel_width, 'filters': self.filters, 'new_maps': self.new_maps, + 'pooling_width': self.pooling_width} + base_config = super(FGCNNLayer, self).get_config() + base_config.update(config) + return base_config + + def _conv_output_shape(self, input_shape, kernel_size): + # channels_last + space = input_shape[1:-1] + new_space = [] + for i in range(len(space)): + new_dim = utils.conv_output_length( + space[i], + kernel_size[i], + padding='same', + stride=1, + dilation=1) + new_space.append(new_dim) + return ([input_shape[0]] + new_space + [self.filters]) + + def _pooling_output_shape(self, input_shape, pool_size): + # channels_last + + rows = input_shape[1] + cols = input_shape[2] + rows = utils.conv_output_length(rows, pool_size[0], 'valid', + pool_size[0]) + cols = utils.conv_output_length(cols, pool_size[1], 'valid', + pool_size[1]) + return [input_shape[0], rows, cols, input_shape[3]] + + +class SENETLayer(Layer): + """SENETLayer used in FiBiNET. + + Input shape + - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Output shape + - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Arguments + - **reduction_ratio** : Positive integer, dimensionality of the + attention network output space. + + - **seed** : A Python integer to use as random seed. 
+ + References + - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) + """ + + def __init__(self, reduction_ratio=3, seed=1024, **kwargs): + self.reduction_ratio = reduction_ratio + + self.seed = seed + super(SENETLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `AttentionalFM` layer should be called ' + 'on a list of at least 2 inputs') + + self.filed_size = len(input_shape) + self.embedding_size = input_shape[0][-1] + reduction_size = max(1, self.filed_size // self.reduction_ratio) + + self.W_1 = self.add_weight(shape=( + self.filed_size, reduction_size), initializer=glorot_normal(seed=self.seed), name="W_1") + self.W_2 = self.add_weight(shape=( + reduction_size, self.filed_size), initializer=glorot_normal(seed=self.seed), name="W_2") + + self.tensordot = Lambda( + lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0))) + + # Be sure to call this somewhere! + super(SENETLayer, self).build(input_shape) + + def call(self, inputs, training=None, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + inputs = concat_func(inputs, axis=1) + Z = reduce_mean(inputs, axis=-1, ) + + A_1 = tf.nn.relu(self.tensordot([Z, self.W_1])) + A_2 = tf.nn.relu(self.tensordot([A_1, self.W_2])) + V = tf.multiply(inputs, tf.expand_dims(A_2, axis=2)) + + return tf.split(V, self.filed_size, axis=1) + + def compute_output_shape(self, input_shape): + + return input_shape + + def compute_mask(self, inputs, mask=None): + return [None] * self.filed_size + + def get_config(self, ): + config = {'reduction_ratio': self.reduction_ratio, 'seed': self.seed} + base_config = super(SENETLayer, self).get_config() + base_config.update(config) + return base_config + + +class BilinearInteraction(Layer): + """BilinearInteraction Layer used in FiBiNET. + + Input shape + - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. Its length is ``filed_size``. + + Output shape + - 3D tensor with shape: ``(batch_size,filed_size*(filed_size-1)/2,embedding_size)``. + + Arguments + - **bilinear_type** : String, types of bilinear functions used in this layer. + + - **seed** : A Python integer to use as random seed. 
+ + References + - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) + + """ + + def __init__(self, bilinear_type="interaction", seed=1024, **kwargs): + self.bilinear_type = bilinear_type + self.seed = seed + + super(BilinearInteraction, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `AttentionalFM` layer should be called ' + 'on a list of at least 2 inputs') + embedding_size = int(input_shape[0][-1]) + + if self.bilinear_type == "all": + self.W = self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal( + seed=self.seed), name="bilinear_weight") + elif self.bilinear_type == "each": + self.W_list = [self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal( + seed=self.seed), name="bilinear_weight" + str(i)) for i in range(len(input_shape) - 1)] + elif self.bilinear_type == "interaction": + self.W_list = [self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal( + seed=self.seed), name="bilinear_weight" + str(i) + '_' + str(j)) for i, j in + itertools.combinations(range(len(input_shape)), 2)] + else: + raise NotImplementedError + + super(BilinearInteraction, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + n = len(inputs) + if self.bilinear_type == "all": + vidots = [tf.tensordot(inputs[i], self.W, axes=(-1, 0)) for i in range(n)] + p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)] + elif self.bilinear_type == "each": + vidots = [tf.tensordot(inputs[i], self.W_list[i], axes=(-1, 0)) for i in range(n - 1)] + p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)] + elif self.bilinear_type == "interaction": + p = [tf.multiply(tf.tensordot(v[0], w, axes=(-1, 0)), v[1]) + for v, w in zip(itertools.combinations(inputs, 2), self.W_list)] + else: + raise NotImplementedError + output = concat_func(p, axis=1) + return output + + def compute_output_shape(self, input_shape): + filed_size = len(input_shape) + embedding_size = input_shape[0][-1] + + return (None, filed_size * (filed_size - 1) // 2, embedding_size) + + def get_config(self, ): + config = {'bilinear_type': self.bilinear_type, 'seed': self.seed} + base_config = super(BilinearInteraction, self).get_config() + base_config.update(config) + return base_config + + +class FieldWiseBiInteraction(Layer): + """Field-Wise Bi-Interaction Layer used in FLEN,compress the + pairwise element-wise product of features into one single vector. + + Input shape + - A list of 3D tensor with shape:``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size,embedding_size)``. + + Arguments + - **use_bias** : Boolean, if use bias. + - **seed** : A Python integer to use as random seed. 
+ + References + - [FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690) + + """ + + def __init__(self, use_bias=True, seed=1024, **kwargs): + self.use_bias = use_bias + self.seed = seed + + super(FieldWiseBiInteraction, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError( + 'A `Field-Wise Bi-Interaction` layer should be called ' + 'on a list of at least 2 inputs') + + self.num_fields = len(input_shape) + embedding_size = input_shape[0][-1] + + self.kernel_mf = self.add_weight( + name='kernel_mf', + shape=(int(self.num_fields * (self.num_fields - 1) / 2), 1), + initializer=Ones(), + regularizer=None, + trainable=True) + + self.kernel_fm = self.add_weight( + name='kernel_fm', + shape=(self.num_fields, 1), + initializer=Constant(value=0.5), + regularizer=None, + trainable=True) + if self.use_bias: + self.bias_mf = self.add_weight(name='bias_mf', + shape=(embedding_size), + initializer=Zeros()) + self.bias_fm = self.add_weight(name='bias_fm', + shape=(embedding_size), + initializer=Zeros()) + + super(FieldWiseBiInteraction, + self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % + (K.ndim(inputs))) + + field_wise_embeds_list = inputs + + # MF module + field_wise_vectors = tf.concat([ + reduce_sum(field_i_vectors, axis=1, keep_dims=True) + for field_i_vectors in field_wise_embeds_list + ], 1) + + left = [] + right = [] + + for i, j in itertools.combinations(list(range(self.num_fields)), 2): + left.append(i) + right.append(j) + + embeddings_left = tf.gather(params=field_wise_vectors, + indices=left, + axis=1) + embeddings_right = tf.gather(params=field_wise_vectors, + indices=right, + axis=1) + + embeddings_prod = embeddings_left * embeddings_right + field_weighted_embedding = embeddings_prod * self.kernel_mf + h_mf = reduce_sum(field_weighted_embedding, axis=1) + if self.use_bias: + h_mf = tf.nn.bias_add(h_mf, self.bias_mf) + + # FM module + square_of_sum_list = [ + tf.square(reduce_sum(field_i_vectors, axis=1, keep_dims=True)) + for field_i_vectors in field_wise_embeds_list + ] + sum_of_square_list = [ + reduce_sum(field_i_vectors * field_i_vectors, + axis=1, + keep_dims=True) + for field_i_vectors in field_wise_embeds_list + ] + + field_fm = tf.concat([ + square_of_sum - sum_of_square for square_of_sum, sum_of_square in + zip(square_of_sum_list, sum_of_square_list) + ], 1) + + h_fm = reduce_sum(field_fm * self.kernel_fm, axis=1) + if self.use_bias: + h_fm = tf.nn.bias_add(h_fm, self.bias_fm) + + return h_mf + h_fm + + def compute_output_shape(self, input_shape): + return (None, input_shape[0][-1]) + + def get_config(self, ): + config = {'use_bias': self.use_bias, 'seed': self.seed} + base_config = super(FieldWiseBiInteraction, self).get_config() + base_config.update(config) + return base_config + + +class FwFMLayer(Layer): + """Field-weighted Factorization Machines + + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size, 1)``. 
+ + Arguments + - **num_fields** : integer for number of fields + - **regularizer** : L2 regularizer weight for the field strength parameters of PNN + + References + - [Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising] + https://arxiv.org/pdf/1806.03514.pdf + """ + + def __init__(self, num_fields=4, regularizer=0.000001, **kwargs): + self.num_fields = num_fields + self.regularizer = regularizer + super(FwFMLayer, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError("Unexpected inputs dimensions % d,\ + expect to be 3 dimensions" % (len(input_shape))) + + if input_shape[1] != self.num_fields: + raise ValueError("Mismatch in number of fields {} and \ + concatenated embeddings dims {}".format(self.num_fields, input_shape[1])) + + self.field_strengths = self.add_weight(name='field_pair_strengths', + shape=(self.num_fields, self.num_fields), + initializer=TruncatedNormal(), + regularizer=l2(self.regularizer), + trainable=True) + + super(FwFMLayer, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" + % (K.ndim(inputs))) + + if inputs.shape[1] != self.num_fields: + raise ValueError("Mismatch in number of fields {} and \ + concatenated embeddings dims {}".format(self.num_fields, inputs.shape[1])) + + pairwise_inner_prods = [] + for fi, fj in itertools.combinations(range(self.num_fields), 2): + # get field strength for pair fi and fj + r_ij = self.field_strengths[fi, fj] + + # get embeddings for the features of both the fields + feat_embed_i = tf.squeeze(inputs[0:, fi:fi + 1, 0:], axis=1) + feat_embed_j = tf.squeeze(inputs[0:, fj:fj + 1, 0:], axis=1) + + f = tf.scalar_mul(r_ij, batch_dot(feat_embed_i, feat_embed_j, axes=1)) + pairwise_inner_prods.append(f) + + sum_ = tf.add_n(pairwise_inner_prods) + return sum_ + + def compute_output_shape(self, input_shape): + return (None, 1) + + def get_config(self): + config = super(FwFMLayer, self).get_config().copy() + config.update({ + 'num_fields': self.num_fields, + 'regularizer': self.regularizer + }) + return config + + +class FEFMLayer(Layer): + """Field-Embedded Factorization Machines + + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. 
+ + Output shape + - 2D tensor with shape: + ``(batch_size, (num_fields * (num_fields-1))/2)`` # concatenated FEFM interaction embeddings + + Arguments + - **regularizer** : L2 regularizer weight for the field pair matrix embeddings parameters of FEFM + + References + - [Field-Embedded Factorization Machines for Click-through Rate Prediction] + https://arxiv.org/pdf/2009.09931.pdf + """ + + def __init__(self, regularizer, **kwargs): + self.regularizer = regularizer + super(FEFMLayer, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError("Unexpected inputs dimensions % d,\ + expect to be 3 dimensions" % (len(input_shape))) + + self.num_fields = int(input_shape[1]) + embedding_size = int(input_shape[2]) + + self.field_embeddings = {} + for fi, fj in itertools.combinations(range(self.num_fields), 2): + field_pair_id = str(fi) + "-" + str(fj) + self.field_embeddings[field_pair_id] = self.add_weight(name='field_embeddings' + field_pair_id, + shape=(embedding_size, embedding_size), + initializer=TruncatedNormal(), + regularizer=l2(self.regularizer), + trainable=True) + + super(FEFMLayer, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" + % (K.ndim(inputs))) + + pairwise_inner_prods = [] + for fi, fj in itertools.combinations(range(self.num_fields), 2): + field_pair_id = str(fi) + "-" + str(fj) + feat_embed_i = tf.squeeze(inputs[0:, fi:fi + 1, 0:], axis=1) + feat_embed_j = tf.squeeze(inputs[0:, fj:fj + 1, 0:], axis=1) + field_pair_embed_ij = self.field_embeddings[field_pair_id] + + feat_embed_i_tr = tf.matmul(feat_embed_i, field_pair_embed_ij + tf.transpose(field_pair_embed_ij)) + + f = batch_dot(feat_embed_i_tr, feat_embed_j, axes=1) + pairwise_inner_prods.append(f) + + concat_vec = tf.concat(pairwise_inner_prods, axis=1) + return concat_vec + + def compute_output_shape(self, input_shape): + num_fields = int(input_shape[1]) + return (None, (num_fields * (num_fields - 1)) / 2) + + def get_config(self): + config = super(FEFMLayer, self).get_config().copy() + config.update({ + 'regularizer': self.regularizer, + }) + return config diff --git a/modelzoo/FNN/script/layers/normalization.py b/modelzoo/FNN/script/layers/normalization.py new file mode 100644 index 00000000000..3fceb1257d8 --- /dev/null +++ b/modelzoo/FNN/script/layers/normalization.py @@ -0,0 +1,51 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +from tensorflow.python.keras import backend as K +from tensorflow.python.keras.layers import Layer + +try: + from tensorflow.python.ops.init_ops import Zeros, Ones +except ImportError: + from tensorflow.python.ops.init_ops_v2 import Zeros, Ones + + +class LayerNormalization(Layer): + def __init__(self, axis=-1, eps=1e-9, center=True, + scale=True, **kwargs): + self.axis = axis + self.eps = eps + self.center = center + self.scale = scale + super(LayerNormalization, self).__init__(**kwargs) + + def build(self, input_shape): + self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:], + initializer=Ones(), trainable=True) + self.beta = self.add_weight(name='beta', shape=input_shape[-1:], + initializer=Zeros(), trainable=True) + super(LayerNormalization, self).build(input_shape) + + def call(self, inputs): + mean = K.mean(inputs, axis=self.axis, keepdims=True) + variance = K.mean(K.square(inputs - mean), axis=-1, keepdims=True) + std = 
K.sqrt(variance + self.eps)
+ outputs = (inputs - mean) / std
+ if self.scale:
+ outputs *= self.gamma
+ if self.center:
+ outputs += self.beta
+ return outputs
+
+ def compute_output_shape(self, input_shape):
+ return input_shape
+
+ def get_config(self, ):
+ config = {'axis': self.axis, 'eps': self.eps, 'center': self.center, 'scale': self.scale}
+ base_config = super(LayerNormalization, self).get_config()
+ return dict(list(base_config.items()) + list(config.items()))
diff --git a/modelzoo/FNN/script/layers/sequence.py b/modelzoo/FNN/script/layers/sequence.py
new file mode 100644
index 00000000000..45a65915c22
--- /dev/null
+++ b/modelzoo/FNN/script/layers/sequence.py
@@ -0,0 +1,901 @@
+# -*- coding:utf-8 -*-
+"""
+
+Author:
+ Weichen Shen,weichenswc@163.com
+
+"""
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.keras import backend as K
+
+try:
+ from tensorflow.python.ops.init_ops import TruncatedNormal, glorot_uniform_initializer as glorot_uniform, \
+ identity_initializer as identity
+except ImportError:
+ from tensorflow.python.ops.init_ops_v2 import TruncatedNormal, glorot_uniform, identity
+
+from tensorflow.python.keras.layers import LSTM, Lambda, Layer, Dropout
+
+from .core import LocalActivationUnit
+from .normalization import LayerNormalization
+
+if tf.__version__ >= '2.0.0':
+ from ..contrib.rnn_v2 import dynamic_rnn
+else:
+ from ..contrib.rnn import dynamic_rnn
+from ..contrib.utils import QAAttGRUCell, VecAttGRUCell
+from .utils import reduce_sum, reduce_max, div, softmax, reduce_mean
+
+
+class SequencePoolingLayer(Layer):
+ """The SequencePoolingLayer is used to apply a pooling operation (sum, mean or max) on variable-length sequence features/multi-value features.
+
+ Input shape
+ - A list of two tensors [seq_value, seq_len]
+
+ - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)``
+
+ - seq_len is a 2D tensor with shape: ``(batch_size, 1)``, indicating the valid length of each sequence.
+
+ Output shape
+ - 3D tensor with shape: ``(batch_size, 1, embedding_size)``.
+
+ Arguments
+ - **mode**: str. Pooling operation to be used; can be ``sum``, ``mean`` or ``max``.
+
+ - **supports_masking**: If True, the input needs to support masking.
+ """
+
+ def __init__(self, mode='mean', supports_masking=False, **kwargs):
+
+ if mode not in ['sum', 'mean', 'max']:
+ raise ValueError("mode must be sum, mean or max")
+ self.mode = mode
+ self.eps = tf.constant(1e-8, tf.float32)
+ super(SequencePoolingLayer, self).__init__(**kwargs)
+
+ self.supports_masking = supports_masking
+
+ def build(self, input_shape):
+ if not self.supports_masking:
+ self.seq_len_max = int(input_shape[0][1])
+ super(SequencePoolingLayer, self).build(
+ input_shape) # Be sure to call this somewhere!
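Before the `call` implementation below, a minimal usage sketch may help; it assumes this module is importable (the import path is hypothetical) and feeds the `[seq_value, seq_len]` pair described in the docstring:

```python
import numpy as np
import tensorflow as tf
# Hypothetical import path; adjust to wherever this package is installed.
# from script.layers.sequence import SequencePoolingLayer

seq_value = tf.constant(np.random.rand(2, 5, 8), dtype=tf.float32)  # (batch, T, embedding_size)
seq_len = tf.constant([[3], [5]], dtype=tf.int32)                   # valid length per sequence

pooled = SequencePoolingLayer(mode='mean')([seq_value, seq_len])
print(pooled.shape)  # (2, 1, 8): one pooled embedding per sequence
```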
+
+ def call(self, seq_value_len_list, mask=None, **kwargs):
+ if self.supports_masking:
+ if mask is None:
+ raise ValueError(
+ "When supports_masking=True, the input must support masking")
+ uiseq_embed_list = seq_value_len_list
+ mask = tf.cast(mask, tf.float32) # tf.to_float(mask)
+ user_behavior_length = reduce_sum(mask, axis=-1, keep_dims=True)
+ mask = tf.expand_dims(mask, axis=2)
+ else:
+ uiseq_embed_list, user_behavior_length = seq_value_len_list
+
+ mask = tf.sequence_mask(user_behavior_length,
+ self.seq_len_max, dtype=tf.float32)
+ mask = tf.transpose(mask, (0, 2, 1))
+
+ embedding_size = uiseq_embed_list.shape[-1]
+
+ mask = tf.tile(mask, [1, 1, embedding_size])
+
+ if self.mode == "max":
+ hist = uiseq_embed_list - (1 - mask) * 1e9
+ return reduce_max(hist, 1, keep_dims=True)
+
+ hist = reduce_sum(uiseq_embed_list * mask, 1, keep_dims=False)
+
+ if self.mode == "mean":
+ hist = div(hist, tf.cast(user_behavior_length, tf.float32) + self.eps)
+
+ hist = tf.expand_dims(hist, axis=1)
+ return hist
+
+ def compute_output_shape(self, input_shape):
+ if self.supports_masking:
+ return (None, 1, input_shape[-1])
+ else:
+ return (None, 1, input_shape[0][-1])
+
+ def compute_mask(self, inputs, mask):
+ return None
+
+ def get_config(self, ):
+ config = {'mode': self.mode, 'supports_masking': self.supports_masking}
+ base_config = super(SequencePoolingLayer, self).get_config()
+ return dict(list(base_config.items()) + list(config.items()))
+
+
+class WeightedSequenceLayer(Layer):
+ """The WeightedSequenceLayer is used to apply weight scores on variable-length sequence features/multi-value features.
+
+ Input shape
+ - A list of three tensors [seq_value, seq_len, seq_weight]
+
+ - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)``
+
+ - seq_len is a 2D tensor with shape: ``(batch_size, 1)``, indicating the valid length of each sequence.
+
+ - seq_weight is a 3D tensor with shape: ``(batch_size, T, 1)``
+
+ Output shape
+ - 3D tensor with shape: ``(batch_size, T, embedding_size)``.
+
+ Arguments
+ - **weight_normalization**: bool. Whether to normalize the weight scores before applying them to the sequence.
+
+ - **supports_masking**: If True, the input needs to support masking.
+ """
+
+ def __init__(self, weight_normalization=True, supports_masking=False, **kwargs):
+ super(WeightedSequenceLayer, self).__init__(**kwargs)
+ self.weight_normalization = weight_normalization
+ self.supports_masking = supports_masking
+
+ def build(self, input_shape):
+ if not self.supports_masking:
+ self.seq_len_max = int(input_shape[0][1])
+ super(WeightedSequenceLayer, self).build(
+ input_shape) # Be sure to call this somewhere!
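The `call` below builds its validity mask with the same `tf.sequence_mask` + transpose idiom as `SequencePoolingLayer`. A small sketch of just that mechanic, with made-up lengths:

```python
import tensorflow as tf

lengths = tf.constant([[2], [4]])                          # (batch, 1) valid lengths
mask = tf.sequence_mask(lengths, maxlen=4, dtype=tf.bool)  # (batch, 1, T)
mask = tf.transpose(mask, (0, 2, 1))                       # (batch, T, 1), aligned with seq_weight
print(mask.numpy()[:, :, 0])
# [[ True  True False False]
#  [ True  True  True  True]]
```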
+ + def call(self, input_list, mask=None, **kwargs): + if self.supports_masking: + if mask is None: + raise ValueError( + "When supports_masking=True,input must support masking") + key_input, value_input = input_list + mask = tf.expand_dims(mask[0], axis=2) + else: + key_input, key_length_input, value_input = input_list + mask = tf.sequence_mask(key_length_input, + self.seq_len_max, dtype=tf.bool) + mask = tf.transpose(mask, (0, 2, 1)) + + embedding_size = key_input.shape[-1] + + if self.weight_normalization: + paddings = tf.ones_like(value_input) * (-2 ** 32 + 1) + else: + paddings = tf.zeros_like(value_input) + value_input = tf.where(mask, value_input, paddings) + + if self.weight_normalization: + value_input = softmax(value_input, dim=1) + + if len(value_input.shape) == 2: + value_input = tf.expand_dims(value_input, axis=2) + value_input = tf.tile(value_input, [1, 1, embedding_size]) + + return tf.multiply(key_input, value_input) + + def compute_output_shape(self, input_shape): + return input_shape[0] + + def compute_mask(self, inputs, mask): + if self.supports_masking: + return mask[0] + else: + return None + + def get_config(self, ): + config = {'weight_normalization': self.weight_normalization, 'supports_masking': self.supports_masking} + base_config = super(WeightedSequenceLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class AttentionSequencePoolingLayer(Layer): + """The Attentional sequence pooling operation used in DIN. + + Input shape + - A list of three tensor: [query,keys,keys_length] + + - query is a 3D tensor with shape: ``(batch_size, 1, embedding_size)`` + + - keys is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` + + - keys_length is a 2D tensor with shape: ``(batch_size, 1)`` + + Output shape + - 3D tensor with shape: ``(batch_size, 1, embedding_size)``. + + Arguments + - **att_hidden_units**:list of positive integer, the attention net layer number and units in each layer. + + - **att_activation**: Activation function to use in attention net. + + - **weight_normalization**: bool.Whether normalize the attention score of local activation unit. + + - **supports_masking**:If True,the input need to support masking. + + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """ + + def __init__(self, att_hidden_units=(80, 40), att_activation='sigmoid', weight_normalization=False, + return_score=False, + supports_masking=False, **kwargs): + + self.att_hidden_units = att_hidden_units + self.att_activation = att_activation + self.weight_normalization = weight_normalization + self.return_score = return_score + super(AttentionSequencePoolingLayer, self).__init__(**kwargs) + self.supports_masking = supports_masking + + def build(self, input_shape): + if not self.supports_masking: + if not isinstance(input_shape, list) or len(input_shape) != 3: + raise ValueError('A `AttentionSequencePoolingLayer` layer should be called ' + 'on a list of 3 inputs') + + if len(input_shape[0]) != 3 or len(input_shape[1]) != 3 or len(input_shape[2]) != 2: + raise ValueError( + "Unexpected inputs dimensions,the 3 tensor dimensions are %d,%d and %d , expect to be 3,3 and 2" % ( + len(input_shape[0]), len(input_shape[1]), len(input_shape[2]))) + + if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1 or input_shape[2][1] != 1: + raise ValueError('A `AttentionSequencePoolingLayer` layer requires ' + 'inputs of a 3 tensor with shape (None,1,embedding_size),(None,T,embedding_size) and (None,1)' + 'Got different shapes: %s' % (input_shape)) + else: + pass + self.local_att = LocalActivationUnit( + self.att_hidden_units, self.att_activation, l2_reg=0, dropout_rate=0, use_bn=False, seed=1024, ) + super(AttentionSequencePoolingLayer, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, mask=None, training=None, **kwargs): + + if self.supports_masking: + if mask is None: + raise ValueError( + "When supports_masking=True,input must support masking") + queries, keys = inputs + key_masks = tf.expand_dims(mask[-1], axis=1) + + else: + + queries, keys, keys_length = inputs + hist_len = keys.get_shape()[1] + key_masks = tf.sequence_mask(keys_length, hist_len) + + attention_score = self.local_att([queries, keys], training=training) + + outputs = tf.transpose(attention_score, (0, 2, 1)) + + if self.weight_normalization: + paddings = tf.ones_like(outputs) * (-2 ** 32 + 1) + else: + paddings = tf.zeros_like(outputs) + + outputs = tf.where(key_masks, outputs, paddings) + + if self.weight_normalization: + outputs = softmax(outputs) + + if not self.return_score: + outputs = tf.matmul(outputs, keys) + + if tf.__version__ < '1.13.0': + outputs._uses_learning_phase = attention_score._uses_learning_phase + else: + outputs._uses_learning_phase = training is not None + + return outputs + + def compute_output_shape(self, input_shape): + if self.return_score: + return (None, 1, input_shape[1][1]) + else: + return (None, 1, input_shape[0][-1]) + + def compute_mask(self, inputs, mask): + return None + + def get_config(self, ): + + config = {'att_hidden_units': self.att_hidden_units, 'att_activation': self.att_activation, + 'weight_normalization': self.weight_normalization, 'return_score': self.return_score, + 'supports_masking': self.supports_masking} + base_config = super(AttentionSequencePoolingLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class BiLSTM(Layer): + """A multiple layer Bidirectional Residual LSTM Layer. + + Input shape + - 3D tensor with shape ``(batch_size, timesteps, input_dim)``. + + Output shape + - 3D tensor with shape: ``(batch_size, timesteps, units)``. + + Arguments + - **units**: Positive integer, dimensionality of the output space. 
+
+ - **layers**: Positive integer, number of LSTM layers to stack.
+
+ - **res_layers**: Positive integer, number of residual connections used in the last ``res_layers`` layers.
+
+ - **dropout_rate**: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs.
+
+ - **merge_mode**: Mode by which outputs of the forward and backward RNNs will be combined. One of { ``'fw'`` , ``'bw'`` , ``'sum'`` , ``'mul'`` , ``'concat'`` , ``'ave'`` , ``None`` }. If None, the outputs will not be combined, they will be returned as a list.
+
+
+ """
+
+ def __init__(self, units, layers=2, res_layers=0, dropout_rate=0.2, merge_mode='ave', **kwargs):
+
+ if merge_mode not in ['fw', 'bw', 'sum', 'mul', 'ave', 'concat', None]:
+ raise ValueError('Invalid merge mode. '
+ 'Merge mode should be one of '
+ '{"fw","bw","sum", "mul", "ave", "concat", None}')
+
+ self.units = units
+ self.layers = layers
+ self.res_layers = res_layers
+ self.dropout_rate = dropout_rate
+ self.merge_mode = merge_mode
+
+ super(BiLSTM, self).__init__(**kwargs)
+ self.supports_masking = True
+
+ def build(self, input_shape):
+
+ if len(input_shape) != 3:
+ raise ValueError(
+ "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
+ self.fw_lstm = []
+ self.bw_lstm = []
+ for _ in range(self.layers):
+ self.fw_lstm.append(
+ LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True,
+ unroll=True))
+ self.bw_lstm.append(
+ LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True,
+ go_backwards=True, unroll=True))
+
+ super(BiLSTM, self).build(
+ input_shape) # Be sure to call this somewhere!
+
+ def call(self, inputs, mask=None, **kwargs):
+
+ input_fw = inputs
+ input_bw = inputs
+ for i in range(self.layers):
+ output_fw = self.fw_lstm[i](input_fw)
+ output_bw = self.bw_lstm[i](input_bw)
+ output_bw = Lambda(lambda x: K.reverse(
+ x, 1), mask=lambda inputs, mask: mask)(output_bw)
+
+ if i >= self.layers - self.res_layers:
+ output_fw += input_fw
+ output_bw += input_bw
+ input_fw = output_fw
+ input_bw = output_bw
+
+ output_fw = input_fw
+ output_bw = input_bw
+
+ if self.merge_mode == "fw":
+ output = output_fw
+ elif self.merge_mode == "bw":
+ output = output_bw
+ elif self.merge_mode == 'concat':
+ output = K.concatenate([output_fw, output_bw])
+ elif self.merge_mode == 'sum':
+ output = output_fw + output_bw
+ elif self.merge_mode == 'ave':
+ output = (output_fw + output_bw) / 2
+ elif self.merge_mode == 'mul':
+ output = output_fw * output_bw
+ elif self.merge_mode is None:
+ output = [output_fw, output_bw]
+
+ return output
+
+ def compute_output_shape(self, input_shape):
+ if self.merge_mode is None:
+ return [input_shape, input_shape]
+ elif self.merge_mode == 'concat':
+ return input_shape[:-1] + (input_shape[-1] * 2,)
+ else:
+ return input_shape
+
+ def compute_mask(self, inputs, mask):
+ return mask
+
+ def get_config(self, ):
+
+ config = {'units': self.units, 'layers': self.layers,
+ 'res_layers': self.res_layers, 'dropout_rate': self.dropout_rate, 'merge_mode': self.merge_mode}
+ base_config = super(BiLSTM, self).get_config()
+ return dict(list(base_config.items()) + list(config.items()))
+
+
+class Transformer(Layer):
+ """ Simplified version of the Transformer proposed in 《Attention is all you need》
+
+ Input shape
+ - a list of two 3D tensors with shape ``(batch_size, timesteps, input_dim)`` if ``supports_masking=True``.
+ - a list of two 4 tensors, first two tensors with shape ``(batch_size, timesteps, input_dim)``,last two tensors with shape ``(batch_size, 1)`` if ``supports_masking=False`` . + + + Output shape + - 3D tensor with shape: ``(batch_size, 1, input_dim)`` if ``output_type='mean'`` or ``output_type='sum'`` , else ``(batch_size, timesteps, input_dim)`` . + + + Arguments + - **att_embedding_size**: int.The embedding size in multi-head self-attention network. + - **head_num**: int.The head number in multi-head self-attention network. + - **dropout_rate**: float between 0 and 1. Fraction of the units to drop. + - **use_positional_encoding**: bool. Whether or not use positional_encoding + - **use_res**: bool. Whether or not use standard residual connections before output. + - **use_feed_forward**: bool. Whether or not use pointwise feed foward network. + - **use_layer_norm**: bool. Whether or not use Layer Normalization. + - **blinding**: bool. Whether or not use blinding. + - **seed**: A Python integer to use as random seed. + - **supports_masking**:bool. Whether or not support masking. + - **attention_type**: str, Type of attention, the value must be one of { ``'scaled_dot_product'`` , ``'additive'`` }. + - **output_type**: ``'mean'`` , ``'sum'`` or `None`. Whether or not use average/sum pooling for output. + + References + - [Vaswani, Ashish, et al. "Attention is all you need." Advances in Neural Information Processing Systems. 2017.](https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf) + """ + + def __init__(self, att_embedding_size=1, head_num=8, dropout_rate=0.0, use_positional_encoding=True, use_res=True, + use_feed_forward=True, use_layer_norm=False, blinding=True, seed=1024, supports_masking=False, + attention_type="scaled_dot_product", output_type="mean", **kwargs): + if head_num <= 0: + raise ValueError('head_num must be a int > 0') + self.att_embedding_size = att_embedding_size + self.head_num = head_num + self.num_units = att_embedding_size * head_num + self.use_res = use_res + self.use_feed_forward = use_feed_forward + self.seed = seed + self.use_positional_encoding = use_positional_encoding + self.dropout_rate = dropout_rate + self.use_layer_norm = use_layer_norm + self.blinding = blinding + self.attention_type = attention_type + self.output_type = output_type + super(Transformer, self).__init__(**kwargs) + self.supports_masking = supports_masking + + def build(self, input_shape): + embedding_size = int(input_shape[0][-1]) + if self.num_units != embedding_size: + raise ValueError( + "att_embedding_size * head_num must equal the last dimension size of inputs,got %d * %d != %d" % ( + self.att_embedding_size, self.head_num, embedding_size)) + self.seq_len_max = int(input_shape[0][-2]) + self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed)) + self.W_key = self.add_weight(name='key', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 1)) + self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 2)) + if self.attention_type == "additive": + self.b = self.add_weight('b', shape=[self.att_embedding_size], dtype=tf.float32, + initializer=glorot_uniform(seed=self.seed)) + self.v = self.add_weight('v', shape=[self.att_embedding_size], dtype=tf.float32, + 
initializer=glorot_uniform(seed=self.seed))
+ # if self.use_res:
+ # self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num], dtype=tf.float32,
+ # initializer=TruncatedNormal(seed=self.seed))
+ if self.use_feed_forward:
+ self.fw1 = self.add_weight('fw1', shape=[self.num_units, 4 * self.num_units], dtype=tf.float32,
+ initializer=glorot_uniform(seed=self.seed))
+ self.fw2 = self.add_weight('fw2', shape=[4 * self.num_units, self.num_units], dtype=tf.float32,
+ initializer=glorot_uniform(seed=self.seed))
+
+ self.dropout = Dropout(
+ self.dropout_rate, seed=self.seed)
+ self.ln = LayerNormalization()
+ if self.use_positional_encoding:
+ self.query_pe = PositionEncoding()
+ self.key_pe = PositionEncoding()
+ # Be sure to call this somewhere!
+ super(Transformer, self).build(input_shape)
+
+ def call(self, inputs, mask=None, training=None, **kwargs):
+
+ if self.supports_masking:
+ queries, keys = inputs
+ query_masks, key_masks = mask
+ query_masks = tf.cast(query_masks, tf.float32)
+ key_masks = tf.cast(key_masks, tf.float32)
+ else:
+ queries, keys, query_masks, key_masks = inputs
+
+ query_masks = tf.sequence_mask(
+ query_masks, self.seq_len_max, dtype=tf.float32)
+ key_masks = tf.sequence_mask(
+ key_masks, self.seq_len_max, dtype=tf.float32)
+ query_masks = tf.squeeze(query_masks, axis=1)
+ key_masks = tf.squeeze(key_masks, axis=1)
+
+ if self.use_positional_encoding:
+ queries = self.query_pe(queries)
+ keys = self.key_pe(keys)
+
+ querys = tf.tensordot(queries, self.W_Query,
+ axes=(-1, 0)) # None T_q D*head_num
+ # project values from the raw keys before keys are overwritten by their own projection
+ values = tf.tensordot(keys, self.W_Value, axes=(-1, 0))
+ keys = tf.tensordot(keys, self.W_key, axes=(-1, 0))
+
+ # head_num*None T_q D
+ querys = tf.concat(tf.split(querys, self.head_num, axis=2), axis=0)
+ keys = tf.concat(tf.split(keys, self.head_num, axis=2), axis=0)
+ values = tf.concat(tf.split(values, self.head_num, axis=2), axis=0)
+
+ if self.attention_type == "scaled_dot_product":
+ # head_num*None T_q T_k
+ outputs = tf.matmul(querys, keys, transpose_b=True)
+
+ outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5)
+ elif self.attention_type == "additive":
+ querys_reshaped = tf.expand_dims(querys, axis=-2)
+ keys_reshaped = tf.expand_dims(keys, axis=-3)
+ outputs = tf.tanh(tf.nn.bias_add(querys_reshaped + keys_reshaped, self.b))
+ outputs = tf.squeeze(tf.tensordot(outputs, tf.expand_dims(self.v, axis=-1), axes=[-1, 0]), axis=-1)
+ else:
+ raise ValueError("attention_type must be scaled_dot_product or additive")
+
+ key_masks = tf.tile(key_masks, [self.head_num, 1])
+
+ # (h*N, T_q, T_k)
+ key_masks = tf.tile(tf.expand_dims(key_masks, 1),
+ [1, tf.shape(queries)[1], 1])
+
+ paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
+
+ # (h*N, T_q, T_k)
+
+ outputs = tf.where(tf.equal(key_masks, 1), outputs, paddings, )
+ if self.blinding:
+ try:
+ outputs = tf.matrix_set_diag(outputs, tf.ones_like(outputs)[
+ :, :, 0] * (-2 ** 32 + 1))
+ except AttributeError:
+ outputs = tf.compat.v1.matrix_set_diag(outputs, tf.ones_like(outputs)[
+ :, :, 0] * (-2 ** 32 + 1))
+
+ outputs -= reduce_max(outputs, axis=-1, keep_dims=True)
+ outputs = softmax(outputs)
+ query_masks = tf.tile(query_masks, [self.head_num, 1]) # (h*N, T_q)
+ # (h*N, T_q, T_k)
+ query_masks = tf.tile(tf.expand_dims(
+ query_masks, -1), [1, 1, tf.shape(keys)[1]])
+
+ outputs *= query_masks
+
+ outputs = self.dropout(outputs, training=training)
+ # Weighted sum
+ # ( h*N, T_q, C/h)
+ result = tf.matmul(outputs, values)
+ result = tf.concat(tf.split(result,
self.head_num, axis=0), axis=2) + + if self.use_res: + # tf.tensordot(queries, self.W_Res, axes=(-1, 0)) + result += queries + if self.use_layer_norm: + result = self.ln(result) + + if self.use_feed_forward: + fw1 = tf.nn.relu(tf.tensordot(result, self.fw1, axes=[-1, 0])) + fw1 = self.dropout(fw1, training=training) + fw2 = tf.tensordot(fw1, self.fw2, axes=[-1, 0]) + if self.use_res: + result += fw2 + if self.use_layer_norm: + result = self.ln(result) + + if self.output_type == "mean": + return reduce_mean(result, axis=1, keep_dims=True) + elif self.output_type == "sum": + return reduce_sum(result, axis=1, keep_dims=True) + else: + return result + + def compute_output_shape(self, input_shape): + + return (None, 1, self.att_embedding_size * self.head_num) + + def compute_mask(self, inputs, mask=None): + return None + + def get_config(self, ): + config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num, + 'dropout_rate': self.dropout_rate, 'use_res': self.use_res, + 'use_positional_encoding': self.use_positional_encoding, 'use_feed_forward': self.use_feed_forward, + 'use_layer_norm': self.use_layer_norm, 'seed': self.seed, 'supports_masking': self.supports_masking, + 'blinding': self.blinding, 'attention_type': self.attention_type, 'output_type': self.output_type} + base_config = super(Transformer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class PositionEncoding(Layer): + def __init__(self, pos_embedding_trainable=True, + zero_pad=False, + scale=True, **kwargs): + self.pos_embedding_trainable = pos_embedding_trainable + self.zero_pad = zero_pad + self.scale = scale + super(PositionEncoding, self).__init__(**kwargs) + + def build(self, input_shape): + # Create a trainable weight variable for this layer. + _, T, num_units = input_shape.as_list() # inputs.get_shape().as_list() + # First part of the PE function: sin and cos argument + position_enc = np.array([ + [pos / np.power(10000, 2. * (i // 2) / num_units) for i in range(num_units)] + for pos in range(T)]) + + # Second part, apply the cosine to even columns and sin to odds. + position_enc[:, 0::2] = np.sin(position_enc[:, 0::2]) # dim 2i + position_enc[:, 1::2] = np.cos(position_enc[:, 1::2]) # dim 2i+1 + if self.zero_pad: + position_enc[0, :] = np.zeros(num_units) + self.lookup_table = self.add_weight("lookup_table", (T, num_units), + initializer=identity(position_enc), + trainable=self.pos_embedding_trainable) + + # Be sure to call this somewhere! + super(PositionEncoding, self).build(input_shape) + + def call(self, inputs, mask=None): + _, T, num_units = inputs.get_shape().as_list() + position_ind = tf.expand_dims(tf.range(T), 0) + outputs = tf.nn.embedding_lookup(self.lookup_table, position_ind) + if self.scale: + outputs = outputs * num_units ** 0.5 + return outputs + inputs + + def compute_output_shape(self, input_shape): + + return input_shape + + def compute_mask(self, inputs, mask=None): + return mask + + def get_config(self, ): + + config = {'pos_embedding_trainable': self.pos_embedding_trainable, 'zero_pad': self.zero_pad, + 'scale': self.scale} + base_config = super(PositionEncoding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class BiasEncoding(Layer): + def __init__(self, sess_max_count, seed=1024, **kwargs): + self.sess_max_count = sess_max_count + self.seed = seed + super(BiasEncoding, self).__init__(**kwargs) + + def build(self, input_shape): + # Create a trainable weight variable for this layer. 
+ + if self.sess_max_count == 1: + embed_size = input_shape[2].value + seq_len_max = input_shape[1].value + else: + try: + embed_size = input_shape[0][2].value + seq_len_max = input_shape[0][1].value + except AttributeError: + embed_size = input_shape[0][2] + seq_len_max = input_shape[0][1] + + self.sess_bias_embedding = self.add_weight('sess_bias_embedding', shape=(self.sess_max_count, 1, 1), + initializer=TruncatedNormal( + mean=0.0, stddev=0.0001, seed=self.seed)) + self.seq_bias_embedding = self.add_weight('seq_bias_embedding', shape=(1, seq_len_max, 1), + initializer=TruncatedNormal( + mean=0.0, stddev=0.0001, seed=self.seed)) + self.item_bias_embedding = self.add_weight('item_bias_embedding', shape=(1, 1, embed_size), + initializer=TruncatedNormal( + mean=0.0, stddev=0.0001, seed=self.seed)) + + # Be sure to call this somewhere! + super(BiasEncoding, self).build(input_shape) + + def call(self, inputs, mask=None): + """ + :param concated_embeds_value: None * field_size * embedding_size + :return: None*1 + """ + transformer_out = [] + for i in range(self.sess_max_count): + transformer_out.append( + inputs[i] + self.item_bias_embedding + self.seq_bias_embedding + self.sess_bias_embedding[i]) + return transformer_out + + def compute_output_shape(self, input_shape): + + return input_shape + + def compute_mask(self, inputs, mask=None): + return mask + + def get_config(self, ): + + config = {'sess_max_count': self.sess_max_count, 'seed': self.seed, } + base_config = super(BiasEncoding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class DynamicGRU(Layer): + def __init__(self, num_units=None, gru_type='GRU', return_sequence=True, **kwargs): + + self.num_units = num_units + self.return_sequence = return_sequence + self.gru_type = gru_type + super(DynamicGRU, self).__init__(**kwargs) + + def build(self, input_shape): + # Create a trainable weight variable for this layer. + input_seq_shape = input_shape[0] + if self.num_units is None: + self.num_units = input_seq_shape.as_list()[-1] + if self.gru_type == "AGRU": + self.gru_cell = QAAttGRUCell(self.num_units) + elif self.gru_type == "AUGRU": + self.gru_cell = VecAttGRUCell(self.num_units) + else: + try: + self.gru_cell = tf.nn.rnn_cell.GRUCell(self.num_units) # GRUCell + except AttributeError: + self.gru_cell = tf.compat.v1.nn.rnn_cell.GRUCell(self.num_units) + + # Be sure to call this somewhere! 
+ super(DynamicGRU, self).build(input_shape) + + def call(self, input_list): + """ + :param concated_embeds_value: None * field_size * embedding_size + :return: None*1 + """ + if self.gru_type == "GRU" or self.gru_type == "AIGRU": + rnn_input, sequence_length = input_list + att_score = None + else: + rnn_input, sequence_length, att_score = input_list + + rnn_output, hidden_state = dynamic_rnn(self.gru_cell, inputs=rnn_input, att_scores=att_score, + sequence_length=tf.squeeze(sequence_length, + ), dtype=tf.float32, scope=self.name) + if self.return_sequence: + return rnn_output + else: + return tf.expand_dims(hidden_state, axis=1) + + def compute_output_shape(self, input_shape): + rnn_input_shape = input_shape[0] + if self.return_sequence: + return rnn_input_shape + else: + return (None, 1, rnn_input_shape[2]) + + def get_config(self, ): + config = {'num_units': self.num_units, 'gru_type': self.gru_type, 'return_sequence': self.return_sequence} + base_config = super(DynamicGRU, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class KMaxPooling(Layer): + """K Max pooling that selects the k biggest value along the specific axis. + + Input shape + - nD tensor with shape: ``(batch_size, ..., input_dim)``. + + Output shape + - nD tensor with shape: ``(batch_size, ..., output_dim)``. + + Arguments + - **k**: positive integer, number of top elements to look for along the ``axis`` dimension. + + - **axis**: positive integer, the dimension to look for elements. + + """ + + def __init__(self, k=1, axis=-1, **kwargs): + + self.k = k + self.axis = axis + super(KMaxPooling, self).__init__(**kwargs) + + def build(self, input_shape): + + if self.axis < 1 or self.axis > len(input_shape): + raise ValueError("axis must be 1~%d,now is %d" % + (len(input_shape), self.axis)) + + if self.k < 1 or self.k > input_shape[self.axis]: + raise ValueError("k must be in 1 ~ %d,now k is %d" % + (input_shape[self.axis], self.k)) + self.dims = len(input_shape) + # Be sure to call this somewhere! + super(KMaxPooling, self).build(input_shape) + + def call(self, inputs): + + # swap the last and the axis dimensions since top_k will be applied along the last dimension + perm = list(range(self.dims)) + perm[-1], perm[self.axis] = perm[self.axis], perm[-1] + shifted_input = tf.transpose(inputs, perm) + + # extract top_k, returns two tensors [values, indices] + top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0] + output = tf.transpose(top_k, perm) + + return output + + def compute_output_shape(self, input_shape): + output_shape = list(input_shape) + output_shape[self.axis] = self.k + return tuple(output_shape) + + def get_config(self, ): + config = {'k': self.k, 'axis': self.axis} + base_config = super(KMaxPooling, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + +# def positional_encoding(inputs, +# pos_embedding_trainable=True, +# zero_pad=False, +# scale=True, +# ): +# '''Sinusoidal Positional_Encoding. +# +# Args: +# +# - inputs: A 2d Tensor with shape of (N, T). +# - num_units: Output dimensionality +# - zero_pad: Boolean. If True, all the values of the first row (id = 0) should be constant zero +# - scale: Boolean. If True, the output will be multiplied by sqrt num_units(check details from paper) +# - scope: Optional scope for `variable_scope`. +# - reuse: Boolean, whether to reuse the weights of a previous layer by the same name. 
+# +# Returns: +# +# - A 'Tensor' with one more rank than inputs's, with the dimensionality should be 'num_units' +# ''' +# +# _, T, num_units = inputs.get_shape().as_list() +# # with tf.variable_scope(scope, reuse=reuse): +# position_ind = tf.expand_dims(tf.range(T), 0) +# # First part of the PE function: sin and cos argument +# position_enc = np.array([ +# [pos / np.power(10000, 2. * i / num_units) +# for i in range(num_units)] +# for pos in range(T)]) +# +# # Second part, apply the cosine to even columns and sin to odds. +# position_enc[:, 0::2] = np.sin(position_enc[:, 0::2]) # dim 2i +# position_enc[:, 1::2] = np.cos(position_enc[:, 1::2]) # dim 2i+1 +# +# # Convert to a tensor +# +# if pos_embedding_trainable: +# lookup_table = K.variable(position_enc, dtype=tf.float32) +# +# if zero_pad: +# lookup_table = tf.concat((tf.zeros(shape=[1, num_units]), +# lookup_table[1:, :]), 0) +# +# outputs = tf.nn.embedding_lookup(lookup_table, position_ind) +# +# if scale: +# outputs = outputs * num_units ** 0.5 +# return outputs + inputs diff --git a/modelzoo/FNN/script/layers/utils.py b/modelzoo/FNN/script/layers/utils.py new file mode 100644 index 00000000000..2be8f3fe5ef --- /dev/null +++ b/modelzoo/FNN/script/layers/utils.py @@ -0,0 +1,302 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" +import tensorflow as tf +from tensorflow.python.keras.layers import Flatten, Concatenate, Layer, Add +from tensorflow.python.ops.lookup_ops import TextFileInitializer + +try: + from tensorflow.python.ops.init_ops import Zeros, glorot_normal_initializer as glorot_normal +except ImportError: + from tensorflow.python.ops.init_ops_v2 import Zeros, glorot_normal + +from tensorflow.python.keras.regularizers import l2 + +try: + from tensorflow.python.ops.lookup_ops import StaticHashTable +except ImportError: + from tensorflow.python.ops.lookup_ops import HashTable as StaticHashTable + + +class NoMask(Layer): + def __init__(self, **kwargs): + super(NoMask, self).__init__(**kwargs) + + def build(self, input_shape): + # Be sure to call this somewhere! + super(NoMask, self).build(input_shape) + + def call(self, x, mask=None, **kwargs): + return x + + def compute_mask(self, inputs, mask): + return None + + +class Hash(Layer): + """Looks up keys in a table when setup `vocabulary_path`, which outputs the corresponding values. + If `vocabulary_path` is not set, `Hash` will hash the input to [0,num_buckets). When `mask_zero` = True, + input value `0` or `0.0` will be set to `0`, and other value will be set in range [1,num_buckets). + + The following snippet initializes a `Hash` with `vocabulary_path` file with the first column as keys and + second column as values: + + * `1,emerson` + * `2,lake` + * `3,palmer` + + >>> hash = Hash( + ... num_buckets=3+1, + ... vocabulary_path=filename, + ... default_value=0) + >>> hash(tf.constant('lake')).numpy() + 2 + >>> hash(tf.constant('lakeemerson')).numpy() + 0 + + Args: + num_buckets: An `int` that is >= 1. The number of buckets or the vocabulary size + 1 + when `vocabulary_path` is setup. + mask_zero: default is False. The `Hash` value will hash input `0` or `0.0` to value `0` when + the `mask_zero` is `True`. `mask_zero` is not used when `vocabulary_path` is setup. + vocabulary_path: default `None`. The `CSV` text file path of the vocabulary hash, which contains + two columns seperated by delimiter `comma`, the first column is the value and the second is + the key. The key data type is `string`, the value data type is `int`. 
The path must + be accessible from wherever `Hash` is initialized. + default_value: default '0'. The default value if a key is missing in the table. + **kwargs: Additional keyword arguments. + """ + + def __init__(self, num_buckets, mask_zero=False, vocabulary_path=None, default_value=0, **kwargs): + self.num_buckets = num_buckets + self.mask_zero = mask_zero + self.vocabulary_path = vocabulary_path + self.default_value = default_value + if self.vocabulary_path: + initializer = TextFileInitializer(vocabulary_path, 'string', 1, 'int64', 0, delimiter=',') + self.hash_table = StaticHashTable(initializer, default_value=self.default_value) + super(Hash, self).__init__(**kwargs) + + def build(self, input_shape): + # Be sure to call this somewhere! + super(Hash, self).build(input_shape) + + def call(self, x, mask=None, **kwargs): + + if x.dtype != tf.string: + zero = tf.as_string(tf.zeros([1], dtype=x.dtype)) + x = tf.as_string(x, ) + else: + zero = tf.as_string(tf.zeros([1], dtype='int32')) + + if self.vocabulary_path: + hash_x = self.hash_table.lookup(x) + return hash_x + + num_buckets = self.num_buckets if not self.mask_zero else self.num_buckets - 1 + try: + hash_x = tf.string_to_hash_bucket_fast(x, num_buckets, + name=None) # weak hash + except AttributeError: + hash_x = tf.strings.to_hash_bucket_fast(x, num_buckets, + name=None) # weak hash + if self.mask_zero: + mask = tf.cast(tf.not_equal(x, zero), dtype='int64') + hash_x = (hash_x + 1) * mask + + return hash_x + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self, ): + config = {'num_buckets': self.num_buckets, 'mask_zero': self.mask_zero, 'vocabulary_path': self.vocabulary_path, + 'default_value': self.default_value} + base_config = super(Hash, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class Linear(Layer): + + def __init__(self, l2_reg=0.0, mode=0, use_bias=False, seed=1024, **kwargs): + + self.l2_reg = l2_reg + # self.l2_reg = tf.contrib.layers.l2_regularizer(float(l2_reg_linear)) + if mode not in [0, 1, 2]: + raise ValueError("mode must be 0,1 or 2") + self.mode = mode + self.use_bias = use_bias + self.seed = seed + super(Linear, self).__init__(**kwargs) + + def build(self, input_shape): + if self.use_bias: + self.bias = self.add_weight(name='linear_bias', + shape=(1,), + initializer=Zeros(), + trainable=True) + if self.mode == 1: + self.kernel = self.add_weight( + 'linear_kernel', + shape=[int(input_shape[-1]), 1], + initializer=glorot_normal(self.seed), + regularizer=l2(self.l2_reg), + trainable=True) + elif self.mode == 2: + self.kernel = self.add_weight( + 'linear_kernel', + shape=[int(input_shape[1][-1]), 1], + initializer=glorot_normal(self.seed), + regularizer=l2(self.l2_reg), + trainable=True) + + super(Linear, self).build(input_shape) # Be sure to call this somewhere! 
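+
+ # Descriptive note (inferred from build/call below): `mode` selects the expected inputs:
+ # mode 0 - sparse embeddings only; logit = reduce_sum(sparse, axis=-1)
+ # mode 1 - dense features only; logit = dense @ kernel
+ # mode 2 - a (sparse, dense) pair; logit = reduce_sum(sparse, axis=-1) + dense @ kernel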
+ + def call(self, inputs, **kwargs): + if self.mode == 0: + sparse_input = inputs + linear_logit = reduce_sum(sparse_input, axis=-1, keep_dims=True) + elif self.mode == 1: + dense_input = inputs + fc = tf.tensordot(dense_input, self.kernel, axes=(-1, 0)) + linear_logit = fc + else: + sparse_input, dense_input = inputs + fc = tf.tensordot(dense_input, self.kernel, axes=(-1, 0)) + linear_logit = reduce_sum(sparse_input, axis=-1, keep_dims=False) + fc + if self.use_bias: + linear_logit += self.bias + + return linear_logit + + def compute_output_shape(self, input_shape): + return (None, 1) + + def compute_mask(self, inputs, mask): + return None + + def get_config(self, ): + config = {'mode': self.mode, 'l2_reg': self.l2_reg, 'use_bias': self.use_bias, 'seed': self.seed} + base_config = super(Linear, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def concat_func(inputs, axis=-1, mask=False): + if not mask: + inputs = list(map(NoMask(), inputs)) + if len(inputs) == 1: + return inputs[0] + else: + return Concatenate(axis=axis)(inputs) + + +def reduce_mean(input_tensor, + axis=None, + keep_dims=False, + name=None, + reduction_indices=None): + try: + return tf.reduce_mean(input_tensor, + axis=axis, + keep_dims=keep_dims, + name=name, + reduction_indices=reduction_indices) + except TypeError: + return tf.reduce_mean(input_tensor, + axis=axis, + keepdims=keep_dims, + name=name) + + +def reduce_sum(input_tensor, + axis=None, + keep_dims=False, + name=None, + reduction_indices=None): + try: + return tf.reduce_sum(input_tensor, + axis=axis, + keep_dims=keep_dims, + name=name, + reduction_indices=reduction_indices) + except TypeError: + return tf.reduce_sum(input_tensor, + axis=axis, + keepdims=keep_dims, + name=name) + + +def reduce_max(input_tensor, + axis=None, + keep_dims=False, + name=None, + reduction_indices=None): + try: + return tf.reduce_max(input_tensor, + axis=axis, + keep_dims=keep_dims, + name=name, + reduction_indices=reduction_indices) + except TypeError: + return tf.reduce_max(input_tensor, + axis=axis, + keepdims=keep_dims, + name=name) + + +def div(x, y, name=None): + try: + return tf.div(x, y, name=name) + except AttributeError: + return tf.divide(x, y, name=name) + + +def softmax(logits, dim=-1, name=None): + try: + return tf.nn.softmax(logits, dim=dim, name=name) + except TypeError: + return tf.nn.softmax(logits, axis=dim, name=name) + + +class _Add(Layer): + def __init__(self, **kwargs): + super(_Add, self).__init__(**kwargs) + + def build(self, input_shape): + # Be sure to call this somewhere! 
+ super(_Add, self).build(input_shape)
+
+ def call(self, inputs, **kwargs):
+ # if not isinstance(inputs, list):
+ # return inputs
+ # if len(inputs) == 1:
+ # return inputs[0]
+ if len(inputs) == 0:
+ return tf.constant([[0.0]])
+
+ return Add()(inputs)
+
+
+def add_func(inputs):
+ if not isinstance(inputs, list):
+ return inputs
+ if len(inputs) == 1:
+ return inputs[0]
+ return _Add()(inputs)
+
+
+def combined_dnn_input(sparse_embedding_list, dense_value_list):
+ if len(sparse_embedding_list) > 0 and len(dense_value_list) > 0:
+ sparse_dnn_input = Flatten()(concat_func(sparse_embedding_list))
+ dense_dnn_input = Flatten()(concat_func(dense_value_list))
+ return concat_func([sparse_dnn_input, dense_dnn_input])
+ elif len(sparse_embedding_list) > 0:
+ return Flatten()(concat_func(sparse_embedding_list))
+ elif len(dense_value_list) > 0:
+ return Flatten()(concat_func(dense_value_list))
+ else:
+ raise NotImplementedError("dnn_feature_columns cannot be an empty list")
diff --git a/modelzoo/FNN/script/models/__init__.py b/modelzoo/FNN/script/models/__init__.py
new file mode 100644
index 00000000000..f1bf243569b
--- /dev/null
+++ b/modelzoo/FNN/script/models/__init__.py
@@ -0,0 +1,4 @@
+from .fnn import FNN
+
+
+__all__ = ["FNN"]
diff --git a/modelzoo/FNN/script/models/fnn.py b/modelzoo/FNN/script/models/fnn.py
new file mode 100644
index 00000000000..50932f1cc5e
--- /dev/null
+++ b/modelzoo/FNN/script/models/fnn.py
@@ -0,0 +1,53 @@
+# -*- coding:utf-8 -*-
+"""
+Author:
+ Weichen Shen, weichenswc@163.com
+
+Reference:
+ [1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf)
+"""
+from tensorflow.python.keras.models import Model
+from tensorflow.python.keras.layers import Dense, Embedding
+
+from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
+from ..layers.core import PredictionLayer, DNN
+from ..layers.utils import add_func, combined_dnn_input
+
+
+def FNN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
+ l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0.1, seed=1024, dnn_dropout=0,
+ dnn_activation='relu', task='binary'):
+ """Instantiates the Factorization-supported Neural Network architecture.
+
+ :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
+ :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
+ :param dnn_hidden_units: list of positive integers or an empty list, the layer number and units in each layer of the deep net
+ :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
+ :param l2_reg_linear: float. L2 regularizer strength applied to linear weight
+ :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
+ :param seed: integer, to use as random seed.
+ :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
+ :param dnn_activation: Activation function to use in DNN
+ :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
+ :return: A Keras model instance.
+ """ + features = build_input_features( + linear_feature_columns + dnn_feature_columns) + + inputs_list = list(features.values()) + + linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', + l2_reg=l2_reg_linear) + + sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, + l2_reg_embedding, seed) + + dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list) + deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input) + dnn_logit = Dense(1, use_bias=False)(deep_out) + final_logit = add_func([dnn_logit, linear_logit]) + + output = PredictionLayer(task)(final_logit) + + model = Model(inputs=inputs_list, outputs=output) + return model diff --git a/modelzoo/FNN/script/utils.py b/modelzoo/FNN/script/utils.py new file mode 100644 index 00000000000..7fe3b25a518 --- /dev/null +++ b/modelzoo/FNN/script/utils.py @@ -0,0 +1,46 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import json +import logging +from threading import Thread + +import requests + +try: + from packaging.version import parse +except ImportError: + from pip._vendor.packaging.version import parse + + +def check_version(version): + """Return version of package on pypi.python.org using json.""" + + def check(version): + try: + url_pattern = 'https://pypi.python.org/pypi/deepctr/json' + req = requests.get(url_pattern) + latest_version = parse('0') + version = parse(version) + if req.status_code == requests.codes.ok: + j = json.loads(req.text.encode('utf-8')) + releases = j.get('releases', []) + for release in releases: + ver = parse(release) + if ver.is_prerelease or ver.is_postrelease: + continue + latest_version = max(latest_version, ver) + if latest_version > version: + logging.warning( + '\nDeepCTR version {0} detected. 
Your version is {1}.\nUse `pip install -U deepctr` to upgrade. Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format(
+ latest_version, version))
+ except:
+ print("Please check the latest version manually on https://pypi.org/project/deepctr/#history")
+ return
+
+ Thread(target=check, args=(version,)).start()
diff --git a/modelzoo/FNN/train.py b/modelzoo/FNN/train.py
new file mode 100644
index 00000000000..92d94bced4e
--- /dev/null
+++ b/modelzoo/FNN/train.py
@@ -0,0 +1,139 @@
+import os
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+import pickle as pkl
+import math
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.optimizers import Adam
+from sklearn.metrics import log_loss, roc_auc_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder, MinMaxScaler, MultiLabelBinarizer
+from script.models.fnn import FNN
+from script.feature_column import SparseFeat, DenseFeat, get_feature_names, VarLenSparseFeat
+import gc
+
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+def split(x):
+ key_ans = x.split(',')
+ for key in key_ans:
+ if key not in key2index:
+ key2index[key] = len(key2index) + 1
+ return list(map(lambda x: key2index[x], key_ans))
+
+if __name__=="__main__":
+ path = 'data/'
+ datalist = ['1458','2259','2261','2997','3386','all']
+
+ for file in datalist:
+
+ data = pd.read_csv(path+file+'/train.log.txt',encoding="utf-8",
+ header=0,sep="\t",low_memory=False)
+
+ test_data = pd.read_csv(path+file+'/test.log.txt',encoding="utf-8",
+ header=0,sep="\t",low_memory=False)
+
+
+ data = data[['click','weekday','hour','useragent','IP','region', 'city', 'adexchange', 'domain', 'slotid','slotwidth',
+ 'slotheight', 'slotvisibility', 'slotformat', 'creative', 'advertiser', 'slotprice']]
+
+ test_data = test_data[['click','weekday','hour','useragent','IP','region', 'city', 'adexchange', 'domain', 'slotid','slotwidth',
+ 'slotheight', 'slotvisibility', 'slotformat', 'creative', 'advertiser', 'slotprice']]
+
+ data['istest']=0
+ test_data['istest']=1
+ df = pd.concat([data, test_data], axis=0, ignore_index=True)
+ del data, test_data
+ gc.collect()
+
+
+ df.dropna(subset=['click'],inplace=True)
+
+ df['adexchange'].fillna(0,inplace=True)
+ df['adexchange']=df['adexchange'].astype(int)
+
+
+ df.fillna('unknown', inplace=True)
+
+
+ dense_features = ['weekday', 'hour','region','city','adexchange','slotwidth','slotheight',
+ 'advertiser', 'slotprice' ]
+
+
+ sparse_features=[]
+
+ target='click'
+ for col in df.columns:
+ if col not in dense_features and col not in ['istest','click']:
+ lbe = LabelEncoder()
+ df[col] = lbe.fit_transform(df[col])
+ sparse_features.append(col)
+
+ mms = MinMaxScaler(feature_range=(0, 1))
+
+ df[dense_features] = mms.fit_transform(df[dense_features])
+
+
+ fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=df[feat].max() + 1, embedding_dim=11,embeddings_initializer=None)
+ for i, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, )
+ for feat in dense_features]
+
+ linear_feature_columns = fixlen_feature_columns
+ dnn_feature_columns = fixlen_feature_columns
+
+
+ feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
+
+ # 3. generate train & test input data for the model
+ cols = [f for f in df.columns if f not in ['click', 'istest']]
+ train = df[df.istest==0][cols]
+ test = df[df.istest==1][cols]
+
+ train_model_input = {name: train[name] for name in feature_names}
+ test_model_input = {name: test[name] for name in feature_names}
+
+ gpu_options = tf.GPUOptions(allow_growth=True)
+
+
+ model = FNN(linear_feature_columns, dnn_feature_columns,task='binary',dnn_hidden_units=(128, 64, 32))
+
+ adam = Adam(learning_rate=0.001,amsgrad=False)
+
+ model.compile(adam, "binary_crossentropy",
+ metrics=['binary_crossentropy','AUC'])
+
+ with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
+
+
+ sess.run(tf.tables_initializer())
+ history = model.fit(train_model_input, df[df.istest==0][target].values,
+ batch_size=128, epochs=50, verbose=2, validation_split=0.2)
+
+ pred_ans = model.predict(test_model_input, batch_size=128)
+
+ test_auc = roc_auc_score(df[df.istest==1][target].values,pred_ans)
+ print('test_auc=',test_auc)
+
+
+ with open('result/result.txt','a+') as tx:
+ print(file+" test LogLoss", round(log_loss(df[df.istest==1][target].values, pred_ans), 4),file=tx)
+ print(file+" test AUC", round(roc_auc_score(df[df.istest==1][target].values, pred_ans), 4),file=tx)
+ print('='*50,file=tx)
+

From 4e4b4001a7e20d22addb923cc971b82d841b95aa Mon Sep 17 00:00:00 2001
From: lihangtian <936971274@qq.com>
Date: Wed, 12 Oct 2022 15:48:09 +0800
Subject: [PATCH 5/8] [ModelZoo] Support Co_Action Network

---
 modelzoo/CAN/README.md | 15 +-
 modelzoo/CAN/data/prepare_data.sh | 1 +
 modelzoo/CAN/data/script/data_iterator.py | 132 +++++++++-
 modelzoo/CAN/data/script/generate_voc.py.bk | 65 -----
 modelzoo/CAN/data/script/local_aggretor.py | 17 +-
 modelzoo/CAN/result/README.md | 2 +
 modelzoo/CAN/train.py | 264 ++++++--------------
 7 files changed, 226 insertions(+), 270 deletions(-)
 delete mode 100644 modelzoo/CAN/data/script/generate_voc.py.bk
 create mode 100644 modelzoo/CAN/result/README.md

diff --git a/modelzoo/CAN/README.md b/modelzoo/CAN/README.md
index c26f3f8eace..c4350c622ea 100644
--- a/modelzoo/CAN/README.md
+++ b/modelzoo/CAN/README.md
@@ -10,10 +10,11 @@ The following is a brief directory structure and description for this example:
 │ └── README.md # Documentation describing how to prepare dataset
 │ └── script # Directory contains scripts to process dataset
 │ ├── data_iterator.py
-│ ├── generate_voc.py
-│ ├── local_aggretor.py
-│ ├── shuffle.py
-│ └── split_by_user.py
+│ ├── generate_voc.py # Create a list of features
+│ ├── local_aggretor.py # Generate sample data
+│ ├── shuffle.py
+│ ├── process_data.py # Parse raw json data
+│ └── split_by_user.py # Divide the dataset
 ├── script # Directory contains scripts to CAN model
 │ ├── Dice.py
 │ ├── model.py
@@ -65,9 +66,7 @@
 ​ 2. train.
 
 ```
-CUDA_VISIBLE_DEVICES=0 python script/train.py train {model}
-
-model: CAN,Cartesion,PNN, etc. (check the train.py)
+python train.py
 ```
 
 ​
@@ -76,7 +75,7 @@
 
 ## Dataset
 
-Amazon, Taobao and Avazu dataset is used as benchmark dataset.
+The Amazon Books dataset is used as the benchmark dataset.
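+
+For reference, the snippet below is a minimal sketch of how the prepared files are consumed during training; the paths and the `batch_size`/`maxlen`/`label_type` values are the defaults assumed by `train.py`:
+
+```python
+# Minimal sketch, assuming prepare_data.sh has produced the files under data/.
+from data.script.data_iterator import DataIterator, prepare_data
+
+train_data = DataIterator("data/local_train_splitByUser",
+ "data/uid_voc.pkl", "data/mid_voc.pkl", "data/cat_voc.pkl",
+ 128, 100, shuffle_each_epoch=False, label_type=1)
+
+for src, tgt in train_data:
+ # pads/clips each history to maxlen and stacks the carte (co-action) features
+ uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, \
+ noclk_mids, noclk_cats, carte = prepare_data(src, tgt, maxlen=100, return_neg=True)
+ break
+```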
### Prepare diff --git a/modelzoo/CAN/data/prepare_data.sh b/modelzoo/CAN/data/prepare_data.sh index 54c9733dd15..4e341477042 100644 --- a/modelzoo/CAN/data/prepare_data.sh +++ b/modelzoo/CAN/data/prepare_data.sh @@ -7,3 +7,4 @@ python script/process_data.py meta_Books.json reviews_Books.json python script/local_aggretor.py python script/split_by_user.py python script/generate_voc.py + diff --git a/modelzoo/CAN/data/script/data_iterator.py b/modelzoo/CAN/data/script/data_iterator.py index 75c53c46919..4f71db10d51 100644 --- a/modelzoo/CAN/data/script/data_iterator.py +++ b/modelzoo/CAN/data/script/data_iterator.py @@ -1,11 +1,14 @@ import numpy +import pandas as pd import json import _pickle as cPickle import random - +import os import gzip +import time + -import data.script.shuffle +path = 'data/' def unicode_to_utf8(d): return dict((key.encode("UTF-8"), value) for (key,value) in d.items()) @@ -51,15 +54,19 @@ def __init__(self, source, else: self.source = fopen(source, 'r') self.source_dicts = [] - for source_dict in [uid_voc, mid_voc, cat_voc, './data/item_carte_voc.pkl', './data/cate_carte_voc.pkl']: + for source_dict in [uid_voc, mid_voc, cat_voc, path+'item_carte_voc.pkl',path+ 'cate_carte_voc.pkl']: + self.source_dicts.append(load_dict(source_dict)) - f_meta = open("./data/item-info", "r") + + f_meta = open(path+"item-info", "r") meta_map = {} for line in f_meta: arr = line.strip().split("\t") if arr[0] not in meta_map: meta_map[arr[0]] = arr[1] + + self.meta_id_map ={} for key in meta_map: val = meta_map[key] @@ -73,7 +80,8 @@ def __init__(self, source, cat_idx = 0 self.meta_id_map[mid_idx] = cat_idx - f_review = open("./data/reviews-info", "r") + + f_review = open(path+"reviews-info", "r") self.mid_list_for_random = [] for line in f_review: arr = line.strip().split("\t") @@ -82,6 +90,7 @@ def __init__(self, source, tmp_idx = self.source_dicts[1][arr[1]] self.mid_list_for_random.append(tmp_idx) + self.batch_size = batch_size self.maxlen = maxlen self.minlen = minlen @@ -129,6 +138,8 @@ def __next__(self): break self.source_buffer.append(ss.strip("\n").split("\t")) + + # sort by history behavior length if self.sort_by_length: his_length = numpy.array([len(s[4].split("")) for s in self.source_buffer]) @@ -139,6 +150,7 @@ def __next__(self): else: self.source_buffer.reverse() + if len(self.source_buffer) == 0: self.end_of_data = False self.reset() @@ -168,6 +180,7 @@ def __next__(self): item_carte.append(i_c) mid_list = tmp + tmp1 = [] cate_carte = [] for fea in ss[5].split(""): @@ -177,6 +190,7 @@ def __next__(self): cate_carte.append(c_c) cat_list = tmp1 + # read from source file and map to word index if self.minlen != None: @@ -187,6 +201,9 @@ def __next__(self): noclk_mid_list = [] noclk_cat_list = [] + + #print('end:',self.meta_id_map) + start = time.time() for pos_mid in mid_list: noclk_tmp_mid = [] noclk_tmp_cat = [] @@ -194,8 +211,10 @@ def __next__(self): while True: noclk_mid_indx = random.randint(0, len(self.mid_list_for_random)-1) noclk_mid = self.mid_list_for_random[noclk_mid_indx] - if noclk_mid == pos_mid: - continue + + + # if noclk_mid == pos_mid: + # continue noclk_tmp_mid.append(noclk_mid) noclk_tmp_cat.append(self.meta_id_map[noclk_mid]) noclk_index += 1 @@ -203,8 +222,16 @@ def __next__(self): break noclk_mid_list.append(noclk_tmp_mid) noclk_cat_list.append(noclk_tmp_cat) + + + + carte_list = [item_carte, cate_carte] source.append([uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list, carte_list]) + + + + if self.label_type == 1: 
target.append([float(ss[0])]) else: @@ -212,6 +239,7 @@ def __next__(self): if len(source) >= self.batch_size or len(target) >= self.batch_size: break + except IOError: self.end_of_data = True @@ -219,6 +247,96 @@ def __next__(self): if len(source) == 0 or len(target) == 0: source, target = self.next() + + return source, target + + + + +def prepare_data(input, target, maxlen = None, return_neg = False): + + # x: a list of sentences + + + lengths_x = [len(s[4]) for s in input] + seqs_mid = [inp[3] for inp in input] + seqs_cat = [inp[4] for inp in input] + noclk_seqs_mid = [inp[5] for inp in input] + noclk_seqs_cat = [inp[6] for inp in input] + seqs_item_carte = [inp[7][0] for inp in input] + seqs_cate_carte = [inp[7][1] for inp in input] + + + + if maxlen is not None: + new_seqs_mid = [] + new_seqs_cat = [] + new_noclk_seqs_mid = [] + new_noclk_seqs_cat = [] + new_lengths_x = [] + new_seqs_item_carte = [] + new_seqs_cate_carte = [] + for l_x, inp in zip(lengths_x, input): + if l_x > maxlen: + new_seqs_mid.append(inp[3][l_x - maxlen:]) + new_seqs_cat.append(inp[4][l_x - maxlen:]) + new_noclk_seqs_mid.append(inp[5][l_x - maxlen:]) + new_noclk_seqs_cat.append(inp[6][l_x - maxlen:]) + new_seqs_item_carte.append(inp[7][0][l_x - maxlen:]) + new_seqs_cate_carte.append(inp[7][1][l_x - maxlen:]) + new_lengths_x.append(maxlen) + else: + new_seqs_mid.append(inp[3]) + new_seqs_cat.append(inp[4]) + new_noclk_seqs_mid.append(inp[5]) + new_noclk_seqs_cat.append(inp[6]) + new_seqs_item_carte.append(inp[7][0]) + new_seqs_cate_carte.append(inp[7][1]) + new_lengths_x.append(l_x) + lengths_x = new_lengths_x + seqs_mid = new_seqs_mid + seqs_cat = new_seqs_cat + noclk_seqs_mid = new_noclk_seqs_mid + noclk_seqs_cat = new_noclk_seqs_cat + seqs_item_carte = new_seqs_item_carte + seqs_cate_carte = new_seqs_cate_carte + + if len(lengths_x) < 1: + return None, None, None, None + + n_samples = len(seqs_mid) + maxlen_x = numpy.max(lengths_x) + neg_samples = len(noclk_seqs_mid[0][0]) + + mid_his = numpy.zeros((n_samples, maxlen_x)).astype('int64') + cat_his = numpy.zeros((n_samples, maxlen_x)).astype('int64') + noclk_mid_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64') + noclk_cat_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64') + item_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64') + cate_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64') + mid_mask = numpy.zeros((n_samples, maxlen_x)).astype('float32') + for idx, [s_x, s_y, no_sx, no_sy, i_c, c_c] in enumerate(zip(seqs_mid, seqs_cat, noclk_seqs_mid, noclk_seqs_cat, seqs_item_carte, seqs_cate_carte)): + mid_mask[idx, :lengths_x[idx]] = 1. 
+ mid_his[idx, :lengths_x[idx]] = s_x + cat_his[idx, :lengths_x[idx]] = s_y + noclk_mid_his[idx, :lengths_x[idx], :] = no_sx + noclk_cat_his[idx, :lengths_x[idx], :] = no_sy + item_carte[idx, :lengths_x[idx]] = i_c + cate_carte[idx, :lengths_x[idx]] = c_c + + uids = numpy.array([inp[0] for inp in input]) + mids = numpy.array([inp[1] for inp in input]) + cats = numpy.array([inp[2] for inp in input]) + + carte = numpy.stack([item_carte, cate_carte], axis=1) + + if return_neg: + return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), noclk_mid_his, noclk_cat_his, carte + + else: + return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), carte + + diff --git a/modelzoo/CAN/data/script/generate_voc.py.bk b/modelzoo/CAN/data/script/generate_voc.py.bk deleted file mode 100644 index 411708148aa..00000000000 --- a/modelzoo/CAN/data/script/generate_voc.py.bk +++ /dev/null @@ -1,65 +0,0 @@ -import cPickle - -f_train = open("local_train_splitByUser", "r") -uid_dict = {} -mid_dict = {} -cat_dict = {} - -iddd = 0 -for line in f_train: - arr = line.strip("\n").split("\t") - clk = arr[0] - uid = arr[1] - mid = arr[2] - cat = arr[3] - mid_list = arr[4] - cat_list = arr[5] - if uid not in uid_dict: - uid_dict[uid] = 0 - uid_dict[uid] += 1 - if mid not in mid_dict: - mid_dict[mid] = 0 - mid_dict[mid] += 1 - if cat not in cat_dict: - cat_dict[cat] = 0 - cat_dict[cat] += 1 - if len(mid_list) == 0: - continue - for m in mid_list.split(""): - if m not in mid_dict: - mid_dict[m] = 0 - mid_dict[m] += 1 - #print iddd - iddd+=1 - for c in cat_list.split(""): - if c not in cat_dict: - cat_dict[c] = 0 - cat_dict[c] += 1 - -sorted_uid_dict = sorted(uid_dict.iteritems(), key=lambda x:x[1], reverse=True) -sorted_mid_dict = sorted(mid_dict.iteritems(), key=lambda x:x[1], reverse=True) -sorted_cat_dict = sorted(cat_dict.iteritems(), key=lambda x:x[1], reverse=True) - -uid_voc = {} -index = 0 -for key, value in sorted_uid_dict: - uid_voc[key] = index - index += 1 - -mid_voc = {} -mid_voc["default_mid"] = 0 -index = 1 -for key, value in sorted_mid_dict: - mid_voc[key] = index - index += 1 - -cat_voc = {} -cat_voc["default_cat"] = 0 -index = 1 -for key, value in sorted_cat_dict: - cat_voc[key] = index - index += 1 - -cPickle.dump(uid_voc, open("uid_voc.pkl", "w")) -cPickle.dump(mid_voc, open("mid_voc.pkl", "w")) -cPickle.dump(cat_voc, open("cat_voc.pkl", "w")) diff --git a/modelzoo/CAN/data/script/local_aggretor.py b/modelzoo/CAN/data/script/local_aggretor.py index e652ff3d543..05e43d4937a 100644 --- a/modelzoo/CAN/data/script/local_aggretor.py +++ b/modelzoo/CAN/data/script/local_aggretor.py @@ -2,9 +2,10 @@ import hashlib import random -fin = open("../../DIEN/data/jointed-new-split-info", "r") -ftrain = open("../../DIEN/data/local_train", "w") -ftest = open("../../DIEN/data/local_test", "w") + +fin = open("jointed-new-split-info", "r") +ftrain = open("local_train", "w") +ftest = open("local_test", "w") last_user = "0" common_fea = "" @@ -18,13 +19,14 @@ dt = items[5] cat1 = items[6] - if ds=="20180118": + if ds == "20180118": fo = ftrain else: fo = ftest if user != last_user: movie_id_list = [] cate1_list = [] + #print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + "" + "\t" + "" else: history_clk_num = len(movie_id_list) cat_str = "" @@ -35,11 +37,12 @@ mid_str += mid + "" if len(cat_str) > 0: cat_str = cat_str[:-1] if len(mid_str) > 0: mid_str = mid_str[:-1] - if history_clk_num >= 1: # 8 is the average length of user 
behavior - print(items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + mid_str + "\t" + cat_str, + if history_clk_num >= 1: # 8 is the average length of user behavior + print(items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 + + "\t" + mid_str + "\t" + cat_str, file=fo) last_user = user if clk: movie_id_list.append(movie_id) - cate1_list.append(cat1) + cate1_list.append(cat1) line_idx += 1 diff --git a/modelzoo/CAN/result/README.md b/modelzoo/CAN/result/README.md new file mode 100644 index 00000000000..ccec44eb9a5 --- /dev/null +++ b/modelzoo/CAN/result/README.md @@ -0,0 +1,2 @@ +# Result +Checkpoint & timeline file are default saved in this folder. diff --git a/modelzoo/CAN/train.py b/modelzoo/CAN/train.py index dd54677aecc..d13ead12e8c 100644 --- a/modelzoo/CAN/train.py +++ b/modelzoo/CAN/train.py @@ -1,95 +1,26 @@ import numpy -from data.script.data_iterator import DataIterator +import pandas as pd +from data.script.data_iterator import DataIterator,prepare_data import tensorflow as tf from script.model import * import time import random import sys from script.utils import * -from tqdm import tqdm +from tqdm import * +import pickle as pkl +import argparse + EMBEDDING_DIM = 18 HIDDEN_SIZE = 18 * 2 ATTENTION_SIZE = 18 * 2 best_auc = 0.0 -def prepare_data(input, target, maxlen = None, return_neg = False): - # x: a list of sentences - lengths_x = [len(s[4]) for s in input] - seqs_mid = [inp[3] for inp in input] - seqs_cat = [inp[4] for inp in input] - noclk_seqs_mid = [inp[5] for inp in input] - noclk_seqs_cat = [inp[6] for inp in input] - seqs_item_carte = [inp[7][0] for inp in input] - seqs_cate_carte = [inp[7][1] for inp in input] - - if maxlen is not None: - new_seqs_mid = [] - new_seqs_cat = [] - new_noclk_seqs_mid = [] - new_noclk_seqs_cat = [] - new_lengths_x = [] - new_seqs_item_carte = [] - new_seqs_cate_carte = [] - for l_x, inp in zip(lengths_x, input): - if l_x > maxlen: - new_seqs_mid.append(inp[3][l_x - maxlen:]) - new_seqs_cat.append(inp[4][l_x - maxlen:]) - new_noclk_seqs_mid.append(inp[5][l_x - maxlen:]) - new_noclk_seqs_cat.append(inp[6][l_x - maxlen:]) - new_seqs_item_carte.append(inp[7][0][l_x - maxlen:]) - new_seqs_cate_carte.append(inp[7][1][l_x - maxlen:]) - new_lengths_x.append(maxlen) - else: - new_seqs_mid.append(inp[3]) - new_seqs_cat.append(inp[4]) - new_noclk_seqs_mid.append(inp[5]) - new_noclk_seqs_cat.append(inp[6]) - new_seqs_item_carte.append(inp[7][0]) - new_seqs_cate_carte.append(inp[7][1]) - new_lengths_x.append(l_x) - lengths_x = new_lengths_x - seqs_mid = new_seqs_mid - seqs_cat = new_seqs_cat - noclk_seqs_mid = new_noclk_seqs_mid - noclk_seqs_cat = new_noclk_seqs_cat - seqs_item_carte = new_seqs_item_carte - seqs_cate_carte = new_seqs_cate_carte - - if len(lengths_x) < 1: - return None, None, None, None - - n_samples = len(seqs_mid) - maxlen_x = numpy.max(lengths_x) - neg_samples = len(noclk_seqs_mid[0][0]) - - mid_his = numpy.zeros((n_samples, maxlen_x)).astype('int64') - cat_his = numpy.zeros((n_samples, maxlen_x)).astype('int64') - noclk_mid_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64') - noclk_cat_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64') - item_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64') - cate_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64') - mid_mask = numpy.zeros((n_samples, maxlen_x)).astype('float32') - for idx, [s_x, s_y, no_sx, no_sy, i_c, c_c] in enumerate(zip(seqs_mid, seqs_cat, noclk_seqs_mid, noclk_seqs_cat, seqs_item_carte, seqs_cate_carte)): - 
mid_mask[idx, :lengths_x[idx]] = 1. - mid_his[idx, :lengths_x[idx]] = s_x - cat_his[idx, :lengths_x[idx]] = s_y - noclk_mid_his[idx, :lengths_x[idx], :] = no_sx - noclk_cat_his[idx, :lengths_x[idx], :] = no_sy - item_carte[idx, :lengths_x[idx]] = i_c - cate_carte[idx, :lengths_x[idx]] = c_c - - uids = numpy.array([inp[0] for inp in input]) - mids = numpy.array([inp[1] for inp in input]) - cats = numpy.array([inp[2] for inp in input]) - - carte = numpy.stack([item_carte, cate_carte], axis=1) - - if return_neg: - return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), noclk_mid_his, noclk_cat_his, carte +file_location = 'data' + + - else: - return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), carte def eval(sess, test_data, model, model_path): @@ -119,80 +50,44 @@ def eval(sess, test_data, model, model_path): #model.save(sess, model_path) return test_auc, loss_sum, accuracy_sum, aux_loss_sum -def train( - train_file = "../DIEN/data/local_train_splitByUser", - test_file = "../DIEN/data/local_test_splitByUser", - uid_voc = "../CAN/data/uid_voc.pkl", - mid_voc = "../CAN/data/mid_voc.pkl", - cat_voc = "../CAN/data/cat_voc.pkl", - batch_size = 128, - maxlen = 100, - test_iter = 8400, - save_iter = 8400, - model_type = 'DNN', + + +def train(train_file = file_location+"/local_train_splitByUser", + test_file =file_location+ "/local_test_splitByUser", + uid_voc =file_location+ "/uid_voc.pkl", + mid_voc = file_location+"/mid_voc.pkl", + cat_voc = file_location+"/cat_voc.pkl", + model_type = 'CAN', seed = 2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: - label_type = 1 - train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False, label_type=label_type) - test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, label_type=label_type) + + train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, args.batch_size, args.maxlen, + shuffle_each_epoch=False, label_type=args.label_type) + + test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, args.batch_size, args.maxlen, + label_type=args.label_type) + n_uid, n_mid, n_cat, n_carte = train_data.get_n() - if model_type == 'DNN': - model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,use_softmax=False) - elif model_type == 'Cartesion': - model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,use_softmax=False, use_cartes=True) - elif model_type == 'CAN+Cartesion': - model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True, use_cartes=True) - elif model_type == 'CAN': - model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True) - elif model_type == 'PNN': - model = Model_PNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) - elif model_type == 'ONN': - model = Model_ONN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) - elif model_type == 'Wide': - model = Model_WideDeep(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) - elif model_type == 'NCF': - model = Model_NCF(n_uid, n_mid, n_cat, 
n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) - elif model_type == 'FM': - model = Model_FM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) - elif model_type == 'FFM': - model = Model_FFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) - elif model_type == 'DeepFM': - model = Model_DeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) - elif model_type == 'DeepFFM': - model = Model_DeepFFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) - elif model_type == 'xDeepFM': - model = Model_xDeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) - elif model_type == 'ONN': - model = Model_ONN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) - elif model_type == 'DIN': - model = Model_DIN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) - elif model_type == 'DIEN': - model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) - elif model_type == 'CAN+DIEN': - model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True) - else: - print ("Invalid model_type : %s"% model_type) - return - print("Model: ", model_type) + + model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() count() - iter = 0 lr = 0.001 + loss_sum = 0.0 + accuracy_sum = 0. + aux_loss_sum = 0. + for iter in range(10): + for src, tgt in tqdm(train_data): + uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data(src, tgt, args.maxlen, return_neg=True) - for itr in range(1) : - loss_sum = 0.0 - accuracy_sum = 0. - aux_loss_sum = 0. 
- for src, tgt in train_data:
- uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data(src, tgt, maxlen, return_neg=True)
 loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats, carte])
 loss_sum += loss
 accuracy_sum += acc
@@ -204,13 +99,13 @@ def train(
 loss_sum = 0.0
 accuracy_sum = 0.0
 aux_loss_sum = 0.0
- if (iter % test_iter) == 0:
+ if (iter % args.test_iter) == 0:
 auc_, loss_, acc_, aux_ = eval(sess, test_data, model, best_model_path)
 print('iter: %d --- test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % (iter, auc_, loss_, acc_, aux_))
 loss_sum = 0.0
 accuracy_sum = 0.0
 aux_loss_sum = 0.0
- if (iter % save_iter) == 0:
+ if (iter % args.save_iter) == 0:
 print('save model iter: %d' %(iter))
 model.save(sess, model_path+"--"+str(iter))
@@ -232,61 +127,64 @@ def count():
 print("Prameter: ", total_parameters)
 
 def test(
- train_file = "../DIEN/data/local_train_splitByUser",
- test_file = "../DIEN/data/local_test_splitByUser",
- uid_voc = "../CAN/data/uid_voc.pkl",
- mid_voc = "../CAN/data/mid_voc.pkl",
- cat_voc = "../CAN/data/cat_voc.pkl",
- batch_size = 128,
- maxlen = 100,
- model_type = 'DNN',
+ train_file = file_location+"/local_train_splitByUser",
+ test_file = file_location+"/local_test_splitByUser",
+ uid_voc = file_location+"/uid_voc.pkl",
+ mid_voc = file_location+"/mid_voc.pkl",
+ cat_voc = file_location+"/cat_voc.pkl",
+ model_type = 'CAN',
 seed = 2
 ):
 model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)
 gpu_options = tf.GPUOptions(allow_growth=True)
 with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
- train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
- test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
+ train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, args.batch_size, args.maxlen)
+ test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, args.batch_size, args.maxlen)
 n_uid, n_mid, n_cat = train_data.get_n()
- if model_type == 'DNN':
- model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
- elif model_type == 'PNN':
- model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
- elif model_type == 'Wide':
- model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
- elif model_type == 'DIN':
- model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
- elif model_type == 'DIN-V2-gru-att-gru':
- model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
- elif model_type == 'DIN-V2-gru-gru-att':
- model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
- elif model_type == 'DIN-V2-gru-qa-attGru':
- model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
- elif model_type == 'DIN-V2-gru-vec-attGru':
- model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
- elif model_type == 'DIEN':
- model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
- else:
- print ("Invalid model_type : %s", model_type)
- return
+
+ model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
+
 model.restore(sess, model_path)
 print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path))
+
+
+def get_arg_parser():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--batch_size',
+ help='Batch size to train. Default is 128',
+ type=int,
+ default=128)
+ parser.add_argument('--training',
+ help='train or test',
+ type=bool,
+ default=True)
+
+ parser.add_argument('--maxlen',
+ type=int,
+ default=100)
+
+ parser.add_argument('--test_iter',
+ type=int,
+ default=8400)
+
+ parser.add_argument('--save_iter',
+ type=int,
+ default=8400)
+ parser.add_argument('--label_type',
+ type=int,
+ default=1)
+
+ return parser
+
+
+
 if __name__ == '__main__':
- if len(sys.argv) == 4:
- SEED = int(sys.argv[3])
- else:
- SEED = 3
- tf.set_random_seed(SEED)
- numpy.random.seed(SEED)
- random.seed(SEED)
-
- if sys.argv[1] == 'train':
- train(model_type=sys.argv[2], seed=SEED)
- elif sys.argv[1] == 'test':
- test(model_type=sys.argv[2], seed=SEED)
+ parser = get_arg_parser()
+ args = parser.parse_args()
+ if args.training:
+ train()
 else:
- print('do nothing...')
+ test()

From b6134722d705e3e8440f5c4592e4ef78a1c2626e Mon Sep 17 00:00:00 2001
From: lihangtian <936971274@qq.com>
Date: Wed, 12 Oct 2022 15:52:17 +0800
Subject: [PATCH 6/8] [ModelZoo] Support FNN

---
 modelzoo/FNN/README.md | 28 +-
 modelzoo/FNN/data/prepare_data.sh | 15 ++
 modelzoo/FNN/data/script/data2labelencode.py | 54 ++++
 modelzoo/FNN/data/script/generate_neg.py | 63 +++++
 modelzoo/FNN/data/script/generate_voc.py | 66 +++++
 .../FNN/data/script/history_behavior_list.py | 41 +++
 modelzoo/FNN/data/script/item_map.py | 29 +++
 modelzoo/FNN/data/script/local_aggretor.py | 47 ++++
 modelzoo/FNN/data/script/pick2txt.py | 14 +
 modelzoo/FNN/data/script/process_data.py | 108 ++++++++
 modelzoo/FNN/data/script/split_by_user.py | 18 ++
 modelzoo/FNN/result/README.md | 2 +-
 modelzoo/FNN/script/feature_column.py | 35 ++-
 modelzoo/FNN/script/layers/utils.py | 2 +
 modelzoo/FNN/script/utils.py | 2 +-
 modelzoo/FNN/train.py | 242 +++++++++++-------
 16 files changed, 660 insertions(+), 106 deletions(-)
 create mode 100644 modelzoo/FNN/data/prepare_data.sh
 create mode 100644 modelzoo/FNN/data/script/data2labelencode.py
 create mode 100644 modelzoo/FNN/data/script/generate_neg.py
 create mode 100644 modelzoo/FNN/data/script/generate_voc.py
 create mode 100644 modelzoo/FNN/data/script/history_behavior_list.py
 create mode 100644 modelzoo/FNN/data/script/item_map.py
 create mode 100644 modelzoo/FNN/data/script/local_aggretor.py
 create mode 100644 modelzoo/FNN/data/script/pick2txt.py
 create mode 100644 modelzoo/FNN/data/script/process_data.py
 create mode 100644 modelzoo/FNN/data/script/split_by_user.py

diff --git a/modelzoo/FNN/README.md b/modelzoo/FNN/README.md
index a2f9e721921..39fd79202ec 100644
--- a/modelzoo/FNN/README.md
+++ b/modelzoo/FNN/README.md
@@ -6,7 +6,18 @@ The following is a brief directory structure and description for this example:
 ```
 ├── data # Data set directory
+│ ├── prepare_data.sh # Shell script to download and process dataset
 │ └── README.md # Documentation describing how to prepare dataset
+│ └── script # Directory contains scripts to process dataset
+│ ├── data2labelencode.py # Convert data to csv file
+│ ├── generate_neg.py # Create negative sample
+│ ├── generate_voc.py # Create a list of features
+│ ├── history_behavior_list.py # Count user's history behaviors
+│ ├── item_map.py # Create a map between item id and cate
+│ ├── local_aggretor.py # Generate sample data
+│ ├── pick2txt.py # Convert voc's format
+│ ├── process_data.py # Parse raw json data
+│ └── split_by_user.py # Divide the dataset
 ├── script # model set directory
 │ ├── contrib #Directory contains rnn
 │ ├── estimator #Directory contains estimator to data
@@ -29,7 +40,7 @@ The following is a brief directory structure and description for this example:
 
 ## Model Structure
 
-Implementation of paper "Deep Learning over Multi-field Categorical Data– A Case Study on User Response Prediction".
+Implementation of paper "Deep Learning over Multi-field Categorical Data - A Case Study on User Response Prediction".
 
@@ -67,21 +78,8 @@ Implementation of paper "Deep Learning over Multi-field Categorical Data– A Ca
 
 ## Dataset
 
- iPinYou dataset is used as benchmark dataset.
+ The Amazon Books dataset is used as the benchmark dataset.
 
 ### Prepare
 
 For details of Data download, see [Data Preparation](https://github.com/Atomu2014/make-ipinyou-data)
-
-### Campaigs
-
-We use campaign 1458 as example here.
-
-```
-make-ipinyou-data/1458$ ls
-featindex.txt test.log.txt test.txt train.log.txt train.txt
-```
-
-- `train.log.txt` and `test.log.txt` are the formalised string data for each row (record) in train and test. The first column is whether the user click the ad or not.
-- `featindex.txt`maps the features to their indexes. For example, `8:1.1.174.* 76` means that the 8th column in `train.log.txt` with the string `1.1.174.*` maps to feature index `76`.
-- `train.txt` and `test.txt` are the mapped vector data for `train.log.txt` and `test.log.txt`. The format is y:click, and x:features. Such data is in the standard form as introduced in [iPinYou Benchmarking](http://arxiv.org/abs/1407.7073).
diff --git a/modelzoo/FNN/data/prepare_data.sh b/modelzoo/FNN/data/prepare_data.sh
new file mode 100644
index 00000000000..49fdb9a0da1
--- /dev/null
+++ b/modelzoo/FNN/data/prepare_data.sh
@@ -0,0 +1,15 @@
+wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Books.json.gz
+wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz
+gunzip reviews_Books.json.gz
+gunzip meta_Books.json.gz
+
+python script/process_data.py meta_Books.json reviews_Books.json
+python script/local_aggretor.py
+python script/split_by_user.py
+python script/generate_voc.py
+
+python script/item_map.py
+python script/history_behavior_list.py
+python script/generate_neg.py
+
+python script/data2labelencode.py
\ No newline at end of file
diff --git a/modelzoo/FNN/data/script/data2labelencode.py b/modelzoo/FNN/data/script/data2labelencode.py
new file mode 100644
index 00000000000..04daba5e28a
--- /dev/null
+++ b/modelzoo/FNN/data/script/data2labelencode.py
@@ -0,0 +1,54 @@
+import pandas as pd
+import numpy as np
+import pickle
+
+UNSEQ_COLUMNS = ['UID', 'ITEM', 'CATEGORY']
+HIS_COLUMNS = ['HISTORY_ITEM', 'HISTORY_CATEGORY']
+SEQ_COLUMNS = HIS_COLUMNS
+LABEL_COLUMN = ['CLICKED']
+TRAIN_DATA_COLUMNS = LABEL_COLUMN + UNSEQ_COLUMNS + SEQ_COLUMNS
+
+
+
+def inputs_to_labelencode(filename):
+ def encoder_dict(data, category_col):
+ category_dict = data[category_col].value_counts()
+ category_dict = pd.Series(np.arange(0, len(category_dict)), index=category_dict.index).to_dict()
+ data[category_col + '_encode'] = data[category_col].map(category_dict).astype('int32')
+ return data
+
+ uid_file = '../CAN/data/uid_voc.txt'
+ mid_file = '../CAN/data/mid_voc.txt'
+ cat_file = '../CAN/data/cat_voc.txt'
+
+ uid_data = pd.read_csv(uid_file, encoding="utf-8", header=None, names=['UID'])
+ mid_data = pd.read_csv(mid_file, encoding="utf-8", header=None, names=['ITEM'])
+ cat_data = pd.read_csv(cat_file,
encoding="utf-8", header=None, names=['CATEGORY']) + + uid_data = encoder_dict(uid_data, 'UID') + mid_data = encoder_dict(mid_data, 'ITEM') + cat_data = encoder_dict(cat_data, 'CATEGORY') + + dataset = pd.read_csv(filename, encoding="utf-8", + header=None, names=TRAIN_DATA_COLUMNS, sep="\t", low_memory=False) + for key in ['UID','ITEM','CATEGORY']: + if key=='UID': + dataset = pd.merge(dataset, uid_data, on=key, how='inner') + elif key=='ITEM': + dataset = pd.merge(dataset, mid_data, on=key, how='inner') + else: + dataset = pd.merge(dataset, cat_data, on=key, how='inner') + + dataset = dataset.drop(UNSEQ_COLUMNS + SEQ_COLUMNS, axis=1) + + dataset.to_csv(filename + '_to_labelencode.txt',index=0,header=0) + uid_data.to_csv('dataset/uid_labelencode.csv',index=False) + mid_data.to_csv('dataset/mid_labelencode.csv',index=False) + cat_data.to_csv('dataset/cat_labelencode.csv',index=False) + + + +if __name__ == '__main__': + inputs_to_labelencode('../CAN/data/local_train_splitByUser') + inputs_to_labelencode('../CAN/data/local_test_splitByUser') + diff --git a/modelzoo/FNN/data/script/generate_neg.py b/modelzoo/FNN/data/script/generate_neg.py new file mode 100644 index 00000000000..a10ef919e13 --- /dev/null +++ b/modelzoo/FNN/data/script/generate_neg.py @@ -0,0 +1,63 @@ +import random + +NEG_SEQ_LENGTH_FOR_EACH_HISTORY_ITEM = 1 + + +def createNegData(file): + with open(file, 'r') as f_raw: + with open(file + '_neg', 'w') as f_out: + FirstLine = True + for line in f_raw: + linelist = line.strip().split('\t') + uid = linelist[1] + + if uid not in user_history_behavior: + str = '\t' + else: + his_items = linelist[4].split('') + neg_items_str = '' + neg_cates_str = '' + for pos in his_items: + tmp_items_str = '' + tmp_cates_str = '' + tmp_items = [] + tmp_cates = [] + neg_length = 0 + while (True): + index = random.randint( + 0, + len(user_history_behavior[uid][0]) - 1) + if user_history_behavior[uid][0][index] != pos: + tmp_items.append( + user_history_behavior[uid][0][index]) + tmp_cates.append( + user_history_behavior[uid][1][index]) + neg_length += 1 + if neg_length >= NEG_SEQ_LENGTH_FOR_EACH_HISTORY_ITEM: + break + for item in tmp_items: + tmp_items_str += (item + '') + for cate in tmp_cates: + tmp_cates_str += (cate + '') + neg_items_str += (tmp_items_str[:-1] + '') + neg_cates_str += (tmp_cates_str[:-1] + '') + str = neg_items_str[:-1] + '\t' + neg_cates_str[:-1] + if FirstLine: + f_out.write(str) + FirstLine = False + else: + f_out.write('\n' + str) + + +user_history_behavior = {} +with open('user_history_behavior.txt', 'r') as f: + for line in f: + linelist = line.strip().split('\t') + uid = linelist[0] + items = linelist[1].split('') + cates = linelist[2].split('') + user_history_behavior[uid] = [items, cates] + +data_file = ['local_test_splitByUser', 'local_train_splitByUser'] +for file in data_file: + createNegData(file) diff --git a/modelzoo/FNN/data/script/generate_voc.py b/modelzoo/FNN/data/script/generate_voc.py new file mode 100644 index 00000000000..447fe6393b7 --- /dev/null +++ b/modelzoo/FNN/data/script/generate_voc.py @@ -0,0 +1,66 @@ +# import cPickle +import pickle as cPickle + +f_train = open("local_train_splitByUser", "r") +uid_dict = {} +mid_dict = {} +cat_dict = {} + +iddd = 0 +for line in f_train: + arr = line.strip("\n").split("\t") + clk = arr[0] + uid = arr[1] + mid = arr[2] + cat = arr[3] + mid_list = arr[4] + cat_list = arr[5] + if uid not in uid_dict: + uid_dict[uid] = 0 + uid_dict[uid] += 1 + if mid not in mid_dict: + mid_dict[mid] = 0 + mid_dict[mid] += 1 + if cat 
not in cat_dict: + cat_dict[cat] = 0 + cat_dict[cat] += 1 + if len(mid_list) == 0: + continue + for m in mid_list.split(""): + if m not in mid_dict: + mid_dict[m] = 0 + mid_dict[m] += 1 + #print iddd + iddd+=1 + for c in cat_list.split(""): + if c not in cat_dict: + cat_dict[c] = 0 + cat_dict[c] += 1 + +sorted_uid_dict = sorted(uid_dict.items(), key=lambda x:x[1], reverse=True) +sorted_mid_dict = sorted(mid_dict.items(), key=lambda x:x[1], reverse=True) +sorted_cat_dict = sorted(cat_dict.items(), key=lambda x:x[1], reverse=True) + +uid_voc = {} +index = 0 +for key, value in sorted_uid_dict: + uid_voc[key] = index + index += 1 + +mid_voc = {} +mid_voc["default_mid"] = 0 +index = 1 +for key, value in sorted_mid_dict: + mid_voc[key] = index + index += 1 + +cat_voc = {} +cat_voc["default_cat"] = 0 +index = 1 +for key, value in sorted_cat_dict: + cat_voc[key] = index + index += 1 + +cPickle.dump(uid_voc, open("uid_voc.pkl", "wb")) +cPickle.dump(mid_voc, open("mid_voc.pkl", "wb")) +cPickle.dump(cat_voc, open("cat_voc.pkl", "wb")) diff --git a/modelzoo/FNN/data/script/history_behavior_list.py b/modelzoo/FNN/data/script/history_behavior_list.py new file mode 100644 index 00000000000..6adaf398cef --- /dev/null +++ b/modelzoo/FNN/data/script/history_behavior_list.py @@ -0,0 +1,41 @@ +item_to_cate_map = {} +with open('item2catmap.txt', 'r') as f: + for line in f: + linelist = line.strip().split('\t') + item = linelist[0] + cate = linelist[1] + item_to_cate_map[item] = cate + +user_history_behavior = {} +with open('reviews-info', 'r') as f: + for line in f: + linelist = line.strip().split('\t') + uid = linelist[0] + item = linelist[1] + if uid not in user_history_behavior: + user_history_behavior[uid] = [item] + else: + if item not in user_history_behavior[uid]: + user_history_behavior[uid].append(item) + +FirstLine = True +with open('user_history_behavior.txt', 'w') as f: + for uid, items in user_history_behavior.items(): + itemstr = '' + catestr = '' + for i in items: + if i in item_to_cate_map: + c = item_to_cate_map[i] + else: + c = 'Unknown' + if not itemstr: + itemstr += i + catestr += c + else: + itemstr += ('' + i) + catestr += ('' + c) + if FirstLine: + f.write(uid + '\t' + itemstr + '\t' + catestr) + FirstLine = False + else: + f.write('\n' + uid + '\t' + itemstr + '\t' + catestr) diff --git a/modelzoo/FNN/data/script/item_map.py b/modelzoo/FNN/data/script/item_map.py new file mode 100644 index 00000000000..94bebee5184 --- /dev/null +++ b/modelzoo/FNN/data/script/item_map.py @@ -0,0 +1,29 @@ +import sys +from tqdm import tqdm + +data_file = ['local_test_splitByUser', 'local_train_splitByUser'] + +item_to_cate_map = {} +# 367983 +for file_name in data_file: + with open(file_name, 'r') as f: + for line in f: + linelist = line.strip().split('\t') + items = linelist[4].split('') + cates = linelist[5].split('') + items.append(linelist[2]) + cates.append(linelist[3]) + # print(items) + # print(cates) + for index, item in enumerate(items): + if item not in item_to_cate_map: + item_to_cate_map[item] = cates[index] + +with open('item2catmap.txt', 'w') as f: + firstline = True + for item, cate in item_to_cate_map.items(): + if firstline: + f.write(item + '\t' + cate) + firstline = False + else: + f.write('\n' + item + '\t' + cate) diff --git a/modelzoo/FNN/data/script/local_aggretor.py b/modelzoo/FNN/data/script/local_aggretor.py new file mode 100644 index 00000000000..1fd8aceb32c --- /dev/null +++ b/modelzoo/FNN/data/script/local_aggretor.py @@ -0,0 +1,47 @@ +import sys +import hashlib +import random 
+ +fin = open("jointed-new-split-info", "r") +ftrain = open("local_train", "w") +ftest = open("local_test", "w") + +last_user = "0" +common_fea = "" +line_idx = 0 +for line in fin: + items = line.strip().split("\t") + ds = items[0] + clk = int(items[1]) + user = items[2] + movie_id = items[3] + dt = items[5] + cat1 = items[6] + + if ds == "20180118": + fo = ftrain + else: + fo = ftest + if user != last_user: + movie_id_list = [] + cate1_list = [] + #print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + "" + "\t" + "" + else: + history_clk_num = len(movie_id_list) + cat_str = "" + mid_str = "" + for c1 in cate1_list: + cat_str += c1 + "" + for mid in movie_id_list: + mid_str += mid + "" + if len(cat_str) > 0: cat_str = cat_str[:-1] + if len(mid_str) > 0: mid_str = mid_str[:-1] + if history_clk_num >= 1: # 8 is the average length of user behavior + print(items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 + + "\t" + mid_str + "\t" + cat_str, + file=fo) + last_user = user + if clk: + movie_id_list.append(movie_id) + cate1_list.append(cat1) + line_idx += 1 diff --git a/modelzoo/FNN/data/script/pick2txt.py b/modelzoo/FNN/data/script/pick2txt.py new file mode 100644 index 00000000000..b7c129ffbe0 --- /dev/null +++ b/modelzoo/FNN/data/script/pick2txt.py @@ -0,0 +1,14 @@ +import pickle + +def pkl2txt(filename): + pklfile = pickle.load(open(filename+'.pkl', 'rb')) + with open(filename+'.txt','w') as f: + f.write('\n'.join(pklfile)) + + + + +if __name__ == '__main__': + pkl2txt('uid_voc') + pkl2txt('mid_voc') + pkl2txt('cat_voc') \ No newline at end of file diff --git a/modelzoo/FNN/data/script/process_data.py b/modelzoo/FNN/data/script/process_data.py new file mode 100644 index 00000000000..0bff64f30bd --- /dev/null +++ b/modelzoo/FNN/data/script/process_data.py @@ -0,0 +1,108 @@ +import sys +import random +import time + + +def process_meta(file): + fi = open(file, "r") + fo = open("item-info", "w") + for line in fi: + obj = eval(line) + cat = obj["categories"][0][-1] + print(obj["asin"] + "\t" + cat, file=fo) + + +def process_reviews(file): + fi = open(file, "r") + user_map = {} + fo = open("reviews-info", "w") + for line in fi: + obj = eval(line) + userID = obj["reviewerID"] + itemID = obj["asin"] + rating = obj["overall"] + time = obj["unixReviewTime"] + print(userID + "\t" + itemID + "\t" + str(rating) + "\t" + str(time), + file=fo) + + +def manual_join(): + f_rev = open("reviews-info", "r") + user_map = {} + item_list = [] + for line in f_rev: + line = line.strip() + items = line.split("\t") + #loctime = time.localtime(float(items[-1])) + #items[-1] = time.strftime('%Y-%m-%d', loctime) + if items[0] not in user_map: + user_map[items[0]] = [] + user_map[items[0]].append(("\t".join(items), float(items[-1]))) + item_list.append(items[1]) + f_meta = open("item-info", "r") + meta_map = {} + for line in f_meta: + arr = line.strip().split("\t") + if arr[0] not in meta_map: + meta_map[arr[0]] = arr[1] + arr = line.strip().split("\t") + fo = open("jointed-new", "w") + for key in user_map: + sorted_user_bh = sorted(user_map[key], key=lambda x: x[1]) + for line, t in sorted_user_bh: + items = line.split("\t") + asin = items[1] + j = 0 + while True: + asin_neg_index = random.randint(0, len(item_list) - 1) + asin_neg = item_list[asin_neg_index] + if asin_neg == asin: + continue + items[1] = asin_neg + print("0" + "\t" + "\t".join(items) + "\t" + + meta_map[asin_neg], + file=fo) + j += 1 + if j == 1: #negative sampling frequency + break + if asin in meta_map: + print("1" + "\t" + 
line + "\t" + meta_map[asin], file=fo) + else: + print("1" + "\t" + line + "\t" + "default_cat", file=fo) + + +def split_test(): + fi = open("jointed-new", "r") + fo = open("jointed-new-split-info", "w") + user_count = {} + for line in fi: + line = line.strip() + user = line.split("\t")[1] + if user not in user_count: + user_count[user] = 0 + user_count[user] += 1 + fi.seek(0) + i = 0 + last_user = "A26ZDKC53OP6JD" + for line in fi: + line = line.strip() + user = line.split("\t")[1] + if user == last_user: + if i < user_count[user] - 2: # 1 + negative samples + print("20180118" + "\t" + line, file=fo) + else: + print("20190119" + "\t" + line, file=fo) + else: + last_user = user + i = 0 + if i < user_count[user] - 2: + print("20180118" + "\t" + line, file=fo) + else: + print("20190119" + "\t" + line, file=fo) + i += 1 + + +process_meta(sys.argv[1]) +process_reviews(sys.argv[2]) +manual_join() +split_test() diff --git a/modelzoo/FNN/data/script/split_by_user.py b/modelzoo/FNN/data/script/split_by_user.py new file mode 100644 index 00000000000..cc7988c6601 --- /dev/null +++ b/modelzoo/FNN/data/script/split_by_user.py @@ -0,0 +1,18 @@ +import random + +fi = open("local_test", "r") +ftrain = open("local_train_splitByUser", "w") +ftest = open("local_test_splitByUser", "w") + +while True: + rand_int = random.randint(1, 10) + noclk_line = fi.readline().strip() + clk_line = fi.readline().strip() + if noclk_line == "" or clk_line == "": + break + if rand_int == 2: + print(noclk_line, file=ftest) + print(clk_line, file=ftest) + else: + print(noclk_line, file=ftrain) + print(clk_line, file=ftrain) diff --git a/modelzoo/FNN/result/README.md b/modelzoo/FNN/result/README.md index 6f962fb1716..ccec44eb9a5 100644 --- a/modelzoo/FNN/result/README.md +++ b/modelzoo/FNN/result/README.md @@ -1,2 +1,2 @@ # Result -Evaluation Metrics file are default saved in this folder. +Checkpoint & timeline file are default saved in this folder. 
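Note that split_by_user.py relies on local_aggretor.py emitting samples in adjacent pairs: a negative line (label 0) immediately followed by the positive line (label 1) for the same user and history, and it routes roughly one pair in ten to the test split. A minimal sketch of reading the resulting files back, assuming the tab-separated layout and the "\x02" history separator used by these scripts (the function name is illustrative):

```
def read_samples(path):
    # Each line: label \t uid \t item \t category \t item_history \t cate_history
    with open(path) as f:
        for line in f:
            label, uid, item, cate, mids, cats = line.rstrip("\n").split("\t")
            yield int(label), uid, item, cate, mids.split("\x02"), cats.split("\x02")
```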
diff --git a/modelzoo/FNN/script/feature_column.py b/modelzoo/FNN/script/feature_column.py index 0569e32d3c3..69650e9e9ac 100644 --- a/modelzoo/FNN/script/feature_column.py +++ b/modelzoo/FNN/script/feature_column.py @@ -14,6 +14,33 @@ import pandas as pd import numpy as np +fi = open('../../deep_ctr_master/data/fm.model.txt','r') + +first = True +feat_weights={} +k=0 +for line in fi: + s = line.strip().split() + if first: + first = False + w_0 = float(s[0]) + feat_num = int(s[1]) + k = int(s[2]) + 1 # w and v + + else: + feat = int(s[0]) + weights = [float(s[1 + i]) for i in range(k)] + feat_weights[feat] = weights + +list1 =[] +for col,val in feat_weights.items(): + list1.append(val) + +# def my_init(shape,dtype=None): +# weight = np.array(list1) +# +# return weight.reshape(shape) + DEFAULT_GROUP_NAME = "default_group" @@ -31,7 +58,9 @@ def __new__(cls, name, vocabulary_size, embedding_dim=4, use_hash=False, vocabul if embedding_dim == "auto": embedding_dim = 6 * int(pow(vocabulary_size, 0.25)) if embeddings_initializer is None: - embeddings_initializer = RandomNormal(mean=0.0, stddev=0.0001, seed=2020) + embeddings_initializer = RandomNormal(mean=0.0, stddev=0.001, seed=2020) + # if embeddings_initializer=='fm': + # embeddings_initializer = my_init(shape=(vocabulary_size,embedding_dim)) @@ -161,11 +190,11 @@ def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=10 for i in range(len(linear_feature_columns)): if isinstance(linear_feature_columns[i], SparseFeat): linear_feature_columns[i] = linear_feature_columns[i]._replace(embedding_dim=1, - embeddings_initializer=Zeros()) + embeddings_initializer=RandomNormal(mean=0.0, stddev=0.01, seed=2020)) if isinstance(linear_feature_columns[i], VarLenSparseFeat): linear_feature_columns[i] = linear_feature_columns[i]._replace( sparsefeat=linear_feature_columns[i].sparsefeat._replace(embedding_dim=1, - embeddings_initializer=Zeros())) + embeddings_initializer=RandomNormal(mean=0.0, stddev=0.01, seed=2020))) linear_emb_list = [input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, prefix=prefix + str(i))[0] for i in range(units)] diff --git a/modelzoo/FNN/script/layers/utils.py b/modelzoo/FNN/script/layers/utils.py index 2be8f3fe5ef..7808e376dbd 100644 --- a/modelzoo/FNN/script/layers/utils.py +++ b/modelzoo/FNN/script/layers/utils.py @@ -6,6 +6,7 @@ """ import tensorflow as tf +import numpy as np from tensorflow.python.keras.layers import Flatten, Concatenate, Layer, Add from tensorflow.python.ops.lookup_ops import TextFileInitializer @@ -188,6 +189,7 @@ def get_config(self, ): def concat_func(inputs, axis=-1, mask=False): if not mask: inputs = list(map(NoMask(), inputs)) + if len(inputs) == 1: return inputs[0] else: diff --git a/modelzoo/FNN/script/utils.py b/modelzoo/FNN/script/utils.py index 7fe3b25a518..6425e58df6c 100644 --- a/modelzoo/FNN/script/utils.py +++ b/modelzoo/FNN/script/utils.py @@ -37,7 +37,7 @@ def check(version): latest_version = max(latest_version, ver) if latest_version > version: logging.warning( - '\nDeepCTR version {0} detected. Your version is {1}.\nUse `pip install -U deepctr` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format( + '\nDeepCTR version {0} detected. 
Your version is {1}.\nUse `pip install -U script` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format( latest_version, version)) except: print("Please check the latest version manually on https://pypi.org/project/deepctr/#history") diff --git a/modelzoo/FNN/train.py b/modelzoo/FNN/train.py index 92d94bced4e..aca9d9037e2 100644 --- a/modelzoo/FNN/train.py +++ b/modelzoo/FNN/train.py @@ -1,4 +1,6 @@ import os +import sys +import argparse import pandas as pd import numpy as np import tensorflow as tf @@ -6,126 +8,194 @@ import math from tensorflow.keras.preprocessing.sequence import pad_sequences from tensorflow.keras.optimizers import Adam -from sklearn.metrics import log_loss, roc_auc_score -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder, MinMaxScaler,MultiLabelBinarizer from script.models.fnn import FNN from script.feature_column import SparseFeat, DenseFeat, get_feature_names,VarLenSparseFeat -import gc +import collections os.environ['CUDA_VISIBLE_DEVICES'] = '0' -def split(x): - key_ans = x.split(',') - for key in key_ans: - if key not in key2index: - key2index[key] = len(key2index) + 1 - return list(map(lambda x: key2index[x], key_ans)) +UNSEQ_COLUMNS = ['UID', 'ITEM', 'CATEGORY'] +LABEL_COLUMN = ['CLICKED'] +TRAIN_DATA_COLUMNS = LABEL_COLUMN + UNSEQ_COLUMNS -if __name__=="__main__": - path = 'data/' - datalist = ['1458','2259','2261','2997','3386','all'] - - for file in datalist: - - data = pd.read_csv(path+file+'/train.log.txt',encoding="utf-8", - header=0,sep="\t",low_memory=False) - - test_data = pd.read_csv(path+file+'/test.log.txt',encoding="utf-8", - header=0,sep="\t",low_memory=False) - - - data = data[['click','weekday','hour','useragent','IP','region', 'city', 'adexchange', 'domain', 'slotid','slotwidth', - 'slotheight', 'slotvisibility', 'slotformat', 'creative', 'advertiser', 'slotprice']] - - test_data = test_data[['click','weekday','hour','useragent','IP','region', 'city', 'adexchange', 'domain', 'slotid','slotwidth', - 'slotheight', 'slotvisibility', 'slotformat', 'creative', 'advertiser', 'slotprice']] - - data['istest']=0 - test_data['istest']=1 - df = pd.concat([data, test_data], axis=0, ignore_index=True) - del data, test_data - gc.collect() - - - df.dropna(subset=['click'],inplace=True) +EMBEDDING_DIM=8 - df['adexchange'].fillna(0,inplace=True) - df['adexchange']=df['adexchange'].astype(int) +def build_model_input(filename=None,chunkSize=1e6,loop=True): + chunks=[] + data = pd.read_csv(filename, encoding="utf-8", header=None, names=TRAIN_DATA_COLUMNS, iterator=True) + while loop: + try: + chunk = data.get_chunk(chunkSize) + chunks.append(chunk) + except StopIteration: + loop=False + dataset = pd.concat(chunks) + return dataset - df.fillna('unknown', inplace=True) - dense_features = ['weekday', 'hour','region','city','adexchange','slotwidth','slotheight', - 'advertiser', 'slotprice' ] +def build_feature_columns(data_location=None): + if data_location: + uid_file = os.path.join(data_location, 'uid_labelencode.csv') + mid_file = os.path.join(data_location, 'mid_labelencode.csv') + cat_file = os.path.join(data_location, 'cat_labelencode.csv') + if (not os.path.exists(uid_file)) or (not os.path.exists(mid_file)) or ( + not os.path.exists(cat_file)): + print("uid_labelencode.csv, mid_labelencode.csv or cat_labelencode.csv does not exist in data file.") + sys.exit() + uid_data = pd.read_csv(uid_file,encoding="utf-8") + mid_data = pd.read_csv(mid_file,encoding="utf-8") + cat_data = 
pd.read_csv(cat_file,encoding="utf-8")
-    sparse_features=[]
-    target='click'
-    for col in df.columns:
-        if col not in dense_features and col not in ['istest','click']:
-            lbe = LabelEncoder()
-            df[col] = lbe.fit_transform(df[col])
-            df[col]=lbe.fit_transform(df[col])
-            sparse_features.append(col)
+
+        feature_column=[SparseFeat('UID', vocabulary_size=uid_data['UID'+'_encode'].max() + 1, embedding_dim=EMBEDDING_DIM,embeddings_initializer=None),
+                        SparseFeat('ITEM',vocabulary_size=mid_data['ITEM'+'_encode'].max()+1,embedding_dim=EMBEDDING_DIM,embeddings_initializer=None),
+                        SparseFeat('CATEGORY',vocabulary_size=cat_data['CATEGORY'+'_encode'].max()+1,embedding_dim=EMBEDDING_DIM,embeddings_initializer=None)]
-    mms = MinMaxScaler(feature_range=(0, 1))
+    else:
+        print("data_location is not specified.")
+        sys.exit()
-    df[dense_features] = mms.fit_transform(df[dense_features])
+    return feature_column
-    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=df[feat].max() + 1, embedding_dim=11,embeddings_initializer=None)
-                              for i, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, )
-                              for feat in dense_features]
-    linear_feature_columns = fixlen_feature_columns
-    dnn_feature_columns = fixlen_feature_columns
+
+
+def main(train_data=None,test_data=None,feature_colums=None):
+    feature_names = get_feature_names(feature_colums)
+    model = FNN(feature_colums, feature_colums, dnn_hidden_units=args.dnn_hidden_units,l2_reg_embedding=args.l2_reg_embedding,
+                l2_reg_linear=args.l2_reg_linear,l2_reg_dnn=args.l2_reg_dnn,seed=args.seed,dnn_dropout=args.dnn_dropout,
+                dnn_activation=args.dnn_activation,task=args.task)
+    if args.optimizer=='adam':
+        optimizer = Adam(learning_rate=args.learning_rate, amsgrad=False)
+    else:
+        optimizer = args.optimizer  # fall back to Keras' optimizer-by-name lookup
+    model.compile(optimizer, loss=args.loss,
+                  metrics=args.metrics)
+    saver = tf.train.Saver()
+    gpu_options = tf.GPUOptions(allow_growth=True)
+    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
+        if args.training:
+            train_inputs = {name: train_data[name].values for name in feature_names}
+            sess.run(tf.tables_initializer())
+            history = model.fit(train_inputs, train_data[LABEL_COLUMN].values,
+                                batch_size=args.batch_size, epochs=args.epochs,
+                                verbose=args.verbose,validation_split=args.validation_split)
+            saver.save(sess,args.save_path,global_step=args.save_step)
+
+        else:
+            #new_saver = tf.train.import_meta_graph(save_path+'model.ckpt.meta')
+
+            saver.restore(sess, tf.train.latest_checkpoint(args.save_path))
+            test_inputs = {name:test_data[name].values for name in feature_names}
+            pred_ans = model.predict(test_inputs, batch_size=args.batch_size)
+
+
+# Build the command-line argument parser
+def get_arg_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--learning_rate',
+                        help='Learning rate for the model.',
+                        type=float,
+                        default=0.001)
+    parser.add_argument('--save_path',
+                        help='Full path to the model output directory.',
+                        required=False,
+                        default='results/')
+    parser.add_argument('--batch_size',
+                        help='Batch size to train. Default is 512.',
+                        type=int,
+                        default=512)
+    parser.add_argument('--training',
+                        help='Train (True) or evaluate (False).',
+                        # note: argparse's type=bool treats any non-empty string as True
+                        type=bool,
+                        default=True)
+    parser.add_argument('--epochs',
+                        help='Number of epochs to train. Default is 1.',
+                        type=int,
+                        default=1)
+    parser.add_argument('--save_step',
+                        help='Number of steps between checkpoint saves.',
+                        type=int,
+                        default=1)
+    parser.add_argument('--verbose',
+                        help='Verbosity of model.fit: 0 = silent, 1 = progress bar, 2 = one line per epoch.',
+                        choices=[0,1,2],
+                        default=2)
+    parser.add_argument('--validation_split',
+                        help='Fraction of the training data to use as validation data.',
+                        type=float,
+                        default=0.2)
+    parser.add_argument('--optimizer',
+                        type=str,
+                        default='adam')
+    parser.add_argument('--dnn_hidden_units',
+                        type=tuple,
+                        help='Layer sizes of the DNN part of the model.',
+                        default=(256, 128, 64))
+    parser.add_argument('--l2_reg_embedding',
+                        help='L2 regularizer strength applied to embedding vector.',
+                        type=float,
+                        default=0.00001)
+    parser.add_argument('--l2_reg_linear',
+                        help='L2 regularizer strength applied to linear weight.',
+                        type=float,
+                        default=0.00001)
+    parser.add_argument('--l2_reg_dnn',
+                        help='L2 regularizer strength applied to DNN.',
+                        type=float,
+                        default=0)
+    parser.add_argument('--seed',
+                        help='Integer to use as random seed.',
+                        type=int,
+                        default=1024)
+    parser.add_argument('--dnn_dropout',
+                        help='Probability of dropping a given DNN coordinate, float in [0, 1).',
+                        type=float,
+                        default=0)
+    parser.add_argument('--dnn_activation',
+                        help='Activation function to use in DNN.',
+                        type=str,
+                        default='relu')
+    parser.add_argument('--task',
+                        help='``"binary"`` for binary logloss or ``"regression"`` for regression loss.',
+                        type=str,
+                        choices=['binary', 'regression'],
+                        default='binary')
+    parser.add_argument('--loss',
+                        type=str,
+                        default='binary_crossentropy')
+    parser.add_argument('--metrics',
+                        type=list,
+                        default=['binary_crossentropy', 'AUC'])
+
+
+    return parser
+if __name__=="__main__":
+    path = 'dataset'
+    train_path = path+'/local_train_splitByUser_to_labelencode.txt'
+    test_path = path+'/local_test_splitByUser_to_labelencode.txt'
+    feature_colums = build_feature_columns(path)
-    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
-
-    # 3.generate train&test input data for model
-    cols = [f for f in df.columns if f not in ['click', 'istest']]
-    train = df[df.istest==0][cols]
-    test = df[df.istest==1][cols]
-
-    train_model_input = {name: train[name] for name in feature_names}
-    test_model_input = {name: test[name] for name in feature_names}
-
-    gpu_options = tf.GPUOptions(allow_growth=True)
-
-
-    model = FNN(linear_feature_columns, dnn_feature_columns,task='binary',dnn_hidden_units=(128, 64, 32))
-
-    adam = Adam(learning_rate=0.001,amsgrad=False)
+    train_data = build_model_input(train_path)
+    test_data = build_model_input(test_path)
-    model.compile(adam, "binary_crossentropy",
-                  metrics=['binary_crossentropy','AUC'])
+    feature_names = get_feature_names(feature_colums)
-    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
+    parser = get_arg_parser()
+    args = parser.parse_args()
+    main(train_data,test_data,feature_colums)
-
-        sess.run(tf.tables_initializer())
-        history = model.fit(train_model_input, df[df.istest==0][target].values,
-                            batch_size=128, epochs=50, verbose=2, validation_split=0.2)
-        pred_ans = model.predict(test_model_input, batch_size=128)
-        test_auc = roc_auc_score(df[df.istest==1][target].values,pred_ans)
-        print('test_auc=',test_auc)
-        with open('result/result.txt','a+') as tx:
-            print(file+" test LogLoss",
round(log_loss(df[df.istest==1][target].values, pred_ans), 4),file=tx) - print(file+" test AUC", round(roc_auc_score(df[df.istest==1][target].values, pred_ans), 4),file=tx) - print('='*50,file=tx) From 0cc1389bc3e6883b1724236dc6b76814cc5236b0 Mon Sep 17 00:00:00 2001 From: lihangtian <936971274@qq.com> Date: Wed, 12 Oct 2022 15:55:45 +0800 Subject: [PATCH 7/8] [ModelZoo] Support FwFM --- modelzoo/FwFM/README.md | 85 + modelzoo/FwFM/data/README.md | 4 + modelzoo/FwFM/data/prepare_data.sh | 15 + modelzoo/FwFM/data/script/data2labelencode.py | 54 + modelzoo/FwFM/data/script/generate_neg.py | 63 + modelzoo/FwFM/data/script/generate_voc.py | 66 + .../FwFM/data/script/history_behavior_list.py | 41 + modelzoo/FwFM/data/script/item_map.py | 29 + modelzoo/FwFM/data/script/local_aggretor.py | 47 + modelzoo/FwFM/data/script/pick2txt.py | 14 + modelzoo/FwFM/data/script/process_data.py | 108 ++ modelzoo/FwFM/data/script/split_by_user.py | 18 + modelzoo/FwFM/result/README.md | 2 + modelzoo/FwFM/script/__init__.py | 0 modelzoo/FwFM/script/contrib/__init__.py | 0 modelzoo/FwFM/script/contrib/rnn.py | 1153 +++++++++++++ modelzoo/FwFM/script/contrib/rnn_v2.py | 1452 ++++++++++++++++ modelzoo/FwFM/script/contrib/utils.py | 378 +++++ modelzoo/FwFM/script/estimator/__init__.py | 1 + .../FwFM/script/estimator/feature_column.py | 52 + modelzoo/FwFM/script/estimator/inputs.py | 52 + .../FwFM/script/estimator/models/__init__.py | 13 + modelzoo/FwFM/script/estimator/models/fwfm.py | 84 + modelzoo/FwFM/script/estimator/utils.py | 217 +++ modelzoo/FwFM/script/feature_column.py | 249 +++ modelzoo/FwFM/script/inputs.py | 155 ++ modelzoo/FwFM/script/layers/__init__.py | 52 + modelzoo/FwFM/script/layers/activation.py | 85 + modelzoo/FwFM/script/layers/core.py | 267 +++ modelzoo/FwFM/script/layers/interaction.py | 1492 +++++++++++++++++ modelzoo/FwFM/script/layers/normalization.py | 51 + modelzoo/FwFM/script/layers/sequence.py | 901 ++++++++++ modelzoo/FwFM/script/layers/utils.py | 302 ++++ modelzoo/FwFM/script/models/__init__.py | 3 + modelzoo/FwFM/script/models/fwfm.py | 72 + modelzoo/FwFM/script/utils.py | 46 + modelzoo/FwFM/train.py | 255 +++ 37 files changed, 7878 insertions(+) create mode 100644 modelzoo/FwFM/README.md create mode 100644 modelzoo/FwFM/data/README.md create mode 100644 modelzoo/FwFM/data/prepare_data.sh create mode 100644 modelzoo/FwFM/data/script/data2labelencode.py create mode 100644 modelzoo/FwFM/data/script/generate_neg.py create mode 100644 modelzoo/FwFM/data/script/generate_voc.py create mode 100644 modelzoo/FwFM/data/script/history_behavior_list.py create mode 100644 modelzoo/FwFM/data/script/item_map.py create mode 100644 modelzoo/FwFM/data/script/local_aggretor.py create mode 100644 modelzoo/FwFM/data/script/pick2txt.py create mode 100644 modelzoo/FwFM/data/script/process_data.py create mode 100644 modelzoo/FwFM/data/script/split_by_user.py create mode 100644 modelzoo/FwFM/result/README.md create mode 100644 modelzoo/FwFM/script/__init__.py create mode 100644 modelzoo/FwFM/script/contrib/__init__.py create mode 100644 modelzoo/FwFM/script/contrib/rnn.py create mode 100644 modelzoo/FwFM/script/contrib/rnn_v2.py create mode 100644 modelzoo/FwFM/script/contrib/utils.py create mode 100644 modelzoo/FwFM/script/estimator/__init__.py create mode 100644 modelzoo/FwFM/script/estimator/feature_column.py create mode 100644 modelzoo/FwFM/script/estimator/inputs.py create mode 100644 modelzoo/FwFM/script/estimator/models/__init__.py create mode 100644 modelzoo/FwFM/script/estimator/models/fwfm.py create 
mode 100644 modelzoo/FwFM/script/estimator/utils.py
 create mode 100644 modelzoo/FwFM/script/feature_column.py
 create mode 100644 modelzoo/FwFM/script/inputs.py
 create mode 100644 modelzoo/FwFM/script/layers/__init__.py
 create mode 100644 modelzoo/FwFM/script/layers/activation.py
 create mode 100644 modelzoo/FwFM/script/layers/core.py
 create mode 100644 modelzoo/FwFM/script/layers/interaction.py
 create mode 100644 modelzoo/FwFM/script/layers/normalization.py
 create mode 100644 modelzoo/FwFM/script/layers/sequence.py
 create mode 100644 modelzoo/FwFM/script/layers/utils.py
 create mode 100644 modelzoo/FwFM/script/models/__init__.py
 create mode 100644 modelzoo/FwFM/script/models/fwfm.py
 create mode 100644 modelzoo/FwFM/script/utils.py
 create mode 100644 modelzoo/FwFM/train.py

diff --git a/modelzoo/FwFM/README.md b/modelzoo/FwFM/README.md
new file mode 100644
index 00000000000..6e5ebfa5cb9
--- /dev/null
+++ b/modelzoo/FwFM/README.md
@@ -0,0 +1,85 @@
+# FwFM
+
+The following is a brief directory structure and description for this example:
+
+```
+├── data                              # Dataset directory
+│   ├── prepare_data.sh               # Shell script to download and process the dataset
+│   ├── README.md                     # Documentation describing how to prepare the dataset
+│   └── script                        # Scripts to process the dataset
+│       ├── data2labelencode.py       # Convert data to label-encoded csv files
+│       ├── generate_neg.py           # Create negative samples
+│       ├── generate_voc.py           # Create the feature vocabularies
+│       ├── history_behavior_list.py  # Collect each user's history behaviors
+│       ├── item_map.py               # Create a map between item id and category
+│       ├── local_aggretor.py         # Generate sample data
+│       ├── pick2txt.py               # Convert the vocabularies' format
+│       ├── process_data.py           # Parse the raw json data
+│       └── split_by_user.py          # Divide the dataset
+├── script                            # Model directory
+│   ├── contrib                       # Directory containing the rnn implementation
+│   ├── estimator                     # Directory containing the estimator utilities
+│   ├── layers                        # Directory containing the layers of the model
+│   ├── models                        # Directory containing the FwFM model
+│   ├── feature_column.py             # Feature column definitions
+│   ├── inputs.py                     # Construction of the input layer
+│   └── utils.py
+├── train.py                          # Training script
+└── README.md                         # Documentation
+```
+
+## Content
+
+[TOC]
+
+## Model Structure
+
+An implementation of the paper "Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising".
+
+## Usage
+
+### Stand-alone Training
+
+1. Prepare the dataset and the DeepRec environment.
+
+   1. Manually
+
+      - Follow [dataset preparation](https://github.com/alibaba/DeepRec/tree/main/modelzoo/DIEN#prepare) to prepare the dataset.
+      - Download the code with `git clone https://github.com/alibaba/DeepRec`.
+      - Follow [How to Build](https://github.com/alibaba/DeepRec#how-to-build) to build the DeepRec whl package and install it with `pip install $DEEPREC_WHL`.
+
+   2. Docker (recommended)
+
+      ```
+      docker pull alideeprec/deeprec-release-modelzoo:latest
+      docker run -it alideeprec/deeprec-release-modelzoo:latest /bin/bash
+
+      # In docker container
+      cd /root/modelzoo/FwFM
+      ```
+
+2. Train.
+
+```
+python train.py
+```
+
+## Dataset
+
+The Amazon Books dataset is used as the benchmark dataset.
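+
+After preprocessing, each sample line is tab-separated as `label`, `uid`,
+`item`, `category`, `item_history`, `category_history`, where the two history
+fields are `\x02`-joined lists (the separator convention used by the scripts
+under `data/script/`). Schematically, with placeholder values:
+
+```
+<label>\t<uid>\t<item>\t<category>\t<item1>\x02<item2>\t<cate1>\x02<cate2>
+```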
+ +### Prepare + +For details of Data download, see [Data Preparation](https://github.com/Atomu2014/make-ipinyou-data) diff --git a/modelzoo/FwFM/data/README.md b/modelzoo/FwFM/data/README.md new file mode 100644 index 00000000000..15a0bc61c8d --- /dev/null +++ b/modelzoo/FwFM/data/README.md @@ -0,0 +1,4 @@ +make-ipinyou-data +================= + +For details of Data download, see [Data Preparation](https://github.com/Atomu2014/make-ipinyou-data) diff --git a/modelzoo/FwFM/data/prepare_data.sh b/modelzoo/FwFM/data/prepare_data.sh new file mode 100644 index 00000000000..49fdb9a0da1 --- /dev/null +++ b/modelzoo/FwFM/data/prepare_data.sh @@ -0,0 +1,15 @@ +wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Books.json.gz +wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz +gunzip reviews_Books.json.gz +gunzip meta_Books.json.gz + +python script/process_data.py meta_Books.json reviews_Books.json +python script/local_aggretor.py +python script/split_by_user.py +python script/generate_voc.py + +python script/item_map.py +python script/history_behavior_list.py +python script/generate_neg.py + +python script/data2labelencode.py \ No newline at end of file diff --git a/modelzoo/FwFM/data/script/data2labelencode.py b/modelzoo/FwFM/data/script/data2labelencode.py new file mode 100644 index 00000000000..04daba5e28a --- /dev/null +++ b/modelzoo/FwFM/data/script/data2labelencode.py @@ -0,0 +1,54 @@ +import pandas as pd +import numpy as np +import pickle + +UNSEQ_COLUMNS = ['UID', 'ITEM', 'CATEGORY'] +HIS_COLUMNS = ['HISTORY_ITEM', 'HISTORY_CATEGORY'] +SEQ_COLUMNS = HIS_COLUMNS +LABEL_COLUMN = ['CLICKED'] +TRAIN_DATA_COLUMNS = LABEL_COLUMN + UNSEQ_COLUMNS + SEQ_COLUMNS + + + +def inputs_to_labelencode(filename): + def encoder_dict(data, category_col): + category_dict = data[category_col].value_counts() + category_dict = pd.Series(np.arange(0, len(category_dict)), index=category_dict.index).to_dict() + data[category_col + '_encode'] = data[category_col].map(category_dict).astype('int32') + return data + + uid_file = '../CAN/data/uid_voc.txt' + mid_file = '../CAN/data/mid_voc.txt' + cat_file = '../CAN/data/cat_voc.txt' + + uid_data = pd.read_csv(uid_file, encoding="utf-8", header=None, names=['UID']) + mid_data = pd.read_csv(mid_file, encoding="utf-8", header=None, names=['ITEM']) + cat_data = pd.read_csv(cat_file, encoding="utf-8", header=None, names=['CATEGORY']) + + uid_data = encoder_dict(uid_data, 'UID') + mid_data = encoder_dict(mid_data, 'ITEM') + cat_data = encoder_dict(cat_data, 'CATEGORY') + + dataset = pd.read_csv(filename, encoding="utf-8", + header=None, names=TRAIN_DATA_COLUMNS, sep="\t", low_memory=False) + for key in ['UID','ITEM','CATEGORY']: + if key=='UID': + dataset = pd.merge(dataset, uid_data, on=key, how='inner') + elif key=='ITEM': + dataset = pd.merge(dataset, mid_data, on=key, how='inner') + else: + dataset = pd.merge(dataset, cat_data, on=key, how='inner') + + dataset = dataset.drop(UNSEQ_COLUMNS + SEQ_COLUMNS, axis=1) + + dataset.to_csv(filename + '_to_labelencode.txt',index=0,header=0) + uid_data.to_csv('dataset/uid_labelencode.csv',index=False) + mid_data.to_csv('dataset/mid_labelencode.csv',index=False) + cat_data.to_csv('dataset/cat_labelencode.csv',index=False) + + + +if __name__ == '__main__': + inputs_to_labelencode('../CAN/data/local_train_splitByUser') + inputs_to_labelencode('../CAN/data/local_test_splitByUser') + diff --git a/modelzoo/FwFM/data/script/generate_neg.py 
b/modelzoo/FwFM/data/script/generate_neg.py
new file mode 100644
index 00000000000..a10ef919e13
--- /dev/null
+++ b/modelzoo/FwFM/data/script/generate_neg.py
@@ -0,0 +1,63 @@
+import random
+
+NEG_SEQ_LENGTH_FOR_EACH_HISTORY_ITEM = 1
+
+# History fields are "\x02"-separated; the negatives generated for one history
+# position are joined with "\x03" (an assumed second-level separator mirroring
+# the DIEN pipeline; with NEG_SEQ_LENGTH == 1 it never appears in the output).
+
+
+def createNegData(file):
+    with open(file, 'r') as f_raw:
+        with open(file + '_neg', 'w') as f_out:
+            FirstLine = True
+            for line in f_raw:
+                linelist = line.strip().split('\t')
+                uid = linelist[1]
+
+                if uid not in user_history_behavior:
+                    neg_str = '\t'
+                else:
+                    his_items = linelist[4].split('\x02')
+                    neg_items_str = ''
+                    neg_cates_str = ''
+                    for pos in his_items:
+                        tmp_items_str = ''
+                        tmp_cates_str = ''
+                        tmp_items = []
+                        tmp_cates = []
+                        neg_length = 0
+                        while (True):
+                            index = random.randint(
+                                0,
+                                len(user_history_behavior[uid][0]) - 1)
+                            if user_history_behavior[uid][0][index] != pos:
+                                tmp_items.append(
+                                    user_history_behavior[uid][0][index])
+                                tmp_cates.append(
+                                    user_history_behavior[uid][1][index])
+                                neg_length += 1
+                            if neg_length >= NEG_SEQ_LENGTH_FOR_EACH_HISTORY_ITEM:
+                                break
+                        for item in tmp_items:
+                            tmp_items_str += (item + '\x03')
+                        for cate in tmp_cates:
+                            tmp_cates_str += (cate + '\x03')
+                        neg_items_str += (tmp_items_str[:-1] + '\x02')
+                        neg_cates_str += (tmp_cates_str[:-1] + '\x02')
+                    neg_str = neg_items_str[:-1] + '\t' + neg_cates_str[:-1]
+                if FirstLine:
+                    f_out.write(neg_str)
+                    FirstLine = False
+                else:
+                    f_out.write('\n' + neg_str)
+
+
+user_history_behavior = {}
+with open('user_history_behavior.txt', 'r') as f:
+    for line in f:
+        linelist = line.strip().split('\t')
+        uid = linelist[0]
+        items = linelist[1].split('\x02')
+        cates = linelist[2].split('\x02')
+        user_history_behavior[uid] = [items, cates]
+
+data_file = ['local_test_splitByUser', 'local_train_splitByUser']
+for file in data_file:
+    createNegData(file)
diff --git a/modelzoo/FwFM/data/script/generate_voc.py b/modelzoo/FwFM/data/script/generate_voc.py
new file mode 100644
index 00000000000..447fe6393b7
--- /dev/null
+++ b/modelzoo/FwFM/data/script/generate_voc.py
@@ -0,0 +1,66 @@
+# import cPickle
+import pickle as cPickle
+
+f_train = open("local_train_splitByUser", "r")
+uid_dict = {}
+mid_dict = {}
+cat_dict = {}
+
+iddd = 0
+for line in f_train:
+    arr = line.strip("\n").split("\t")
+    clk = arr[0]
+    uid = arr[1]
+    mid = arr[2]
+    cat = arr[3]
+    mid_list = arr[4]
+    cat_list = arr[5]
+    if uid not in uid_dict:
+        uid_dict[uid] = 0
+    uid_dict[uid] += 1
+    if mid not in mid_dict:
+        mid_dict[mid] = 0
+    mid_dict[mid] += 1
+    if cat not in cat_dict:
+        cat_dict[cat] = 0
+    cat_dict[cat] += 1
+    if len(mid_list) == 0:
+        continue
+    for m in mid_list.split("\x02"):
+        if m not in mid_dict:
+            mid_dict[m] = 0
+        mid_dict[m] += 1
+    #print iddd
+    iddd+=1
+    for c in cat_list.split("\x02"):
+        if c not in cat_dict:
+            cat_dict[c] = 0
+        cat_dict[c] += 1
+
+sorted_uid_dict = sorted(uid_dict.items(), key=lambda x:x[1], reverse=True)
+sorted_mid_dict = sorted(mid_dict.items(), key=lambda x:x[1], reverse=True)
+sorted_cat_dict = sorted(cat_dict.items(), key=lambda x:x[1], reverse=True)
+
+uid_voc = {}
+index = 0
+for key, value in sorted_uid_dict:
+    uid_voc[key] = index
+    index += 1
+
+mid_voc = {}
+mid_voc["default_mid"] = 0
+index = 1
+for key, value in sorted_mid_dict:
+    mid_voc[key] = index
+    index += 1
+
+cat_voc = {}
+cat_voc["default_cat"] = 0
+index = 1
+for key, value in sorted_cat_dict:
+    cat_voc[key] = index
+    index += 1
+
+cPickle.dump(uid_voc, open("uid_voc.pkl", "wb"))
+cPickle.dump(mid_voc, open("mid_voc.pkl", "wb"))
+cPickle.dump(cat_voc, open("cat_voc.pkl", "wb"))
diff --git a/modelzoo/FwFM/data/script/history_behavior_list.py
b/modelzoo/FwFM/data/script/history_behavior_list.py
new file mode 100644
index 00000000000..6adaf398cef
--- /dev/null
+++ b/modelzoo/FwFM/data/script/history_behavior_list.py
@@ -0,0 +1,41 @@
+item_to_cate_map = {}
+with open('item2catmap.txt', 'r') as f:
+    for line in f:
+        linelist = line.strip().split('\t')
+        item = linelist[0]
+        cate = linelist[1]
+        item_to_cate_map[item] = cate
+
+user_history_behavior = {}
+with open('reviews-info', 'r') as f:
+    for line in f:
+        linelist = line.strip().split('\t')
+        uid = linelist[0]
+        item = linelist[1]
+        if uid not in user_history_behavior:
+            user_history_behavior[uid] = [item]
+        else:
+            if item not in user_history_behavior[uid]:
+                user_history_behavior[uid].append(item)
+
+FirstLine = True
+with open('user_history_behavior.txt', 'w') as f:
+    for uid, items in user_history_behavior.items():
+        itemstr = ''
+        catestr = ''
+        for i in items:
+            if i in item_to_cate_map:
+                c = item_to_cate_map[i]
+            else:
+                c = 'Unknown'
+            if not itemstr:
+                itemstr += i
+                catestr += c
+            else:
+                # join multi-valued fields with the "\x02" separator
+                itemstr += ('\x02' + i)
+                catestr += ('\x02' + c)
+        if FirstLine:
+            f.write(uid + '\t' + itemstr + '\t' + catestr)
+            FirstLine = False
+        else:
+            f.write('\n' + uid + '\t' + itemstr + '\t' + catestr)
diff --git a/modelzoo/FwFM/data/script/item_map.py b/modelzoo/FwFM/data/script/item_map.py
new file mode 100644
index 00000000000..94bebee5184
--- /dev/null
+++ b/modelzoo/FwFM/data/script/item_map.py
@@ -0,0 +1,29 @@
+import sys
+from tqdm import tqdm
+
+data_file = ['local_test_splitByUser', 'local_train_splitByUser']
+
+item_to_cate_map = {}
+# 367983
+for file_name in data_file:
+    with open(file_name, 'r') as f:
+        for line in f:
+            linelist = line.strip().split('\t')
+            items = linelist[4].split('\x02')
+            cates = linelist[5].split('\x02')
+            items.append(linelist[2])
+            cates.append(linelist[3])
+            # print(items)
+            # print(cates)
+            for index, item in enumerate(items):
+                if item not in item_to_cate_map:
+                    item_to_cate_map[item] = cates[index]
+
+with open('item2catmap.txt', 'w') as f:
+    firstline = True
+    for item, cate in item_to_cate_map.items():
+        if firstline:
+            f.write(item + '\t' + cate)
+            firstline = False
+        else:
+            f.write('\n' + item + '\t' + cate)
diff --git a/modelzoo/FwFM/data/script/local_aggretor.py b/modelzoo/FwFM/data/script/local_aggretor.py
new file mode 100644
index 00000000000..1fd8aceb32c
--- /dev/null
+++ b/modelzoo/FwFM/data/script/local_aggretor.py
@@ -0,0 +1,47 @@
+import sys
+import hashlib
+import random
+
+fin = open("jointed-new-split-info", "r")
+ftrain = open("local_train", "w")
+ftest = open("local_test", "w")
+
+last_user = "0"
+common_fea = ""
+line_idx = 0
+for line in fin:
+    items = line.strip().split("\t")
+    ds = items[0]
+    clk = int(items[1])
+    user = items[2]
+    movie_id = items[3]
+    dt = items[5]
+    cat1 = items[6]
+
+    if ds == "20180118":
+        fo = ftrain
+    else:
+        fo = ftest
+    if user != last_user:
+        movie_id_list = []
+        cate1_list = []
+        #print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + "" + "\t" + ""
+    else:
+        history_clk_num = len(movie_id_list)
+        cat_str = ""
+        mid_str = ""
+        for c1 in cate1_list:
+            cat_str += c1 + "\x02"
+        for mid in movie_id_list:
+            mid_str += mid + "\x02"
+        if len(cat_str) > 0: cat_str = cat_str[:-1]
+        if len(mid_str) > 0: mid_str = mid_str[:-1]
+        if history_clk_num >= 1:  # only keep samples with at least one history click
+            print(items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +
+                  "\t" + mid_str + "\t" + cat_str,
+                  file=fo)
+    last_user = user
+    if clk:
+        movie_id_list.append(movie_id)
+        cate1_list.append(cat1)
+    line_idx += 1
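The pick2txt.py script below writes only the keys of each vocabulary pickle, one per line: iterating a dict yields its keys, so `'\n'.join(pklfile)` drops the index values. That keys-only text file is exactly what data2labelencode.py later reads back with pandas. A minimal sketch of the contract, assuming the uid_voc.pkl produced by generate_voc.py:

```
import pickle
import pandas as pd

uid_voc = pickle.load(open("uid_voc.pkl", "rb"))   # {uid: frequency-rank index}
with open("uid_voc.txt", "w") as f:
    f.write("\n".join(uid_voc))                    # keys only, one per line

# data2labelencode.py re-reads this file and assigns fresh integer codes
uid_data = pd.read_csv("uid_voc.txt", header=None, names=["UID"])
```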
diff --git a/modelzoo/FwFM/data/script/pick2txt.py b/modelzoo/FwFM/data/script/pick2txt.py new file mode 100644 index 00000000000..b7c129ffbe0 --- /dev/null +++ b/modelzoo/FwFM/data/script/pick2txt.py @@ -0,0 +1,14 @@ +import pickle + +def pkl2txt(filename): + pklfile = pickle.load(open(filename+'.pkl', 'rb')) + with open(filename+'.txt','w') as f: + f.write('\n'.join(pklfile)) + + + + +if __name__ == '__main__': + pkl2txt('uid_voc') + pkl2txt('mid_voc') + pkl2txt('cat_voc') \ No newline at end of file diff --git a/modelzoo/FwFM/data/script/process_data.py b/modelzoo/FwFM/data/script/process_data.py new file mode 100644 index 00000000000..0bff64f30bd --- /dev/null +++ b/modelzoo/FwFM/data/script/process_data.py @@ -0,0 +1,108 @@ +import sys +import random +import time + + +def process_meta(file): + fi = open(file, "r") + fo = open("item-info", "w") + for line in fi: + obj = eval(line) + cat = obj["categories"][0][-1] + print(obj["asin"] + "\t" + cat, file=fo) + + +def process_reviews(file): + fi = open(file, "r") + user_map = {} + fo = open("reviews-info", "w") + for line in fi: + obj = eval(line) + userID = obj["reviewerID"] + itemID = obj["asin"] + rating = obj["overall"] + time = obj["unixReviewTime"] + print(userID + "\t" + itemID + "\t" + str(rating) + "\t" + str(time), + file=fo) + + +def manual_join(): + f_rev = open("reviews-info", "r") + user_map = {} + item_list = [] + for line in f_rev: + line = line.strip() + items = line.split("\t") + #loctime = time.localtime(float(items[-1])) + #items[-1] = time.strftime('%Y-%m-%d', loctime) + if items[0] not in user_map: + user_map[items[0]] = [] + user_map[items[0]].append(("\t".join(items), float(items[-1]))) + item_list.append(items[1]) + f_meta = open("item-info", "r") + meta_map = {} + for line in f_meta: + arr = line.strip().split("\t") + if arr[0] not in meta_map: + meta_map[arr[0]] = arr[1] + arr = line.strip().split("\t") + fo = open("jointed-new", "w") + for key in user_map: + sorted_user_bh = sorted(user_map[key], key=lambda x: x[1]) + for line, t in sorted_user_bh: + items = line.split("\t") + asin = items[1] + j = 0 + while True: + asin_neg_index = random.randint(0, len(item_list) - 1) + asin_neg = item_list[asin_neg_index] + if asin_neg == asin: + continue + items[1] = asin_neg + print("0" + "\t" + "\t".join(items) + "\t" + + meta_map[asin_neg], + file=fo) + j += 1 + if j == 1: #negative sampling frequency + break + if asin in meta_map: + print("1" + "\t" + line + "\t" + meta_map[asin], file=fo) + else: + print("1" + "\t" + line + "\t" + "default_cat", file=fo) + + +def split_test(): + fi = open("jointed-new", "r") + fo = open("jointed-new-split-info", "w") + user_count = {} + for line in fi: + line = line.strip() + user = line.split("\t")[1] + if user not in user_count: + user_count[user] = 0 + user_count[user] += 1 + fi.seek(0) + i = 0 + last_user = "A26ZDKC53OP6JD" + for line in fi: + line = line.strip() + user = line.split("\t")[1] + if user == last_user: + if i < user_count[user] - 2: # 1 + negative samples + print("20180118" + "\t" + line, file=fo) + else: + print("20190119" + "\t" + line, file=fo) + else: + last_user = user + i = 0 + if i < user_count[user] - 2: + print("20180118" + "\t" + line, file=fo) + else: + print("20190119" + "\t" + line, file=fo) + i += 1 + + +process_meta(sys.argv[1]) +process_reviews(sys.argv[2]) +manual_join() +split_test() diff --git a/modelzoo/FwFM/data/script/split_by_user.py b/modelzoo/FwFM/data/script/split_by_user.py new file mode 100644 index 00000000000..cc7988c6601 --- /dev/null +++ 
b/modelzoo/FwFM/data/script/split_by_user.py @@ -0,0 +1,18 @@ +import random + +fi = open("local_test", "r") +ftrain = open("local_train_splitByUser", "w") +ftest = open("local_test_splitByUser", "w") + +while True: + rand_int = random.randint(1, 10) + noclk_line = fi.readline().strip() + clk_line = fi.readline().strip() + if noclk_line == "" or clk_line == "": + break + if rand_int == 2: + print(noclk_line, file=ftest) + print(clk_line, file=ftest) + else: + print(noclk_line, file=ftrain) + print(clk_line, file=ftrain) diff --git a/modelzoo/FwFM/result/README.md b/modelzoo/FwFM/result/README.md new file mode 100644 index 00000000000..ccec44eb9a5 --- /dev/null +++ b/modelzoo/FwFM/result/README.md @@ -0,0 +1,2 @@ +# Result +Checkpoint & timeline file are default saved in this folder. diff --git a/modelzoo/FwFM/script/__init__.py b/modelzoo/FwFM/script/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/modelzoo/FwFM/script/contrib/__init__.py b/modelzoo/FwFM/script/contrib/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/modelzoo/FwFM/script/contrib/rnn.py b/modelzoo/FwFM/script/contrib/rnn.py new file mode 100644 index 00000000000..b3554993063 --- /dev/null +++ b/modelzoo/FwFM/script/contrib/rnn.py @@ -0,0 +1,1153 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +# + +# Licensed under the Apache License, Version 2.0 (the "License"); + +# you may not use this file except in compliance with the License. + +# You may obtain a copy of the License at + +# + +# http://www.apache.org/licenses/LICENSE-2.0 + +# + +# Unless required by applicable law or agreed to in writing, software + +# distributed under the License is distributed on an "AS IS" BASIS, + +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +# See the License for the specific language governing permissions and + +# limitations under the License. + +# ============================================================================== + + +"""RNN helpers for TensorFlow models. +@@bidirectional_dynamic_rnn +@@dynamic_rnn +@@raw_rnn +@@static_rnn +@@static_state_saving_rnn +@@static_bidirectional_rnn +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util import nest +import tensorflow as tf + + +def _like_rnncell_(cell): + """Checks that a given object is an RNNCell by using duck typing.""" + + conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"), + + hasattr(cell, "zero_state"), callable(cell)] + + return all(conditions) + + +# pylint: disable=protected-access + +_concat = rnn_cell_impl._concat +try: + _like_rnncell = rnn_cell_impl._like_rnncell +except Exception as e: + _like_rnncell = _like_rnncell_ + + +# pylint: enable=protected-access + + +def _transpose_batch_time(x): + """Transpose the batch and time dimensions of a Tensor. + Retains as much of the static shape information as possible. + Args: + x: A tensor of rank 2 or higher. + Returns: + x transposed along the first two dimensions. 
+ Raises: + ValueError: if `x` is rank 1 or lower. + """ + + x_static_shape = x.get_shape() + + if x_static_shape.ndims is not None and x_static_shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2, but saw shape: %s" % + + (x, x_static_shape)) + + x_rank = array_ops.rank(x) + + x_t = array_ops.transpose( + + x, array_ops.concat( + + ([1, 0], math_ops.range(2, x_rank)), axis=0)) + + x_t.set_shape( + + tensor_shape.TensorShape([ + + x_static_shape[1].value, x_static_shape[0].value + + ]).concatenate(x_static_shape[2:])) + + return x_t + + +def _best_effort_input_batch_size(flat_input): + """Get static input batch size if available, with fallback to the dynamic one. + Args: + flat_input: An iterable of time major input Tensors of shape [max_time, + batch_size, ...]. All inputs should have compatible batch sizes. + Returns: + The batch size in Python integer if available, or a scalar Tensor otherwise. + Raises: + ValueError: if there is any input with an invalid shape. + """ + + for input_ in flat_input: + + shape = input_.shape + + if shape.ndims is None: + continue + + if shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2" % input_) + + batch_size = shape[1].value + + if batch_size is not None: + return batch_size + + # Fallback to the dynamic batch size of the first input. + + return array_ops.shape(flat_input[0])[1] + + +def _infer_state_dtype(explicit_dtype, state): + """Infer the dtype of an RNN state. + Args: + explicit_dtype: explicitly declared dtype or None. + state: RNN's hidden state. Must be a Tensor or a nested iterable containing + Tensors. + Returns: + dtype: inferred dtype of hidden state. + Raises: + ValueError: if `state` has heterogeneous dtypes or is empty. + """ + + if explicit_dtype is not None: + + return explicit_dtype + + elif nest.is_sequence(state): + + inferred_dtypes = [element.dtype for element in nest.flatten(state)] + + if not inferred_dtypes: + raise ValueError("Unable to infer dtype from empty state.") + + all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) + + if not all_same: + raise ValueError( + + "State has tensors of different inferred_dtypes. Unable to infer a " + + "single representative dtype.") + + return inferred_dtypes[0] + + else: + + return state.dtype + + +# pylint: disable=unused-argument + +def _rnn_step( + + time, sequence_length, min_sequence_length, max_sequence_length, + + zero_output, state, call_cell, state_size, skip_conditionals=False): + """Calculate one step of a dynamic RNN minibatch. + Returns an (output, state) pair conditioned on the sequence_lengths. + When skip_conditionals=False, the pseudocode is something like: + if t >= max_sequence_length: + return (zero_output, state) + if t < min_sequence_length: + return call_cell() + # Selectively output zeros or output, old state or new state depending + # on if we've finished calculating each row. 
+ new_output, new_state = call_cell() + final_output = np.vstack([ + zero_output if time >= sequence_lengths[r] else new_output_r + for r, new_output_r in enumerate(new_output) + ]) + final_state = np.vstack([ + state[r] if time >= sequence_lengths[r] else new_state_r + for r, new_state_r in enumerate(new_state) + ]) + return (final_output, final_state) + Args: + time: Python int, the current time step + sequence_length: int32 `Tensor` vector of size [batch_size] + min_sequence_length: int32 `Tensor` scalar, min of sequence_length + max_sequence_length: int32 `Tensor` scalar, max of sequence_length + zero_output: `Tensor` vector of shape [output_size] + state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`, + or a list/tuple of such tensors. + call_cell: lambda returning tuple of (new_output, new_state) where + new_output is a `Tensor` matrix of shape `[batch_size, output_size]`. + new_state is a `Tensor` matrix of shape `[batch_size, state_size]`. + state_size: The `cell.state_size` associated with the state. + skip_conditionals: Python bool, whether to skip using the conditional + calculations. This is useful for `dynamic_rnn`, where the input tensor + matches `max_sequence_length`, and using conditionals just slows + everything down. + Returns: + A tuple of (`final_output`, `final_state`) as given by the pseudocode above: + final_output is a `Tensor` matrix of shape [batch_size, output_size] + final_state is either a single `Tensor` matrix, or a tuple of such + matrices (matching length and shapes of input `state`). + Raises: + ValueError: If the cell returns a state tuple whose length does not match + that returned by `state_size`. + """ + + # Convert state to a list for ease of use + + flat_state = nest.flatten(state) + + flat_zero_output = nest.flatten(zero_output) + + def _copy_one_through(output, new_output): + + # If the state contains a scalar value we simply pass it through. + + if output.shape.ndims == 0: + return new_output + + copy_cond = (time >= sequence_length) + + with ops.colocate_with(new_output): + return array_ops.where(copy_cond, output, new_output) + + def _copy_some_through(flat_new_output, flat_new_state): + + # Use broadcasting select to determine which values should get + + # the previous state & zero output, and which values should get + + # a calculated state & output. + + flat_new_output = [ + + _copy_one_through(zero_output, new_output) + + for zero_output, new_output in zip(flat_zero_output, flat_new_output)] + + flat_new_state = [ + + _copy_one_through(state, new_state) + + for state, new_state in zip(flat_state, flat_new_state)] + + return flat_new_output + flat_new_state + + def _maybe_copy_some_through(): + + """Run RNN step. Pass through either no or some past state.""" + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + flat_new_state = nest.flatten(new_state) + + flat_new_output = nest.flatten(new_output) + + return control_flow_ops.cond( + + # if t < min_seq_len: calculate and return everything + + time < min_sequence_length, lambda: flat_new_output + flat_new_state, + + # else copy some of it through + + lambda: _copy_some_through(flat_new_output, flat_new_state)) + + # TODO(ebrevdo): skipping these conditionals may cause a slowdown, + + # but benefits from removing cond() and its gradient. We should + + # profile with and without this switch here. + + if skip_conditionals: + + # Instead of using conditionals, perform the selective copy at all time + + # steps. 
This is faster when max_seq_len is equal to the number of unrolls + + # (which is typical for dynamic_rnn). + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + new_state = nest.flatten(new_state) + + new_output = nest.flatten(new_output) + + final_output_and_state = _copy_some_through(new_output, new_state) + + else: + + empty_update = lambda: flat_zero_output + flat_state + + final_output_and_state = control_flow_ops.cond( + + # if t >= max_seq_len: copy all state through, output zeros + + time >= max_sequence_length, empty_update, + + # otherwise calculation is required: copy some or all of it through + + _maybe_copy_some_through) + + if len(final_output_and_state) != len(flat_zero_output) + len(flat_state): + raise ValueError("Internal error: state and output were not concatenated " + + "correctly.") + + final_output = final_output_and_state[:len(flat_zero_output)] + + final_state = final_output_and_state[len(flat_zero_output):] + + for output, flat_output in zip(final_output, flat_zero_output): + output.set_shape(flat_output.get_shape()) + + for substate, flat_substate in zip(final_state, flat_state): + substate.set_shape(flat_substate.get_shape()) + + final_output = nest.pack_sequence_as( + + structure=zero_output, flat_sequence=final_output) + + final_state = nest.pack_sequence_as( + + structure=state, flat_sequence=final_state) + + return final_output, final_state + + +def _reverse_seq(input_seq, lengths): + """Reverse a list of Tensors up to specified lengths. + Args: + input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) + or nested tuples of tensors. + lengths: A `Tensor` of dimension batch_size, containing lengths for each + sequence in the batch. If "None" is specified, simply reverses + the list. + Returns: + time-reversed sequence + """ + + if lengths is None: + return list(reversed(input_seq)) + + flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) + + flat_results = [[] for _ in range(len(input_seq))] + + for sequence in zip(*flat_input_seq): + + input_shape = tensor_shape.unknown_shape( + + ndims=sequence[0].get_shape().ndims) + + for input_ in sequence: + input_shape.merge_with(input_.get_shape()) + + input_.set_shape(input_shape) + + # Join into (time, batch_size, depth) + + s_joined = array_ops.stack(sequence) + + # Reverse along dimension 0 + + s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) + + # Split again into list + + result = array_ops.unstack(s_reversed) + + for r, flat_result in zip(result, flat_results): + r.set_shape(input_shape) + + flat_result.append(r) + + results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) + + for input_, flat_result in zip(input_seq, flat_results)] + + return results + + +# +# def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, +# +# initial_state_fw=None, initial_state_bw=None, +# +# dtype=None, parallel_iterations=None, +# +# swap_memory=False, time_major=False, scope=None): +# +# """Creates a dynamic version of bidirectional recurrent neural network. +# +# +# +# Takes input and builds independent forward and backward RNNs. The input_size +# +# of forward and backward cell must match. The initial state for both directions +# +# is zero by default (but can be set optionally) and no intermediate states are +# +# ever returned -- the network is fully unrolled for the given (passed in) +# +# length(s) of the sequence(s) or completely unrolled if length(s) is not +# +# given. 
+# +# +# +# Args: +# +# cell_fw: An instance of RNNCell, to be used for forward direction. +# +# cell_bw: An instance of RNNCell, to be used for backward direction. +# +# inputs: The RNN inputs. +# +# If time_major == False (default), this must be a tensor of shape: +# +# `[batch_size, max_time, ...]`, or a nested tuple of such elements. +# +# If time_major == True, this must be a tensor of shape: +# +# `[max_time, batch_size, ...]`, or a nested tuple of such elements. +# +# sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, +# +# containing the actual lengths for each of the sequences in the batch. +# +# If not provided, all batch entries are assumed to be full sequences; and +# +# time reversal is applied from time `0` to `max_time` for each sequence. +# +# initial_state_fw: (optional) An initial state for the forward RNN. +# +# This must be a tensor of appropriate type and shape +# +# `[batch_size, cell_fw.state_size]`. +# +# If `cell_fw.state_size` is a tuple, this should be a tuple of +# +# tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. +# +# initial_state_bw: (optional) Same as for `initial_state_fw`, but using +# +# the corresponding properties of `cell_bw`. +# +# dtype: (optional) The data type for the initial states and expected output. +# +# Required if initial_states are not provided or RNN states have a +# +# heterogeneous dtype. +# +# parallel_iterations: (Default: 32). The number of iterations to run in +# +# parallel. Those operations which do not have any temporal dependency +# +# and can be run in parallel, will be. This parameter trades off +# +# time for space. Values >> 1 use more memory but take less time, +# +# while smaller values use less memory but computations take longer. +# +# swap_memory: Transparently swap the tensors produced in forward inference +# +# but needed for back prop from GPU to CPU. This allows training RNNs +# +# which would typically not fit on a single GPU, with very minimal (or no) +# +# performance penalty. +# +# time_major: The shape format of the `inputs` and `outputs` Tensors. +# +# If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. +# +# If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. +# +# Using `time_major = True` is a bit more efficient because it avoids +# +# transposes at the beginning and end of the RNN calculation. However, +# +# most TensorFlow data is batch-major, so by default this function +# +# accepts input and emits output in batch-major form. +# +# scope: VariableScope for the created subgraph; defaults to +# +# "bidirectional_rnn" +# +# +# +# Returns: +# +# A tuple (outputs, output_states) where: +# +# outputs: A tuple (output_fw, output_bw) containing the forward and +# +# the backward rnn output `Tensor`. +# +# If time_major == False (default), +# +# output_fw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_bw.output_size]`. +# +# If time_major == True, +# +# output_fw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_bw.output_size]`. +# +# It returns a tuple instead of a single concatenated `Tensor`, unlike +# +# in the `bidirectional_rnn`. If the concatenated one is preferred, +# +# the forward and backward outputs can be concatenated as +# +# `tf.concat(outputs, 2)`. 
+# +# output_states: A tuple (output_state_fw, output_state_bw) containing +# +# the forward and the backward final states of bidirectional rnn. +# +# +# +# Raises: +# +# TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. +# +# """ +# +# +# +# if not _like_rnncell(cell_fw): +# +# raise TypeError("cell_fw must be an instance of RNNCell") +# +# if not _like_rnncell(cell_bw): +# +# raise TypeError("cell_bw must be an instance of RNNCell") +# +# +# +# with vs.variable_scope(scope or "bidirectional_rnn"): +# +# # Forward direction +# +# with vs.variable_scope("fw") as fw_scope: +# +# output_fw, output_state_fw = dynamic_rnn( +# +# cell=cell_fw, inputs=inputs, sequence_length=sequence_length, +# +# initial_state=initial_state_fw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=fw_scope) +# +# +# +# # Backward direction +# +# if not time_major: +# +# time_dim = 1 +# +# batch_dim = 0 +# +# else: +# +# time_dim = 0 +# +# batch_dim = 1 +# +# +# +# def _reverse(input_, seq_lengths, seq_dim, batch_dim): +# +# if seq_lengths is not None: +# +# return array_ops.reverse_sequence( +# +# input=input_, seq_lengths=seq_lengths, +# +# seq_dim=seq_dim, batch_dim=batch_dim) +# +# else: +# +# return array_ops.reverse(input_, axis=[seq_dim]) +# +# +# +# with vs.variable_scope("bw") as bw_scope: +# +# inputs_reverse = _reverse( +# +# inputs, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# tmp, output_state_bw = dynamic_rnn( +# +# cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, +# +# initial_state=initial_state_bw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=bw_scope) +# +# +# +# output_bw = _reverse( +# +# tmp, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# +# +# outputs = (output_fw, output_bw) +# +# output_states = (output_state_fw, output_state_bw) +# +# +# +# return (outputs, output_states) +# + + +def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None, + + dtype=None, parallel_iterations=None, swap_memory=False, + + time_major=False, scope=None): + """Creates a recurrent neural network specified by RNNCell `cell`. + Performs fully dynamic unrolling of `inputs`. + Example: + ```python + # create a BasicRNNCell + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size] + # defining initial state + initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) + # 'state' is a tensor of shape [batch_size, cell_state_size] + outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data, + initial_state=initial_state, + dtype=tf.float32) + ``` + ```python + # create 2 LSTMCells + rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]] + # create a RNN cell composed sequentially of a number of RNNCells + multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers) + # 'outputs' is a tensor of shape [batch_size, max_time, 256] + # 'state' is a N-tuple where N is the number of LSTMCells containing a + # tf.contrib.rnn.LSTMStateTuple for each cell + outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, + inputs=data, + dtype=tf.float32) + ``` + Args: + cell: An instance of RNNCell. + inputs: The RNN inputs. + If `time_major == False` (default), this must be a `Tensor` of shape: + `[batch_size, max_time, ...]`, or a nested tuple of such + elements. 
+ If `time_major == True`, this must be a `Tensor` of shape: + `[max_time, batch_size, ...]`, or a nested tuple of such + elements. + This may also be a (possibly nested) tuple of Tensors satisfying + this property. The first two dimensions must match across all the inputs, + but otherwise the ranks and other shape components may differ. + In this case, input to `cell` at each time-step will replicate the + structure of these tuples, except for the time dimension (from which the + time is taken). + The input to `cell` at each time step will be a `Tensor` or (possibly + nested) tuple of Tensors each with dimensions `[batch_size, ...]`. + sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. + Used to copy-through state and zero-out outputs when past a batch + element's sequence length. So it's more for correctness than performance. + initial_state: (optional) An initial state for the RNN. + If `cell.state_size` is an integer, this must be + a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. + If `cell.state_size` is a tuple, this should be a tuple of + tensors having shapes `[batch_size, s] for s in cell.state_size`. + dtype: (optional) The data type for the initial state and expected output. + Required if initial_state is not provided or RNN state has a heterogeneous + dtype. + parallel_iterations: (Default: 32). The number of iterations to run in + parallel. Those operations which do not have any temporal dependency + and can be run in parallel, will be. This parameter trades off + time for space. Values >> 1 use more memory but take less time, + while smaller values use less memory but computations take longer. + swap_memory: Transparently swap the tensors produced in forward inference + but needed for back prop from GPU to CPU. This allows training RNNs + which would typically not fit on a single GPU, with very minimal (or no) + performance penalty. + time_major: The shape format of the `inputs` and `outputs` Tensors. + If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. + If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. + Using `time_major = True` is a bit more efficient because it avoids + transposes at the beginning and end of the RNN calculation. However, + most TensorFlow data is batch-major, so by default this function + accepts input and emits output in batch-major form. + scope: VariableScope for the created subgraph; defaults to "rnn". + Returns: + A pair (outputs, state) where: + outputs: The RNN output `Tensor`. + If time_major == False (default), this will be a `Tensor` shaped: + `[batch_size, max_time, cell.output_size]`. + If time_major == True, this will be a `Tensor` shaped: + `[max_time, batch_size, cell.output_size]`. + Note, if `cell.output_size` is a (possibly nested) tuple of integers + or `TensorShape` objects, then `outputs` will be a tuple having the + same structure as `cell.output_size`, containing Tensors having shapes + corresponding to the shape data in `cell.output_size`. + state: The final state. If `cell.state_size` is an int, this + will be shaped `[batch_size, cell.state_size]`. If it is a + `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. + If it is a (possibly nested) tuple of ints or `TensorShape`, this will + be a tuple having the corresponding shapes. If cells are `LSTMCells` + `state` will be a tuple containing a `LSTMStateTuple` for each cell. + Raises: + TypeError: If `cell` is not an instance of RNNCell. 
+ ValueError: If inputs is None or an empty list. + """ + + if not _like_rnncell(cell): + raise TypeError("cell must be an instance of RNNCell") + + # By default, time_major==False and inputs are batch-major: shaped + + # [batch, time, depth] + + # For internal calculations, we transpose to [time, batch, depth] + + flat_input = nest.flatten(inputs) + + if not time_major: + # (B,T,D) => (T,B,D) + + flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input] + + flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input) + + parallel_iterations = parallel_iterations or 32 + + if sequence_length is not None: + + sequence_length = math_ops.to_int32(sequence_length) + + if sequence_length.get_shape().ndims not in (None, 1): + raise ValueError( + + "sequence_length must be a vector of length batch_size, " + + "but saw shape: %s" % sequence_length.get_shape()) + + sequence_length = array_ops.identity( # Just to find it in the graph. + + sequence_length, name="sequence_length") + + # Create a new scope in which the caching device is either + + # determined by the parent scope, or is set to place the cached + + # Variable using the same placement as for the rest of the RNN. + + with vs.variable_scope(scope or "rnn",reuse=tf.AUTO_REUSE) as varscope:#TODO:user defined reuse + + if varscope.caching_device is None: + varscope.set_caching_device(lambda op: op.device) + + batch_size = _best_effort_input_batch_size(flat_input) + + if initial_state is not None: + + state = initial_state + + else: + + if not dtype: + raise ValueError("If there is no initial_state, you must give a dtype.") + + state = cell.zero_state(batch_size, dtype) + + def _assert_has_shape(x, shape): + + x_shape = array_ops.shape(x) + + packed_shape = array_ops.stack(shape) + + return control_flow_ops.Assert( + + math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), + + ["Expected shape for Tensor %s is " % x.name, + + packed_shape, " but saw shape: ", x_shape]) + + if sequence_length is not None: + # Perform some shape validation + + with ops.control_dependencies( + + [_assert_has_shape(sequence_length, [batch_size])]): + sequence_length = array_ops.identity( + + sequence_length, name="CheckSeqLen") + + inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input) + + (outputs, final_state) = _dynamic_rnn_loop( + + cell, + + inputs, + + state, + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory, + + att_scores=att_scores, + + sequence_length=sequence_length, + + dtype=dtype) + + # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth]. + + # If we are performing batch-major calculations, transpose output back + + # to shape [batch, time, depth] + + if not time_major: + # (T,B,D) => (B,T,D) + + outputs = nest.map_structure(_transpose_batch_time, outputs) + + return (outputs, final_state) + + +def _dynamic_rnn_loop(cell, + + inputs, + + initial_state, + + parallel_iterations, + + swap_memory, + + att_scores=None, + + sequence_length=None, + + dtype=None): + """Internal implementation of Dynamic RNN. + Args: + cell: An instance of RNNCell. + inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested + tuple of such elements. + initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if + `cell.state_size` is a tuple, then this should be a tuple of + tensors having shapes `[batch_size, s] for s in cell.state_size`. + parallel_iterations: Positive Python int. 
+    swap_memory: A Python boolean
+    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
+    dtype: (optional) Expected dtype of output. If not specified, inferred from
+      initial_state.
+  Returns:
+    Tuple `(final_outputs, final_state)`.
+    final_outputs:
+      A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
+      `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
+      objects, then this returns a (possibly nested) tuple of Tensors matching
+      the corresponding shapes.
+    final_state:
+      A `Tensor`, or possibly nested tuple of Tensors, matching in length
+      and shapes to `initial_state`.
+  Raises:
+    ValueError: If the input depth cannot be inferred via shape inference
+      from the inputs.
+  """
+  state = initial_state
+  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"
+  state_size = cell.state_size
+  flat_input = nest.flatten(inputs)
+  flat_output_size = nest.flatten(cell.output_size)
+
+  # Construct an initial output
+  input_shape = array_ops.shape(flat_input[0])
+  time_steps = input_shape[0]
+  batch_size = _best_effort_input_batch_size(flat_input)
+
+  inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
+                           for input_ in flat_input)
+
+  const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]
+
+  for shape in inputs_got_shape:
+    if not shape[2:].is_fully_defined():
+      raise ValueError(
+          "Input size (depth of inputs) must be accessible via shape inference,"
+          " but saw value None.")
+    got_time_steps = shape[0].value
+    got_batch_size = shape[1].value
+    if const_time_steps != got_time_steps:
+      raise ValueError(
+          "Time steps is not the same for all the elements in the input in a "
+          "batch.")
+    if const_batch_size != got_batch_size:
+      raise ValueError(
+          "Batch_size is not the same for all the elements in the input.")
+
+  # Prepare dynamic conditional copying of state & output
+  def _create_zero_arrays(size):
+    size = _concat(batch_size, size)
+    return array_ops.zeros(
+        array_ops.stack(size), _infer_state_dtype(dtype, state))
+
+  flat_zero_output = tuple(_create_zero_arrays(output)
+                           for output in flat_output_size)
+  zero_output = nest.pack_sequence_as(structure=cell.output_size,
+                                      flat_sequence=flat_zero_output)
+
+  if sequence_length is not None:
+    min_sequence_length = math_ops.reduce_min(sequence_length)
+    max_sequence_length = math_ops.reduce_max(sequence_length)
+
+  time = array_ops.constant(0, dtype=dtypes.int32, name="time")
+
+  with ops.name_scope("dynamic_rnn") as scope:
+    base_name = scope
+
+  def _create_ta(name, dtype):
+    return tensor_array_ops.TensorArray(dtype=dtype,
+                                        size=time_steps,
+                                        tensor_array_name=base_name + name)
+
+  output_ta = tuple(_create_ta("output_%d" % i,
+                               _infer_state_dtype(dtype, state))
+                    for i in range(len(flat_output_size)))
+  input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype)
+                   for i in range(len(flat_input)))
+
+  input_ta = tuple(ta.unstack(input_)
+                   for ta, input_ in zip(input_ta, flat_input))
+
+  def _time_step(time, output_ta_t, state, att_scores=None):
+    """Take a time step of the dynamic RNN.
+    Args:
+      time: int32 scalar Tensor.
+      output_ta_t: List of `TensorArray`s that represent the output.
+      state: nested tuple of vector tensors that represent the state.
+    Returns:
+      The tuple (time + 1, output_ta_t with updated flow, new_state).
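+      When `att_scores` is given, the current step's slice
+      `att_scores[:, time, :]` is passed to the cell as a third positional
+      argument (the `att_score` input of attention-aware cells such as
+      VecAttGRUCell).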
+ """ + + input_t = tuple(ta.read(time) for ta in input_ta) + + # Restore some shape information + + for input_, shape in zip(input_t, inputs_got_shape): + input_.set_shape(shape[1:]) + + input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) + + if att_scores is not None: + + att_score = att_scores[:, time, :] + + call_cell = lambda: cell(input_t, state, att_score) + + else: + + call_cell = lambda: cell(input_t, state) + + if sequence_length is not None: + + (output, new_state) = _rnn_step( + + time=time, + + sequence_length=sequence_length, + + min_sequence_length=min_sequence_length, + + max_sequence_length=max_sequence_length, + + zero_output=zero_output, + + state=state, + + call_cell=call_cell, + + state_size=state_size, + + skip_conditionals=True) + + else: + + (output, new_state) = call_cell() + + # Pack state if using state tuples + + output = nest.flatten(output) + + output_ta_t = tuple( + + ta.write(time, out) for ta, out in zip(output_ta_t, output)) + + if att_scores is not None: + + return (time + 1, output_ta_t, new_state, att_scores) + + else: + + return (time + 1, output_ta_t, new_state) + + if att_scores is not None: + + _, output_final_ta, final_state, _ = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state, att_scores), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + else: + + _, output_final_ta, final_state = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + # Unpack final output if not using output tuples. + + final_outputs = tuple(ta.stack() for ta in output_final_ta) + + # Restore some shape information + + for output, output_size in zip(final_outputs, flat_output_size): + shape = _concat( + + [const_time_steps, const_batch_size], output_size, static=True) + + output.set_shape(shape) + + final_outputs = nest.pack_sequence_as( + + structure=cell.output_size, flat_sequence=final_outputs) + + return (final_outputs, final_state) \ No newline at end of file diff --git a/modelzoo/FwFM/script/contrib/rnn_v2.py b/modelzoo/FwFM/script/contrib/rnn_v2.py new file mode 100644 index 00000000000..a2bd625cd8b --- /dev/null +++ b/modelzoo/FwFM/script/contrib/rnn_v2.py @@ -0,0 +1,1452 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +# + +# Licensed under the Apache License, Version 2.0 (the "License"); + +# you may not use this file except in compliance with the License. + +# You may obtain a copy of the License at + +# + +# http://www.apache.org/licenses/LICENSE-2.0 + +# + +# Unless required by applicable law or agreed to in writing, software + +# distributed under the License is distributed on an "AS IS" BASIS, + +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +# See the License for the specific language governing permissions and + +# limitations under the License. + +# ============================================================================== + + +"""RNN helpers for TensorFlow models. 
+ + + + + +@@bidirectional_dynamic_rnn + +@@dynamic_rnn + +@@raw_rnn + +@@static_rnn + +@@static_state_saving_rnn + +@@static_bidirectional_rnn + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util import nest +import tensorflow as tf + + +def _like_rnncell_(cell): + """Checks that a given object is an RNNCell by using duck typing.""" + + conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"), + + hasattr(cell, "zero_state"), callable(cell)] + + return all(conditions) + + +# pylint: disable=protected-access + +_concat = rnn_cell_impl._concat +try: + _like_rnncell = rnn_cell_impl._like_rnncell +except: + _like_rnncell = _like_rnncell_ + + +# pylint: enable=protected-access + + +def _transpose_batch_time(x): + """Transpose the batch and time dimensions of a Tensor. + + + + Retains as much of the static shape information as possible. + + + + Args: + + x: A tensor of rank 2 or higher. + + + + Returns: + + x transposed along the first two dimensions. + + + + Raises: + + ValueError: if `x` is rank 1 or lower. + + """ + + x_static_shape = x.get_shape() + + if x_static_shape.ndims is not None and x_static_shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2, but saw shape: %s" % + + (x, x_static_shape)) + + x_rank = array_ops.rank(x) + + x_t = array_ops.transpose( + + x, array_ops.concat( + + ([1, 0], math_ops.range(2, x_rank)), axis=0)) + + x_t.set_shape( + + tensor_shape.TensorShape([ + + x_static_shape[1], x_static_shape[0] + + ]).concatenate(x_static_shape[2:])) + + return x_t + + +def _best_effort_input_batch_size(flat_input): + """Get static input batch size if available, with fallback to the dynamic one. + + + + Args: + + flat_input: An iterable of time major input Tensors of shape [max_time, + + batch_size, ...]. All inputs should have compatible batch sizes. + + + + Returns: + + The batch size in Python integer if available, or a scalar Tensor otherwise. + + + + Raises: + + ValueError: if there is any input with an invalid shape. + + """ + + for input_ in flat_input: + + shape = input_.shape + + if shape.ndims is None: + continue + + if shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2" % input_) + + batch_size = shape[1] + + if batch_size is not None: + return batch_size + + # Fallback to the dynamic batch size of the first input. + + return array_ops.shape(flat_input[0])[1] + + +def _infer_state_dtype(explicit_dtype, state): + """Infer the dtype of an RNN state. + + + + Args: + + explicit_dtype: explicitly declared dtype or None. + + state: RNN's hidden state. Must be a Tensor or a nested iterable containing + + Tensors. + + + + Returns: + + dtype: inferred dtype of hidden state. + + + + Raises: + + ValueError: if `state` has heterogeneous dtypes or is empty. 
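+
+  For example, `_infer_state_dtype(None, state)` with a state tuple whose
+  tensors are all float32 returns `dtypes.float32`.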
+ + """ + + if explicit_dtype is not None: + + return explicit_dtype + + elif nest.is_sequence(state): + + inferred_dtypes = [element.dtype for element in nest.flatten(state)] + + if not inferred_dtypes: + raise ValueError("Unable to infer dtype from empty state.") + + all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) + + if not all_same: + raise ValueError( + + "State has tensors of different inferred_dtypes. Unable to infer a " + + "single representative dtype.") + + return inferred_dtypes[0] + + else: + + return state.dtype + + +# pylint: disable=unused-argument + +def _rnn_step( + + time, sequence_length, min_sequence_length, max_sequence_length, + + zero_output, state, call_cell, state_size, skip_conditionals=False): + """Calculate one step of a dynamic RNN minibatch. + + + + Returns an (output, state) pair conditioned on the sequence_lengths. + + When skip_conditionals=False, the pseudocode is something like: + + + + if t >= max_sequence_length: + + return (zero_output, state) + + if t < min_sequence_length: + + return call_cell() + + + + # Selectively output zeros or output, old state or new state depending + + # on if we've finished calculating each row. + + new_output, new_state = call_cell() + + final_output = np.vstack([ + + zero_output if time >= sequence_lengths[r] else new_output_r + + for r, new_output_r in enumerate(new_output) + + ]) + + final_state = np.vstack([ + + state[r] if time >= sequence_lengths[r] else new_state_r + + for r, new_state_r in enumerate(new_state) + + ]) + + return (final_output, final_state) + + + + Args: + + time: Python int, the current time step + + sequence_length: int32 `Tensor` vector of size [batch_size] + + min_sequence_length: int32 `Tensor` scalar, min of sequence_length + + max_sequence_length: int32 `Tensor` scalar, max of sequence_length + + zero_output: `Tensor` vector of shape [output_size] + + state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`, + + or a list/tuple of such tensors. + + call_cell: lambda returning tuple of (new_output, new_state) where + + new_output is a `Tensor` matrix of shape `[batch_size, output_size]`. + + new_state is a `Tensor` matrix of shape `[batch_size, state_size]`. + + state_size: The `cell.state_size` associated with the state. + + skip_conditionals: Python bool, whether to skip using the conditional + + calculations. This is useful for `dynamic_rnn`, where the input tensor + + matches `max_sequence_length`, and using conditionals just slows + + everything down. + + + + Returns: + + A tuple of (`final_output`, `final_state`) as given by the pseudocode above: + + final_output is a `Tensor` matrix of shape [batch_size, output_size] + + final_state is either a single `Tensor` matrix, or a tuple of such + + matrices (matching length and shapes of input `state`). + + + + Raises: + + ValueError: If the cell returns a state tuple whose length does not match + + that returned by `state_size`. + + """ + + # Convert state to a list for ease of use + + flat_state = nest.flatten(state) + + flat_zero_output = nest.flatten(zero_output) + + def _copy_one_through(output, new_output): + + # If the state contains a scalar value we simply pass it through. 
+ + if output.shape.ndims == 0: + return new_output + + copy_cond = (time >= sequence_length) + + with ops.colocate_with(new_output): + return array_ops.where(copy_cond, output, new_output) + + def _copy_some_through(flat_new_output, flat_new_state): + + # Use broadcasting select to determine which values should get + + # the previous state & zero output, and which values should get + + # a calculated state & output. + + flat_new_output = [ + + _copy_one_through(zero_output, new_output) + + for zero_output, new_output in zip(flat_zero_output, flat_new_output)] + + flat_new_state = [ + + _copy_one_through(state, new_state) + + for state, new_state in zip(flat_state, flat_new_state)] + + return flat_new_output + flat_new_state + + def _maybe_copy_some_through(): + + """Run RNN step. Pass through either no or some past state.""" + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + flat_new_state = nest.flatten(new_state) + + flat_new_output = nest.flatten(new_output) + + return control_flow_ops.cond( + + # if t < min_seq_len: calculate and return everything + + time < min_sequence_length, lambda: flat_new_output + flat_new_state, + + # else copy some of it through + + lambda: _copy_some_through(flat_new_output, flat_new_state)) + + # TODO(ebrevdo): skipping these conditionals may cause a slowdown, + + # but benefits from removing cond() and its gradient. We should + + # profile with and without this switch here. + + if skip_conditionals: + + # Instead of using conditionals, perform the selective copy at all time + + # steps. This is faster when max_seq_len is equal to the number of unrolls + + # (which is typical for dynamic_rnn). + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + new_state = nest.flatten(new_state) + + new_output = nest.flatten(new_output) + + final_output_and_state = _copy_some_through(new_output, new_state) + + else: + + empty_update = lambda: flat_zero_output + flat_state + + final_output_and_state = control_flow_ops.cond( + + # if t >= max_seq_len: copy all state through, output zeros + + time >= max_sequence_length, empty_update, + + # otherwise calculation is required: copy some or all of it through + + _maybe_copy_some_through) + + if len(final_output_and_state) != len(flat_zero_output) + len(flat_state): + raise ValueError("Internal error: state and output were not concatenated " + + "correctly.") + + final_output = final_output_and_state[:len(flat_zero_output)] + + final_state = final_output_and_state[len(flat_zero_output):] + + for output, flat_output in zip(final_output, flat_zero_output): + output.set_shape(flat_output.get_shape()) + + for substate, flat_substate in zip(final_state, flat_state): + substate.set_shape(flat_substate.get_shape()) + + final_output = nest.pack_sequence_as( + + structure=zero_output, flat_sequence=final_output) + + final_state = nest.pack_sequence_as( + + structure=state, flat_sequence=final_state) + + return final_output, final_state + + +def _reverse_seq(input_seq, lengths): + """Reverse a list of Tensors up to specified lengths. + + + + Args: + + input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) + + or nested tuples of tensors. + + lengths: A `Tensor` of dimension batch_size, containing lengths for each + + sequence in the batch. If "None" is specified, simply reverses + + the list. 
+ + + + Returns: + + time-reversed sequence + + """ + + if lengths is None: + return list(reversed(input_seq)) + + flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) + + flat_results = [[] for _ in range(len(input_seq))] + + for sequence in zip(*flat_input_seq): + + input_shape = tensor_shape.unknown_shape( + + ndims=sequence[0].get_shape().ndims) + + for input_ in sequence: + input_shape.merge_with(input_.get_shape()) + + input_.set_shape(input_shape) + + # Join into (time, batch_size, depth) + + s_joined = array_ops.stack(sequence) + + # Reverse along dimension 0 + + s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) + + # Split again into list + + result = array_ops.unstack(s_reversed) + + for r, flat_result in zip(result, flat_results): + r.set_shape(input_shape) + + flat_result.append(r) + + results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) + + for input_, flat_result in zip(input_seq, flat_results)] + + return results + + +# +# def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, +# +# initial_state_fw=None, initial_state_bw=None, +# +# dtype=None, parallel_iterations=None, +# +# swap_memory=False, time_major=False, scope=None): +# +# """Creates a dynamic version of bidirectional recurrent neural network. +# +# +# +# Takes input and builds independent forward and backward RNNs. The input_size +# +# of forward and backward cell must match. The initial state for both directions +# +# is zero by default (but can be set optionally) and no intermediate states are +# +# ever returned -- the network is fully unrolled for the given (passed in) +# +# length(s) of the sequence(s) or completely unrolled if length(s) is not +# +# given. +# +# +# +# Args: +# +# cell_fw: An instance of RNNCell, to be used for forward direction. +# +# cell_bw: An instance of RNNCell, to be used for backward direction. +# +# inputs: The RNN inputs. +# +# If time_major == False (default), this must be a tensor of shape: +# +# `[batch_size, max_time, ...]`, or a nested tuple of such elements. +# +# If time_major == True, this must be a tensor of shape: +# +# `[max_time, batch_size, ...]`, or a nested tuple of such elements. +# +# sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, +# +# containing the actual lengths for each of the sequences in the batch. +# +# If not provided, all batch entries are assumed to be full sequences; and +# +# time reversal is applied from time `0` to `max_time` for each sequence. +# +# initial_state_fw: (optional) An initial state for the forward RNN. +# +# This must be a tensor of appropriate type and shape +# +# `[batch_size, cell_fw.state_size]`. +# +# If `cell_fw.state_size` is a tuple, this should be a tuple of +# +# tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. +# +# initial_state_bw: (optional) Same as for `initial_state_fw`, but using +# +# the corresponding properties of `cell_bw`. +# +# dtype: (optional) The data type for the initial states and expected output. +# +# Required if initial_states are not provided or RNN states have a +# +# heterogeneous dtype. +# +# parallel_iterations: (Default: 32). The number of iterations to run in +# +# parallel. Those operations which do not have any temporal dependency +# +# and can be run in parallel, will be. This parameter trades off +# +# time for space. Values >> 1 use more memory but take less time, +# +# while smaller values use less memory but computations take longer. 
+# +# swap_memory: Transparently swap the tensors produced in forward inference +# +# but needed for back prop from GPU to CPU. This allows training RNNs +# +# which would typically not fit on a single GPU, with very minimal (or no) +# +# performance penalty. +# +# time_major: The shape format of the `inputs` and `outputs` Tensors. +# +# If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. +# +# If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. +# +# Using `time_major = True` is a bit more efficient because it avoids +# +# transposes at the beginning and end of the RNN calculation. However, +# +# most TensorFlow data is batch-major, so by default this function +# +# accepts input and emits output in batch-major form. +# +# scope: VariableScope for the created subgraph; defaults to +# +# "bidirectional_rnn" +# +# +# +# Returns: +# +# A tuple (outputs, output_states) where: +# +# outputs: A tuple (output_fw, output_bw) containing the forward and +# +# the backward rnn output `Tensor`. +# +# If time_major == False (default), +# +# output_fw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_bw.output_size]`. +# +# If time_major == True, +# +# output_fw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_bw.output_size]`. +# +# It returns a tuple instead of a single concatenated `Tensor`, unlike +# +# in the `bidirectional_rnn`. If the concatenated one is preferred, +# +# the forward and backward outputs can be concatenated as +# +# `tf.concat(outputs, 2)`. +# +# output_states: A tuple (output_state_fw, output_state_bw) containing +# +# the forward and the backward final states of bidirectional rnn. +# +# +# +# Raises: +# +# TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. 
+# +# """ +# +# +# +# if not _like_rnncell(cell_fw): +# +# raise TypeError("cell_fw must be an instance of RNNCell") +# +# if not _like_rnncell(cell_bw): +# +# raise TypeError("cell_bw must be an instance of RNNCell") +# +# +# +# with vs.variable_scope(scope or "bidirectional_rnn"): +# +# # Forward direction +# +# with vs.variable_scope("fw") as fw_scope: +# +# output_fw, output_state_fw = dynamic_rnn( +# +# cell=cell_fw, inputs=inputs, sequence_length=sequence_length, +# +# initial_state=initial_state_fw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=fw_scope) +# +# +# +# # Backward direction +# +# if not time_major: +# +# time_dim = 1 +# +# batch_dim = 0 +# +# else: +# +# time_dim = 0 +# +# batch_dim = 1 +# +# +# +# def _reverse(input_, seq_lengths, seq_dim, batch_dim): +# +# if seq_lengths is not None: +# +# return array_ops.reverse_sequence( +# +# input=input_, seq_lengths=seq_lengths, +# +# seq_dim=seq_dim, batch_dim=batch_dim) +# +# else: +# +# return array_ops.reverse(input_, axis=[seq_dim]) +# +# +# +# with vs.variable_scope("bw") as bw_scope: +# +# inputs_reverse = _reverse( +# +# inputs, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# tmp, output_state_bw = dynamic_rnn( +# +# cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, +# +# initial_state=initial_state_bw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=bw_scope) +# +# +# +# output_bw = _reverse( +# +# tmp, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# +# +# outputs = (output_fw, output_bw) +# +# output_states = (output_state_fw, output_state_bw) +# +# +# +# return (outputs, output_states) +# + + +def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None, + + dtype=None, parallel_iterations=None, swap_memory=False, + + time_major=False, scope=None): + """Creates a recurrent neural network specified by RNNCell `cell`. + + + + Performs fully dynamic unrolling of `inputs`. + + + + Example: + + + + ```python + + # create a BasicRNNCell + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + + + + # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size] + + + + # defining initial state + + initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) + + + + # 'state' is a tensor of shape [batch_size, cell_state_size] + + outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data, + + initial_state=initial_state, + + dtype=tf.float32) + + ``` + + + + ```python + + # create 2 LSTMCells + + rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]] + + + + # create a RNN cell composed sequentially of a number of RNNCells + + multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers) + + + + # 'outputs' is a tensor of shape [batch_size, max_time, 256] + + # 'state' is a N-tuple where N is the number of LSTMCells containing a + + # tf.contrib.rnn.LSTMStateTuple for each cell + + outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, + + inputs=data, + + dtype=tf.float32) + + ``` + + + + + + Args: + + cell: An instance of RNNCell. + + inputs: The RNN inputs. + + If `time_major == False` (default), this must be a `Tensor` of shape: + + `[batch_size, max_time, ...]`, or a nested tuple of such + + elements. + + If `time_major == True`, this must be a `Tensor` of shape: + + `[max_time, batch_size, ...]`, or a nested tuple of such + + elements. 
+ + This may also be a (possibly nested) tuple of Tensors satisfying + + this property. The first two dimensions must match across all the inputs, + + but otherwise the ranks and other shape components may differ. + + In this case, input to `cell` at each time-step will replicate the + + structure of these tuples, except for the time dimension (from which the + + time is taken). + + The input to `cell` at each time step will be a `Tensor` or (possibly + + nested) tuple of Tensors each with dimensions `[batch_size, ...]`. + + sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. + + Used to copy-through state and zero-out outputs when past a batch + + element's sequence length. So it's more for correctness than performance. + + initial_state: (optional) An initial state for the RNN. + + If `cell.state_size` is an integer, this must be + + a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. + + If `cell.state_size` is a tuple, this should be a tuple of + + tensors having shapes `[batch_size, s] for s in cell.state_size`. + + dtype: (optional) The data type for the initial state and expected output. + + Required if initial_state is not provided or RNN state has a heterogeneous + + dtype. + + parallel_iterations: (Default: 32). The number of iterations to run in + + parallel. Those operations which do not have any temporal dependency + + and can be run in parallel, will be. This parameter trades off + + time for space. Values >> 1 use more memory but take less time, + + while smaller values use less memory but computations take longer. + + swap_memory: Transparently swap the tensors produced in forward inference + + but needed for back prop from GPU to CPU. This allows training RNNs + + which would typically not fit on a single GPU, with very minimal (or no) + + performance penalty. + + time_major: The shape format of the `inputs` and `outputs` Tensors. + + If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. + + If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. + + Using `time_major = True` is a bit more efficient because it avoids + + transposes at the beginning and end of the RNN calculation. However, + + most TensorFlow data is batch-major, so by default this function + + accepts input and emits output in batch-major form. + + scope: VariableScope for the created subgraph; defaults to "rnn". + + + + Returns: + + A pair (outputs, state) where: + + + + outputs: The RNN output `Tensor`. + + + + If time_major == False (default), this will be a `Tensor` shaped: + + `[batch_size, max_time, cell.output_size]`. + + + + If time_major == True, this will be a `Tensor` shaped: + + `[max_time, batch_size, cell.output_size]`. + + + + Note, if `cell.output_size` is a (possibly nested) tuple of integers + + or `TensorShape` objects, then `outputs` will be a tuple having the + + same structure as `cell.output_size`, containing Tensors having shapes + + corresponding to the shape data in `cell.output_size`. + + + + state: The final state. If `cell.state_size` is an int, this + + will be shaped `[batch_size, cell.state_size]`. If it is a + + `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. + + If it is a (possibly nested) tuple of ints or `TensorShape`, this will + + be a tuple having the corresponding shapes. If cells are `LSTMCells` + + `state` will be a tuple containing a `LSTMStateTuple` for each cell. + + + + Raises: + + TypeError: If `cell` is not an instance of RNNCell. 
+    ValueError: If inputs is None or an empty list.
+  """
+  if not _like_rnncell(cell):
+    raise TypeError("cell must be an instance of RNNCell")
+
+  # By default, time_major==False and inputs are batch-major: shaped
+  #   [batch, time, depth]
+  # For internal calculations, we transpose to [time, batch, depth]
+  flat_input = nest.flatten(inputs)
+
+  if not time_major:
+    # (B,T,D) => (T,B,D)
+    flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
+    flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)
+
+  parallel_iterations = parallel_iterations or 32
+  if sequence_length is not None:
+    sequence_length = math_ops.to_int32(sequence_length)
+    if sequence_length.get_shape().ndims not in (None, 1):
+      raise ValueError(
+          "sequence_length must be a vector of length batch_size, "
+          "but saw shape: %s" % sequence_length.get_shape())
+    sequence_length = array_ops.identity(  # Just to find it in the graph.
+        sequence_length, name="sequence_length")
+
+  # Create a new scope in which the caching device is either
+  # determined by the parent scope, or is set to place the cached
+  # Variable using the same placement as for the rest of the RNN.
+  try:
+    reuse = tf.AUTO_REUSE
+  except AttributeError:
+    reuse = tf.compat.v1.AUTO_REUSE
+  with vs.variable_scope(scope or "rnn", reuse=reuse) as varscope:  # TODO: user-defined reuse
+    if varscope.caching_device is None:
+      varscope.set_caching_device(lambda op: op.device)
+    batch_size = _best_effort_input_batch_size(flat_input)
+
+    if initial_state is not None:
+      state = initial_state
+    else:
+      if not dtype:
+        raise ValueError("If there is no initial_state, you must give a dtype.")
+      state = cell.zero_state(batch_size, dtype)
+
+    def _assert_has_shape(x, shape):
+      x_shape = array_ops.shape(x)
+      packed_shape = array_ops.stack(shape)
+      return control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)),
+          ["Expected shape for Tensor %s is " % x.name,
+           packed_shape, " but saw shape: ", x_shape])
+
+    if sequence_length is not None:
+      # Perform some shape validation
+      with ops.control_dependencies(
+          [_assert_has_shape(sequence_length, [batch_size])]):
+        sequence_length = array_ops.identity(
+            sequence_length, name="CheckSeqLen")
+
+    inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)
+
+    (outputs, final_state) = _dynamic_rnn_loop(
+        cell,
+        inputs,
+        state,
+        parallel_iterations=parallel_iterations,
+        swap_memory=swap_memory,
+        att_scores=att_scores,
+        sequence_length=sequence_length,
+        dtype=dtype)
+
+    # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
+    # If we are performing batch-major calculations, transpose output back
+    # to shape [batch, time, depth]
+    if not time_major:
+      # (T,B,D) => (B,T,D)
+      outputs = nest.map_structure(_transpose_batch_time, outputs)
+
+    return (outputs, final_state)
+
+
+def _dynamic_rnn_loop(cell,
+                      inputs,
+                      initial_state,
+                      parallel_iterations,
+                      swap_memory,
+                      att_scores=None,
+                      sequence_length=None,
+                      dtype=None):
+  """Internal implementation of Dynamic RNN.
+
+  Args:
+    cell: An instance of RNNCell.
+    inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
+      tuple of such elements.
+    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
+      `cell.state_size` is a tuple, then this should be a tuple of
+      tensors having shapes `[batch_size, s] for s in cell.state_size`.
+    parallel_iterations: Positive Python int.
+    swap_memory: A Python boolean
+    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
+    dtype: (optional) Expected dtype of output. If not specified, inferred from
+      initial_state.
+
+  Returns:
+    Tuple `(final_outputs, final_state)`.
+    final_outputs:
+      A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
+      `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
+      objects, then this returns a (possibly nested) tuple of Tensors matching
+      the corresponding shapes.
+    final_state:
+      A `Tensor`, or possibly nested tuple of Tensors, matching in length
+      and shapes to `initial_state`.
+
+  Raises:
+    ValueError: If the input depth cannot be inferred via shape inference
+      from the inputs.
+  """
+  state = initial_state
+  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"
+
+  state_size = cell.state_size
+
+  flat_input = nest.flatten(inputs)
+  flat_output_size = nest.flatten(cell.output_size)
+
+  # Construct an initial output
+  input_shape = array_ops.shape(flat_input[0])
+  time_steps = input_shape[0]
+  batch_size = _best_effort_input_batch_size(flat_input)
+
+  inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
+                           for input_ in flat_input)
+
+  const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]
+
+  for shape in inputs_got_shape:
+    if not shape[2:].is_fully_defined():
+      raise ValueError(
+          "Input size (depth of inputs) must be accessible via shape inference,"
+          " but saw value None.")
+    got_time_steps = shape[0]
+    got_batch_size = shape[1]
+    if const_time_steps != got_time_steps:
+      raise ValueError(
+          "Time steps is not the same for all the elements in the input in a "
+          "batch.")
+    if const_batch_size != got_batch_size:
+      raise ValueError(
+          "Batch_size is not the same for all the elements in the input.")
+
+  # Prepare dynamic conditional copying of state & output
+  def _create_zero_arrays(size):
+    size = _concat(batch_size, size)
+    return array_ops.zeros(
+        array_ops.stack(size), _infer_state_dtype(dtype, state))
+
+  flat_zero_output = tuple(_create_zero_arrays(output)
+                           for output in flat_output_size)
+  zero_output = nest.pack_sequence_as(structure=cell.output_size,
+                                      flat_sequence=flat_zero_output)
+
+  if sequence_length is not None:
+    min_sequence_length = math_ops.reduce_min(sequence_length)
+    max_sequence_length = math_ops.reduce_max(sequence_length)
+
+  time = array_ops.constant(0, dtype=dtypes.int32, name="time")
+
+  with ops.name_scope("dynamic_rnn") as scope:
+    base_name = scope
+
+  def _create_ta(name, dtype):
+    return tensor_array_ops.TensorArray(dtype=dtype,
+                                        size=time_steps,
+                                        tensor_array_name=base_name + name)
+
+  output_ta = tuple(_create_ta("output_%d" % i,
+                               _infer_state_dtype(dtype, state))
+                    for i in range(len(flat_output_size)))
+  input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype)
+                   for i in range(len(flat_input)))
+
+  input_ta = tuple(ta.unstack(input_)
+                   for ta, input_ in zip(input_ta, flat_input))
+
+  def _time_step(time, output_ta_t, state, att_scores=None):
+    """Take a time step of the dynamic RNN.
+
+    Args:
+      time: int32 scalar Tensor.
+      output_ta_t: List of `TensorArray`s that represent the output.
+      state: nested tuple of vector tensors that represent the state.
+
+    Returns:
+      The tuple (time + 1, output_ta_t with updated flow, new_state).
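+      When `att_scores` is given, `att_scores[:, time, :]` is forwarded to
+      the cell as its `att_score` argument.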
+ + """ + + input_t = tuple(ta.read(time) for ta in input_ta) + + # Restore some shape information + + for input_, shape in zip(input_t, inputs_got_shape): + input_.set_shape(shape[1:]) + + input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) + + if att_scores is not None: + + att_score = att_scores[:, time, :] + + call_cell = lambda: cell(input_t, state, att_score) + + else: + + call_cell = lambda: cell(input_t, state) + + if sequence_length is not None: + + (output, new_state) = _rnn_step( + + time=time, + + sequence_length=sequence_length, + + min_sequence_length=min_sequence_length, + + max_sequence_length=max_sequence_length, + + zero_output=zero_output, + + state=state, + + call_cell=call_cell, + + state_size=state_size, + + skip_conditionals=True) + + else: + + (output, new_state) = call_cell() + + # Pack state if using state tuples + + output = nest.flatten(output) + + output_ta_t = tuple( + + ta.write(time, out) for ta, out in zip(output_ta_t, output)) + + if att_scores is not None: + + return (time + 1, output_ta_t, new_state, att_scores) + + else: + + return (time + 1, output_ta_t, new_state) + + if att_scores is not None: + + _, output_final_ta, final_state, _ = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state, att_scores), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + else: + + _, output_final_ta, final_state = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + # Unpack final output if not using output tuples. + + final_outputs = tuple(ta.stack() for ta in output_final_ta) + + # Restore some shape information + + for output, output_size in zip(final_outputs, flat_output_size): + shape = _concat( + + [const_time_steps, const_batch_size], output_size, static=True) + + output.set_shape(shape) + + final_outputs = nest.pack_sequence_as( + + structure=cell.output_size, flat_sequence=final_outputs) + + return (final_outputs, final_state) diff --git a/modelzoo/FwFM/script/contrib/utils.py b/modelzoo/FwFM/script/contrib/utils.py new file mode 100644 index 00000000000..692f4ef6e89 --- /dev/null +++ b/modelzoo/FwFM/script/contrib/utils.py @@ -0,0 +1,378 @@ +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.ops.rnn_cell import * +from tensorflow.python.util import nest + +_BIAS_VARIABLE_NAME = "bias" + +_WEIGHTS_VARIABLE_NAME = "kernel" + + +class _Linear_(object): + """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. + + + + Args: + + args: a 2D Tensor or a list of 2D, batch x n, Tensors. + + output_size: int, second dimension of weight variable. + + dtype: data type for variables. + + build_bias: boolean, whether to build a bias variable. + + bias_initializer: starting value to initialize the bias + + (default is all zeros). + + kernel_initializer: starting value to initialize the weight. + + + + Raises: + + ValueError: if inputs_shape is wrong. 
+ + """ + + def __init__(self, + + args, + + output_size, + + build_bias, + + bias_initializer=None, + + kernel_initializer=None): + + self._build_bias = build_bias + + if args is None or (nest.is_sequence(args) and not args): + raise ValueError("`args` must be specified") + + if not nest.is_sequence(args): + + args = [args] + + self._is_sequence = False + + else: + + self._is_sequence = True + + # Calculate the total size of arguments on dimension 1. + + total_arg_size = 0 + + shapes = [a.get_shape() for a in args] + + for shape in shapes: + + if shape.ndims != 2: + raise ValueError( + "linear is expecting 2D arguments: %s" % shapes) + + if shape[1] is None: + + raise ValueError("linear expects shape[1] to be provided for shape %s, " + + "but saw %s" % (shape, shape[1])) + + else: + + total_arg_size += int(shape[1])#.value + + dtype = [a.dtype for a in args][0] + + scope = vs.get_variable_scope() + + with vs.variable_scope(scope) as outer_scope: + + self._weights = vs.get_variable( + + _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size], + + dtype=dtype, + + initializer=kernel_initializer) + + if build_bias: + + with vs.variable_scope(outer_scope) as inner_scope: + + inner_scope.set_partitioner(None) + + if bias_initializer is None: + bias_initializer = init_ops.constant_initializer( + 0.0, dtype=dtype) + + self._biases = vs.get_variable( + + _BIAS_VARIABLE_NAME, [output_size], + + dtype=dtype, + + initializer=bias_initializer) + + def __call__(self, args): + + if not self._is_sequence: + args = [args] + + if len(args) == 1: + + res = math_ops.matmul(args[0], self._weights) + + else: + + res = math_ops.matmul(array_ops.concat(args, 1), self._weights) + + if self._build_bias: + res = nn_ops.bias_add(res, self._biases) + + return res + + +try: + from tensorflow.python.ops.rnn_cell_impl import _Linear +except: + _Linear = _Linear_ + + +class QAAttGRUCell(RNNCell): + """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078). + + Args: + + num_units: int, The number of units in the GRU cell. + + activation: Nonlinearity to use. Default: `tanh`. + + reuse: (optional) Python boolean describing whether to reuse variables + + in an existing scope. If not `True`, and the existing scope already has + + the given variables, an error is raised. + + kernel_initializer: (optional) The initializer to use for the weight and + + projection matrices. + + bias_initializer: (optional) The initializer to use for the bias. + + """ + + def __init__(self, + + num_units, + + activation=None, + + reuse=None, + + kernel_initializer=None, + + bias_initializer=None): + + super(QAAttGRUCell, self).__init__(_reuse=reuse) + + self._num_units = num_units + + self._activation = activation or math_ops.tanh + + self._kernel_initializer = kernel_initializer + + self._bias_initializer = bias_initializer + + self._gate_linear = None + + self._candidate_linear = None + + @property + def state_size(self): + + return self._num_units + + @property + def output_size(self): + + return self._num_units + + def __call__(self, inputs, state, att_score): + + return self.call(inputs, state, att_score) + + def call(self, inputs, state, att_score=None): + """Gated recurrent unit (GRU) with nunits cells.""" + + if self._gate_linear is None: + + bias_ones = self._bias_initializer + + if self._bias_initializer is None: + bias_ones = init_ops.constant_initializer( + 1.0, dtype=inputs.dtype) + + with vs.variable_scope("gates"): # Reset gate and update gate. 
+ + self._gate_linear = _Linear( + + [inputs, state], + + 2 * self._num_units, + + True, + + bias_initializer=bias_ones, + + kernel_initializer=self._kernel_initializer) + + value = math_ops.sigmoid(self._gate_linear([inputs, state])) + + r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) + + r_state = r * state + + if self._candidate_linear is None: + with vs.variable_scope("candidate"): + self._candidate_linear = _Linear( + + [inputs, r_state], + + self._num_units, + + True, + + bias_initializer=self._bias_initializer, + + kernel_initializer=self._kernel_initializer) + + c = self._activation(self._candidate_linear([inputs, r_state])) + + new_h = (1. - att_score) * state + att_score * c + + return new_h, new_h + + +class VecAttGRUCell(RNNCell): + """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078). + + Args: + + num_units: int, The number of units in the GRU cell. + + activation: Nonlinearity to use. Default: `tanh`. + + reuse: (optional) Python boolean describing whether to reuse variables + + in an existing scope. If not `True`, and the existing scope already has + + the given variables, an error is raised. + + kernel_initializer: (optional) The initializer to use for the weight and + + projection matrices. + + bias_initializer: (optional) The initializer to use for the bias. + + """ + + def __init__(self, + + num_units, + + activation=None, + + reuse=None, + + kernel_initializer=None, + + bias_initializer=None): + + super(VecAttGRUCell, self).__init__(_reuse=reuse) + + self._num_units = num_units + + self._activation = activation or math_ops.tanh + + self._kernel_initializer = kernel_initializer + + self._bias_initializer = bias_initializer + + self._gate_linear = None + + self._candidate_linear = None + + @property + def state_size(self): + + return self._num_units + + @property + def output_size(self): + + return self._num_units + + def __call__(self, inputs, state, att_score): + + return self.call(inputs, state, att_score) + + def call(self, inputs, state, att_score=None): + """Gated recurrent unit (GRU) with nunits cells.""" + + if self._gate_linear is None: + + bias_ones = self._bias_initializer + + if self._bias_initializer is None: + bias_ones = init_ops.constant_initializer( + 1.0, dtype=inputs.dtype) + + with vs.variable_scope("gates"): # Reset gate and update gate. 
+ + self._gate_linear = _Linear( + + [inputs, state], + + 2 * self._num_units, + + True, + + bias_initializer=bias_ones, + + kernel_initializer=self._kernel_initializer) + + value = math_ops.sigmoid(self._gate_linear([inputs, state])) + + r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) + + r_state = r * state + + if self._candidate_linear is None: + with vs.variable_scope("candidate"): + self._candidate_linear = _Linear( + + [inputs, r_state], + + self._num_units, + + True, + + bias_initializer=self._bias_initializer, + + kernel_initializer=self._kernel_initializer) + + c = self._activation(self._candidate_linear([inputs, r_state])) + + u = (1.0 - att_score) * u + + new_h = u * state + (1 - u) * c + + return new_h, new_h diff --git a/modelzoo/FwFM/script/estimator/__init__.py b/modelzoo/FwFM/script/estimator/__init__.py new file mode 100644 index 00000000000..cf4f59d6c09 --- /dev/null +++ b/modelzoo/FwFM/script/estimator/__init__.py @@ -0,0 +1 @@ +from .models import * \ No newline at end of file diff --git a/modelzoo/FwFM/script/estimator/feature_column.py b/modelzoo/FwFM/script/estimator/feature_column.py new file mode 100644 index 00000000000..c8d7a6cd013 --- /dev/null +++ b/modelzoo/FwFM/script/estimator/feature_column.py @@ -0,0 +1,52 @@ +import tensorflow as tf +from tensorflow.python.feature_column.feature_column import _EmbeddingColumn + +from .utils import LINEAR_SCOPE_NAME, variable_scope, get_collection, get_GraphKeys, input_layer, get_losses + + +def linear_model(features, linear_feature_columns): + if tf.__version__ >= '2.0.0': + linear_logits = tf.compat.v1.feature_column.linear_model(features, linear_feature_columns) + else: + linear_logits = tf.feature_column.linear_model(features, linear_feature_columns) + return linear_logits + + +def get_linear_logit(features, linear_feature_columns, l2_reg_linear=0): + with variable_scope(LINEAR_SCOPE_NAME): + if not linear_feature_columns: + linear_logits = tf.Variable([[0.0]], name='bias_weights') + else: + + linear_logits = linear_model(features, linear_feature_columns) + + if l2_reg_linear > 0: + for var in get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME)[:-1]: + get_losses().add_loss(l2_reg_linear * tf.nn.l2_loss(var, name=var.name.split(":")[0] + "_l2loss"), + get_GraphKeys().REGULARIZATION_LOSSES) + return linear_logits + + +def input_from_feature_columns(features, feature_columns, l2_reg_embedding=0.0): + dense_value_list = [] + sparse_emb_list = [] + for feat in feature_columns: + if is_embedding(feat): + sparse_emb = tf.expand_dims(input_layer(features, [feat]), axis=1) + sparse_emb_list.append(sparse_emb) + if l2_reg_embedding > 0: + get_losses().add_loss(l2_reg_embedding * tf.nn.l2_loss(sparse_emb, name=feat.name + "_l2loss"), + get_GraphKeys().REGULARIZATION_LOSSES) + + else: + dense_value_list.append(input_layer(features, [feat])) + + return sparse_emb_list, dense_value_list + + +def is_embedding(feature_column): + try: + from tensorflow.python.feature_column.feature_column_v2 import EmbeddingColumn + except ImportError: + EmbeddingColumn = _EmbeddingColumn + return isinstance(feature_column, (_EmbeddingColumn, EmbeddingColumn)) diff --git a/modelzoo/FwFM/script/estimator/inputs.py b/modelzoo/FwFM/script/estimator/inputs.py new file mode 100644 index 00000000000..2c175a9934e --- /dev/null +++ b/modelzoo/FwFM/script/estimator/inputs.py @@ -0,0 +1,52 @@ +import tensorflow as tf + + +def input_fn_pandas(df, features, label=None, batch_size=256, num_epochs=1, shuffle=False, 
queue_capacity_factor=10, + num_threads=1): + if label is not None: + y = df[label] + else: + y = None + if tf.__version__ >= "2.0.0": + return tf.compat.v1.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size, + num_epochs=num_epochs, + shuffle=shuffle, + queue_capacity=batch_size * queue_capacity_factor, + num_threads=num_threads) + + return tf.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size, num_epochs=num_epochs, + shuffle=shuffle, queue_capacity=batch_size * queue_capacity_factor, + num_threads=num_threads) + + +def input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, num_parallel_calls=8, + shuffle_factor=10, prefetch_factor=1, + ): + def _parse_examples(serial_exmp): + try: + features = tf.parse_single_example(serial_exmp, features=feature_description) + except AttributeError: + features = tf.io.parse_single_example(serial_exmp, features=feature_description) + if label is not None: + labels = features.pop(label) + return features, labels + return features + + def input_fn(): + dataset = tf.data.TFRecordDataset(filenames) + dataset = dataset.map(_parse_examples, num_parallel_calls=num_parallel_calls) + if shuffle_factor > 0: + dataset = dataset.shuffle(buffer_size=batch_size * shuffle_factor) + + dataset = dataset.repeat(num_epochs).batch(batch_size) + + if prefetch_factor > 0: + dataset = dataset.prefetch(buffer_size=batch_size * prefetch_factor) + try: + iterator = dataset.make_one_shot_iterator() + except AttributeError: + iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + + return iterator.get_next() + + return input_fn diff --git a/modelzoo/FwFM/script/estimator/models/__init__.py b/modelzoo/FwFM/script/estimator/models/__init__.py new file mode 100644 index 00000000000..9bc1e120dbc --- /dev/null +++ b/modelzoo/FwFM/script/estimator/models/__init__.py @@ -0,0 +1,13 @@ +from .afm import AFMEstimator +from .autoint import AutoIntEstimator +from .ccpm import CCPMEstimator +from .dcn import DCNEstimator +from .deepfm import DeepFMEstimator +from .fwfm import FwFMEstimator +from .fibinet import FiBiNETEstimator +from .fnn import FNNEstimator +from .nfm import NFMEstimator +from .pnn import PNNEstimator +from .wdl import WDLEstimator +from .xdeepfm import xDeepFMEstimator +from .deepfefm import DeepFEFMEstimator diff --git a/modelzoo/FwFM/script/estimator/models/fwfm.py b/modelzoo/FwFM/script/estimator/models/fwfm.py new file mode 100644 index 00000000000..059331643de --- /dev/null +++ b/modelzoo/FwFM/script/estimator/models/fwfm.py @@ -0,0 +1,84 @@ +# -*- coding:utf-8 -*- +""" +Author: + Weichen Shen, weichenswc@163.com + Harshit Pande + +Reference: + [1] Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising + (https://arxiv.org/pdf/1806.03514.pdf) + +""" + +import tensorflow as tf + +from ..feature_column import get_linear_logit, input_from_feature_columns +from ..utils import DNN_SCOPE_NAME, deepctr_model_fn, variable_scope +from ...layers.core import DNN +from ...layers.interaction import FwFMLayer +from ...layers.utils import concat_func, add_func, combined_dnn_input + + +def FwFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), + l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0, + seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None, + config=None, linear_optimizer='Ftrl', + dnn_optimizer='Adagrad', 
+                   training_chief_hooks=None):
+    """Instantiates the DeepFwFM Network architecture.
+
+    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
+    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
+    :param dnn_hidden_units: list, list of positive integers (or an empty list to disable the DNN), the layer number
+        and units in each layer of the DNN
+    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
+    :param l2_reg_field_strength: float. L2 regularizer strength applied to the field pair strength parameters
+    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
+    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
+    :param seed: integer, to use as random seed.
+    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
+    :param dnn_activation: Activation function to use in the DNN
+    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
+    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
+    :param model_dir: Directory to save model parameters, graph, etc. This can
+        also be used to load checkpoints from the directory into an estimator
+        to continue training a previously saved model.
+    :param config: tf.RunConfig object to configure the runtime settings.
+    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the linear part of the model. Defaults to the FTRL optimizer.
+    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the deep part of the model. Defaults to the Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
+    :return: A Tensorflow Estimator instance.
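+
+    Example (a minimal sketch; the feature columns below are placeholders):
+
+        fc = tf.feature_column
+        linear_cols = [fc.numeric_column('I1')]
+        dnn_cols = [fc.embedding_column(
+            fc.categorical_column_with_identity('C1', num_buckets=100), 4)]
+        model = FwFMEstimator(linear_cols, dnn_cols,
+                              dnn_hidden_units=(64, 32), task='binary')
+        # model.train(input_fn=...); model.evaluate(input_fn=...)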
+ + """ + + def _model_fn(features, labels, mode, config): + train_flag = (mode == tf.estimator.ModeKeys.TRAIN) + + linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear) + final_logit_components = [linear_logits] + with variable_scope(DNN_SCOPE_NAME): + sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, + l2_reg_embedding=l2_reg_embedding) + + fwfm_logit = FwFMLayer(num_fields=len(sparse_embedding_list), regularizer=l2_reg_field_strength)( + concat_func(sparse_embedding_list, axis=1)) + + final_logit_components.append(fwfm_logit) + + if dnn_hidden_units: + dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list) + + dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input, training=train_flag) + dnn_logit = tf.keras.layers.Dense( + 1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_output) + final_logit_components.append(dnn_logit) + + logits = add_func(final_logit_components) + + return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer, + training_chief_hooks=training_chief_hooks) + + return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config) diff --git a/modelzoo/FwFM/script/estimator/utils.py b/modelzoo/FwFM/script/estimator/utils.py new file mode 100644 index 00000000000..5d722515f6b --- /dev/null +++ b/modelzoo/FwFM/script/estimator/utils.py @@ -0,0 +1,217 @@ +import tensorflow as tf +from tensorflow.python.estimator.canned.head import _Head +from tensorflow.python.estimator.canned.optimizers import get_optimizer_instance + +LINEAR_SCOPE_NAME = 'linear' +DNN_SCOPE_NAME = 'dnn' + + +def _summary_key(head_name, val): + return '%s/%s' % (val, head_name) if head_name else val + + +class Head(_Head): + + def __init__(self, task, + name=None): + self._task = task + self._name = name + + @property + def name(self): + return self._name + + @property + def logits_dimension(self): + return 1 + + def _eval_metric_ops(self, + labels, + logits, + predictions, + unweighted_loss, + weights=None): + + labels = to_float(labels) + predictions = to_float(predictions) + + # with name_scope(None, 'metrics', (labels, logits, predictions, + # unweighted_loss, weights)): + metrics = get_metrics() + losses = get_losses() + + metric_ops = { + _summary_key(self._name, "prediction/mean"): metrics.mean(predictions, weights=weights), + _summary_key(self._name, "label/mean"): metrics.mean(labels, weights=weights), + } + + summary_scalar("prediction/mean", metric_ops[_summary_key(self._name, "prediction/mean")][1]) + summary_scalar("label/mean", metric_ops[_summary_key(self._name, "label/mean")][1]) + + + mean_loss = losses.compute_weighted_loss( + unweighted_loss, weights=1.0, reduction=losses.Reduction.MEAN) + + if self._task == "binary": + metric_ops[_summary_key(self._name, "LogLoss")] = metrics.mean(mean_loss, weights=weights, ) + summary_scalar("LogLoss", mean_loss) + + metric_ops[_summary_key(self._name, "AUC")] = metrics.auc(labels, predictions, weights=weights) + summary_scalar("AUC", metric_ops[_summary_key(self._name, "AUC")][1]) + else: + + metric_ops[_summary_key(self._name, "MSE")] = metrics.mean_squared_error(labels, predictions, + weights=weights) + summary_scalar("MSE", mean_loss) + + metric_ops[_summary_key(self._name, "MAE")] = metrics.mean_absolute_error(labels, predictions, + weights=weights) + summary_scalar("MAE", metric_ops[_summary_key(self._name, 
"MAE")][1]) + + return metric_ops + + def create_loss(self, features, mode, logits, labels): + del mode, features # Unused for this head. + losses = get_losses() + if self._task == "binary": + loss = losses.sigmoid_cross_entropy(labels, logits, reduction=losses.Reduction.NONE) + else: + loss = losses.mean_squared_error(labels, logits, reduction=losses.Reduction.NONE) + return loss + + def create_estimator_spec( + self, features, mode, logits, labels=None, train_op_fn=None, training_chief_hooks=None): + # with name_scope('head'): + logits = tf.reshape(logits, [-1, 1]) + if self._task == 'binary': + pred = tf.sigmoid(logits) + else: + pred = logits + + predictions = {"pred": pred, "logits": logits} + export_outputs = {"predict": tf.estimator.export.PredictOutput(predictions)} + if mode == tf.estimator.ModeKeys.PREDICT: + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + export_outputs=export_outputs) + + labels = tf.reshape(labels, [-1, 1]) + + unweighted_loss = self.create_loss(features, mode, logits, labels) + + losses = get_losses() + loss = losses.compute_weighted_loss( + unweighted_loss, weights=1.0, reduction=losses.Reduction.SUM) + reg_loss = losses.get_regularization_loss() + + training_loss = loss + reg_loss + + eval_metric_ops = self._eval_metric_ops(labels, logits, pred, unweighted_loss) + + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + loss=training_loss, + train_op=train_op_fn(training_loss), + eval_metric_ops=eval_metric_ops, + training_chief_hooks=training_chief_hooks) + + +def deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer, training_chief_hooks): + linear_optimizer = get_optimizer_instance(linear_optimizer, 0.005) + dnn_optimizer = get_optimizer_instance(dnn_optimizer, 0.01) + train_op_fn = get_train_op_fn(linear_optimizer, dnn_optimizer) + + head = Head(task) + return head.create_estimator_spec(features=features, + mode=mode, + labels=labels, + train_op_fn=train_op_fn, + logits=logits, training_chief_hooks=training_chief_hooks) + + +def get_train_op_fn(linear_optimizer, dnn_optimizer): + def _train_op_fn(loss): + train_ops = [] + try: + global_step = tf.train.get_global_step() + except AttributeError: + global_step = tf.compat.v1.train.get_global_step() + linear_var_list = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME) + dnn_var_list = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, DNN_SCOPE_NAME) + + if len(dnn_var_list) > 0: + train_ops.append( + dnn_optimizer.minimize( + loss, + var_list=dnn_var_list)) + if len(linear_var_list) > 0: + train_ops.append( + linear_optimizer.minimize( + loss, + var_list=linear_var_list)) + + train_op = tf.group(*train_ops) + with tf.control_dependencies([train_op]): + try: + return tf.assign_add(global_step, 1).op + except AttributeError: + return tf.compat.v1.assign_add(global_step, 1).op + + return _train_op_fn + + +def variable_scope(name_or_scope): + try: + return tf.variable_scope(name_or_scope) + except AttributeError: + return tf.compat.v1.variable_scope(name_or_scope) + +def get_collection(key, scope=None): + try: + return tf.get_collection(key, scope=scope) + except AttributeError: + return tf.compat.v1.get_collection(key, scope=scope) + + +def get_GraphKeys(): + try: + return tf.GraphKeys + except AttributeError: + return tf.compat.v1.GraphKeys + + +def get_losses(): + try: + return tf.compat.v1.losses + except AttributeError: + return tf.losses + + +def input_layer(features, feature_columns): + try: + 
return tf.feature_column.input_layer(features, feature_columns) + except AttributeError: + return tf.compat.v1.feature_column.input_layer(features, feature_columns) + + +def get_metrics(): + try: + return tf.compat.v1.metrics + except AttributeError: + return tf.metrics + + +def to_float(x, name="ToFloat"): + try: + return tf.to_float(x, name) + except AttributeError: + return tf.compat.v1.to_float(x, name) + + +def summary_scalar(name, data): + try: + tf.summary.scalar(name, data) + except AttributeError: # tf version 2.5.0+:AttributeError: module 'tensorflow._api.v2.summary' has no attribute 'scalar' + tf.compat.v1.summary.scalar(name, data) \ No newline at end of file diff --git a/modelzoo/FwFM/script/feature_column.py b/modelzoo/FwFM/script/feature_column.py new file mode 100644 index 00000000000..3b778360b33 --- /dev/null +++ b/modelzoo/FwFM/script/feature_column.py @@ -0,0 +1,249 @@ +import tensorflow as tf +from collections import namedtuple, OrderedDict +from copy import copy +from itertools import chain + +from tensorflow.python.keras.initializers import RandomNormal, Zeros +from tensorflow.python.keras.layers import Input, Lambda + +from .inputs import create_embedding_matrix, embedding_lookup, get_dense_input, varlen_embedding_lookup, \ + get_varlen_pooling_list, mergeDict +from .layers import Linear +from .layers.utils import concat_func +#from keras import backend as K +import pandas as pd +import numpy as np + +fi = open('../../deep_ctr_master/data/fm.model.txt','r') + +first = True +feat_weights={} +k=0 +for line in fi: + s = line.strip().split() + if first: + first = False + w_0 = float(s[0]) + feat_num = int(s[1]) + k = int(s[2]) + 1 # w and v + + else: + feat = int(s[0]) + weights = [float(s[1 + i]) for i in range(k)] + feat_weights[feat] = weights + +list1 =[] +for col,val in feat_weights.items(): + list1.append(val) + +# def my_init(shape,dtype=None): +# weight = np.array(list1) +# +# return weight.reshape(shape) + + +DEFAULT_GROUP_NAME = "default_group" + + +class SparseFeat(namedtuple('SparseFeat', + ['name', 'vocabulary_size', 'embedding_dim', 'use_hash', 'vocabulary_path', 'dtype', 'embeddings_initializer', + 'embedding_name', + 'group_name', 'trainable'])): + __slots__ = () + + def __new__(cls, name, vocabulary_size, embedding_dim=4, use_hash=False, vocabulary_path=None, dtype="int32", embeddings_initializer=None, + embedding_name=None, + group_name=DEFAULT_GROUP_NAME, trainable=True): + + if embedding_dim == "auto": + embedding_dim = 6 * int(pow(vocabulary_size, 0.25)) + if embeddings_initializer is None: + embeddings_initializer = RandomNormal(mean=0.0, stddev=0.0001, seed=2020) + # if embeddings_initializer=='fm': + # embeddings_initializer = my_init(shape=(vocabulary_size,embedding_dim)) + + + + if embedding_name is None: + embedding_name = name + + return super(SparseFeat, cls).__new__(cls, name, vocabulary_size, embedding_dim, use_hash, vocabulary_path, dtype, + embeddings_initializer, + embedding_name, group_name, trainable) + + def __hash__(self): + return self.name.__hash__() + + +class VarLenSparseFeat(namedtuple('VarLenSparseFeat', + ['sparsefeat', 'maxlen', 'combiner', 'length_name', 'weight_name', 'weight_norm'])): + __slots__ = () + + def __new__(cls, sparsefeat, maxlen, combiner="mean", length_name=None, weight_name=None, weight_norm=True): + return super(VarLenSparseFeat, cls).__new__(cls, sparsefeat, maxlen, combiner, length_name, weight_name, + weight_norm) + + @property + def name(self): + return self.sparsefeat.name + + @property + def 
vocabulary_size(self): + return self.sparsefeat.vocabulary_size + + @property + def embedding_dim(self): + return self.sparsefeat.embedding_dim + + @property + def use_hash(self): + return self.sparsefeat.use_hash + + @property + def vocabulary_path(self): + return self.sparsefeat.vocabulary_path + + @property + def dtype(self): + return self.sparsefeat.dtype + + @property + def embeddings_initializer(self): + return self.sparsefeat.embeddings_initializer + + @property + def embedding_name(self): + return self.sparsefeat.embedding_name + + @property + def group_name(self): + return self.sparsefeat.group_name + + @property + def trainable(self): + return self.sparsefeat.trainable + + def __hash__(self): + return self.name.__hash__() + + +class DenseFeat(namedtuple('DenseFeat', ['name', 'dimension', 'dtype', 'transform_fn'])): + """ Dense feature + Args: + name: feature name, + dimension: dimension of the feature, default = 1. + dtype: dtype of the feature, default="float32". + transform_fn: If not `None` , a function that can be used to transform + values of the feature. the function takes the input Tensor as its + argument, and returns the output Tensor. + (e.g. lambda x: (x - 3.0) / 4.2). + """ + __slots__ = () + + def __new__(cls, name, dimension=1, dtype="float32", transform_fn=None): + return super(DenseFeat, cls).__new__(cls, name, dimension, dtype, transform_fn) + + def __hash__(self): + return self.name.__hash__() + + # def __eq__(self, other): + # if self.name == other.name: + # return True + # return False + + # def __repr__(self): + # return 'DenseFeat:'+self.name + + +def get_feature_names(feature_columns): + features = build_input_features(feature_columns) + return list(features.keys()) + + +def build_input_features(feature_columns, prefix=''): + input_features = OrderedDict() + for fc in feature_columns: + if isinstance(fc, SparseFeat): + input_features[fc.name] = Input( + shape=(1,), name=prefix + fc.name, dtype=fc.dtype) + elif isinstance(fc, DenseFeat): + input_features[fc.name] = Input( + shape=(fc.dimension,), name=prefix + fc.name, dtype=fc.dtype) + elif isinstance(fc, VarLenSparseFeat): + input_features[fc.name] = Input(shape=(fc.maxlen,), name=prefix + fc.name, + dtype=fc.dtype) + if fc.weight_name is not None: + input_features[fc.weight_name] = Input(shape=(fc.maxlen, 1), name=prefix + fc.weight_name, + dtype="float32") + if fc.length_name is not None: + input_features[fc.length_name] = Input((1,), name=prefix + fc.length_name, dtype='int32') + + else: + raise TypeError("Invalid feature column type,got", type(fc)) + + return input_features + + +def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=1024, prefix='linear', + l2_reg=0, sparse_feat_refine_weight=None): + linear_feature_columns = copy(feature_columns) + for i in range(len(linear_feature_columns)): + if isinstance(linear_feature_columns[i], SparseFeat): + linear_feature_columns[i] = linear_feature_columns[i]._replace(embedding_dim=1, + embeddings_initializer=Zeros()) + if isinstance(linear_feature_columns[i], VarLenSparseFeat): + linear_feature_columns[i] = linear_feature_columns[i]._replace( + sparsefeat=linear_feature_columns[i].sparsefeat._replace(embedding_dim=1, + embeddings_initializer=Zeros())) + + linear_emb_list = [input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, + prefix=prefix + str(i))[0] for i in range(units)] + _, dense_input_list = input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, prefix=prefix) + + linear_logit_list = 
[] + for i in range(units): + + if len(linear_emb_list[i]) > 0 and len(dense_input_list) > 0: + sparse_input = concat_func(linear_emb_list[i]) + dense_input = concat_func(dense_input_list) + if sparse_feat_refine_weight is not None: + sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))( + [sparse_input, sparse_feat_refine_weight]) + linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias, seed=seed)([sparse_input, dense_input]) + elif len(linear_emb_list[i]) > 0: + sparse_input = concat_func(linear_emb_list[i]) + if sparse_feat_refine_weight is not None: + sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))( + [sparse_input, sparse_feat_refine_weight]) + linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(sparse_input) + elif len(dense_input_list) > 0: + dense_input = concat_func(dense_input_list) + linear_logit = Linear(l2_reg, mode=1, use_bias=use_bias, seed=seed)(dense_input) + else: #empty feature_columns + return Lambda(lambda x: tf.constant([[0.0]]))(list(features.values())[0]) + linear_logit_list.append(linear_logit) + + return concat_func(linear_logit_list) + + +def input_from_feature_columns(features, feature_columns, l2_reg, seed, prefix='', seq_mask_zero=True, + support_dense=True, support_group=False): + sparse_feature_columns = list( + filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if feature_columns else [] + varlen_sparse_feature_columns = list( + filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if feature_columns else [] + + embedding_matrix_dict = create_embedding_matrix(feature_columns, l2_reg, seed, prefix=prefix, + seq_mask_zero=seq_mask_zero) + group_sparse_embedding_dict = embedding_lookup(embedding_matrix_dict, features, sparse_feature_columns) + dense_value_list = get_dense_input(features, feature_columns) + if not support_dense and len(dense_value_list) > 0: + raise ValueError("DenseFeat is not supported in dnn_feature_columns") + + sequence_embed_dict = varlen_embedding_lookup(embedding_matrix_dict, features, varlen_sparse_feature_columns) + group_varlen_sparse_embedding_dict = get_varlen_pooling_list(sequence_embed_dict, features, + varlen_sparse_feature_columns) + group_embedding_dict = mergeDict(group_sparse_embedding_dict, group_varlen_sparse_embedding_dict) + if not support_group: + group_embedding_dict = list(chain.from_iterable(group_embedding_dict.values())) + return group_embedding_dict, dense_value_list diff --git a/modelzoo/FwFM/script/inputs.py b/modelzoo/FwFM/script/inputs.py new file mode 100644 index 00000000000..d567f846265 --- /dev/null +++ b/modelzoo/FwFM/script/inputs.py @@ -0,0 +1,155 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +from collections import defaultdict +from itertools import chain + +from tensorflow.python.keras.layers import Embedding, Lambda +from tensorflow.python.keras.regularizers import l2 + +from .layers.sequence import SequencePoolingLayer, WeightedSequenceLayer +from .layers.utils import Hash + + +def get_inputs_list(inputs): + return list(chain(*list(map(lambda x: x.values(), filter(lambda x: x is not None, inputs))))) + + +def create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, l2_reg, + prefix='sparse_', seq_mask_zero=True): + sparse_embedding = {} + for feat in sparse_feature_columns: + emb = Embedding(feat.vocabulary_size, feat.embedding_dim, + embeddings_initializer=feat.embeddings_initializer, + embeddings_regularizer=l2(l2_reg), + name=prefix + '_emb_' + 
feat.embedding_name) + emb.trainable = feat.trainable + sparse_embedding[feat.embedding_name] = emb + + if varlen_sparse_feature_columns and len(varlen_sparse_feature_columns) > 0: + for feat in varlen_sparse_feature_columns: + # if feat.name not in sparse_embedding: + emb = Embedding(feat.vocabulary_size, feat.embedding_dim, + embeddings_initializer=feat.embeddings_initializer, + embeddings_regularizer=l2( + l2_reg), + name=prefix + '_seq_emb_' + feat.name, + mask_zero=seq_mask_zero) + emb.trainable = feat.trainable + sparse_embedding[feat.embedding_name] = emb + return sparse_embedding + + +def get_embedding_vec_list(embedding_dict, input_dict, sparse_feature_columns, return_feat_list=(), mask_feat_list=()): + embedding_vec_list = [] + for fg in sparse_feature_columns: + feat_name = fg.name + if len(return_feat_list) == 0 or feat_name in return_feat_list: + if fg.use_hash: + lookup_idx = Hash(fg.vocabulary_size, mask_zero=(feat_name in mask_feat_list), vocabulary_path=fg.vocabulary_path)(input_dict[feat_name]) + else: + lookup_idx = input_dict[feat_name] + + embedding_vec_list.append(embedding_dict[feat_name](lookup_idx)) + + return embedding_vec_list + + +def create_embedding_matrix(feature_columns, l2_reg, seed, prefix="", seq_mask_zero=True): + from . import feature_column as fc_lib + + sparse_feature_columns = list( + filter(lambda x: isinstance(x, fc_lib.SparseFeat), feature_columns)) if feature_columns else [] + varlen_sparse_feature_columns = list( + filter(lambda x: isinstance(x, fc_lib.VarLenSparseFeat), feature_columns)) if feature_columns else [] + sparse_emb_dict = create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, + l2_reg, prefix=prefix + 'sparse', seq_mask_zero=seq_mask_zero) + return sparse_emb_dict + + +def embedding_lookup(sparse_embedding_dict, sparse_input_dict, sparse_feature_columns, return_feat_list=(), + mask_feat_list=(), to_list=False): + group_embedding_dict = defaultdict(list) + for fc in sparse_feature_columns: + feature_name = fc.name + embedding_name = fc.embedding_name + if (len(return_feat_list) == 0 or feature_name in return_feat_list): + if fc.use_hash: + lookup_idx = Hash(fc.vocabulary_size, mask_zero=(feature_name in mask_feat_list), vocabulary_path=fc.vocabulary_path)( + sparse_input_dict[feature_name]) + else: + lookup_idx = sparse_input_dict[feature_name] + + group_embedding_dict[fc.group_name].append(sparse_embedding_dict[embedding_name](lookup_idx)) + if to_list: + return list(chain.from_iterable(group_embedding_dict.values())) + return group_embedding_dict + + +def varlen_embedding_lookup(embedding_dict, sequence_input_dict, varlen_sparse_feature_columns): + varlen_embedding_vec_dict = {} + for fc in varlen_sparse_feature_columns: + feature_name = fc.name + embedding_name = fc.embedding_name + if fc.use_hash: + lookup_idx = Hash(fc.vocabulary_size, mask_zero=True, vocabulary_path=fc.vocabulary_path)(sequence_input_dict[feature_name]) + else: + lookup_idx = sequence_input_dict[feature_name] + varlen_embedding_vec_dict[feature_name] = embedding_dict[embedding_name](lookup_idx) + return varlen_embedding_vec_dict + + +def get_varlen_pooling_list(embedding_dict, features, varlen_sparse_feature_columns, to_list=False): + pooling_vec_list = defaultdict(list) + for fc in varlen_sparse_feature_columns: + feature_name = fc.name + combiner = fc.combiner + feature_length_name = fc.length_name + if feature_length_name is not None: + if fc.weight_name is not None: + seq_input = 
WeightedSequenceLayer(weight_normalization=fc.weight_norm)( + [embedding_dict[feature_name], features[feature_length_name], features[fc.weight_name]]) + else: + seq_input = embedding_dict[feature_name] + vec = SequencePoolingLayer(combiner, supports_masking=False)( + [seq_input, features[feature_length_name]]) + else: + if fc.weight_name is not None: + seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm, supports_masking=True)( + [embedding_dict[feature_name], features[fc.weight_name]]) + else: + seq_input = embedding_dict[feature_name] + vec = SequencePoolingLayer(combiner, supports_masking=True)( + seq_input) + pooling_vec_list[fc.group_name].append(vec) + if to_list: + return chain.from_iterable(pooling_vec_list.values()) + return pooling_vec_list + + +def get_dense_input(features, feature_columns): + from . import feature_column as fc_lib + dense_feature_columns = list( + filter(lambda x: isinstance(x, fc_lib.DenseFeat), feature_columns)) if feature_columns else [] + dense_input_list = [] + for fc in dense_feature_columns: + if fc.transform_fn is None: + dense_input_list.append(features[fc.name]) + else: + transform_result = Lambda(fc.transform_fn)(features[fc.name]) + dense_input_list.append(transform_result) + return dense_input_list + + +def mergeDict(a, b): + c = defaultdict(list) + for k, v in a.items(): + c[k].extend(v) + for k, v in b.items(): + c[k].extend(v) + return c diff --git a/modelzoo/FwFM/script/layers/__init__.py b/modelzoo/FwFM/script/layers/__init__.py new file mode 100644 index 00000000000..1bfd40effe7 --- /dev/null +++ b/modelzoo/FwFM/script/layers/__init__.py @@ -0,0 +1,52 @@ +import tensorflow as tf + +from .activation import Dice +from .core import DNN, LocalActivationUnit, PredictionLayer +from .interaction import (CIN, FM, AFMLayer, BiInteractionPooling, CrossNet, CrossNetMix, + InnerProductLayer, InteractingLayer, + OutterProductLayer, FGCNNLayer, SENETLayer, BilinearInteraction, + FieldWiseBiInteraction, FwFMLayer, FEFMLayer) +from .normalization import LayerNormalization +from .sequence import (AttentionSequencePoolingLayer, BiasEncoding, BiLSTM, + KMaxPooling, SequencePoolingLayer, WeightedSequenceLayer, + Transformer, DynamicGRU,PositionEncoding) + +from .utils import NoMask, Hash, Linear, _Add, combined_dnn_input, softmax, reduce_sum + +custom_objects = {'tf': tf, + 'InnerProductLayer': InnerProductLayer, + 'OutterProductLayer': OutterProductLayer, + 'DNN': DNN, + 'PredictionLayer': PredictionLayer, + 'FM': FM, + 'AFMLayer': AFMLayer, + 'CrossNet': CrossNet, + 'CrossNetMix': CrossNetMix, + 'BiInteractionPooling': BiInteractionPooling, + 'LocalActivationUnit': LocalActivationUnit, + 'Dice': Dice, + 'SequencePoolingLayer': SequencePoolingLayer, + 'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer, + 'CIN': CIN, + 'InteractingLayer': InteractingLayer, + 'LayerNormalization': LayerNormalization, + 'BiLSTM': BiLSTM, + 'Transformer': Transformer, + 'NoMask': NoMask, + 'BiasEncoding': BiasEncoding, + 'KMaxPooling': KMaxPooling, + 'FGCNNLayer': FGCNNLayer, + 'Hash': Hash, + 'Linear': Linear, + 'DynamicGRU': DynamicGRU, + 'SENETLayer': SENETLayer, + 'BilinearInteraction': BilinearInteraction, + 'WeightedSequenceLayer': WeightedSequenceLayer, + '_Add': _Add, + 'FieldWiseBiInteraction': FieldWiseBiInteraction, + 'FwFMLayer': FwFMLayer, + 'softmax': softmax, + 'FEFMLayer': FEFMLayer, + 'reduce_sum': reduce_sum, + 'PositionEncoding':PositionEncoding + } diff --git a/modelzoo/FwFM/script/layers/activation.py 
b/modelzoo/FwFM/script/layers/activation.py new file mode 100644 index 00000000000..1b953bff8bc --- /dev/null +++ b/modelzoo/FwFM/script/layers/activation.py @@ -0,0 +1,85 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import tensorflow as tf + +try: + from tensorflow.python.ops.init_ops import Zeros +except ImportError: + from tensorflow.python.ops.init_ops_v2 import Zeros +from tensorflow.python.keras.layers import Layer, Activation + +try: + from tensorflow.python.keras.layers import BatchNormalization +except ImportError: + BatchNormalization = tf.keras.layers.BatchNormalization + +try: + unicode +except NameError: + unicode = str + + +class Dice(Layer): + """The Data Adaptive Activation Function in DIN,which can be viewed as a generalization of PReLu and can adaptively adjust the rectified point according to distribution of input data. + + Input shape + - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. + + Output shape + - Same shape as the input. + + Arguments + - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). + + - **epsilon** : Small float added to variance to avoid dividing by zero. + + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """ + + def __init__(self, axis=-1, epsilon=1e-9, **kwargs): + self.axis = axis + self.epsilon = epsilon + super(Dice, self).__init__(**kwargs) + + def build(self, input_shape): + self.bn = BatchNormalization( + axis=self.axis, epsilon=self.epsilon, center=False, scale=False) + self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros( + ), dtype=tf.float32, name='dice_alpha') # name='alpha_'+self.name + super(Dice, self).build(input_shape) # Be sure to call this somewhere! + self.uses_learning_phase = True + + def call(self, inputs, training=None, **kwargs): + inputs_normed = self.bn(inputs, training=training) + # tf.layers.batch_normalization( + # inputs, axis=self.axis, epsilon=self.epsilon, center=False, scale=False) + x_p = tf.sigmoid(inputs_normed) + return self.alphas * (1.0 - x_p) * inputs + x_p * inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self, ): + config = {'axis': self.axis, 'epsilon': self.epsilon} + base_config = super(Dice, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def activation_layer(activation): + if activation in ("dice", "Dice"): + act_layer = Dice() + elif isinstance(activation, (str, unicode)): + act_layer = Activation(activation) + elif issubclass(activation, Layer): + act_layer = activation() + else: + raise ValueError( + "Invalid activation,found %s.You should use a str or a Activation Layer Class." 
% (activation)) + return act_layer diff --git a/modelzoo/FwFM/script/layers/core.py b/modelzoo/FwFM/script/layers/core.py new file mode 100644 index 00000000000..668348d2eb7 --- /dev/null +++ b/modelzoo/FwFM/script/layers/core.py @@ -0,0 +1,267 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import tensorflow as tf +from tensorflow.python.keras import backend as K + +try: + from tensorflow.python.ops.init_ops_v2 import Zeros, glorot_normal +except ImportError: + from tensorflow.python.ops.init_ops import Zeros, glorot_normal_initializer as glorot_normal + +from tensorflow.python.keras.layers import Layer, Dropout + +try: + from tensorflow.python.keras.layers import BatchNormalization +except ImportError: + BatchNormalization = tf.keras.layers.BatchNormalization +from tensorflow.python.keras.regularizers import l2 + +from .activation import activation_layer + + +class LocalActivationUnit(Layer): + """The LocalActivationUnit used in DIN with which the representation of + user interests varies adaptively given different candidate items. + + Input shape + - A list of two 3D tensor with shape: ``(batch_size, 1, embedding_size)`` and ``(batch_size, T, embedding_size)`` + + Output shape + - 3D tensor with shape: ``(batch_size, T, 1)``. + + Arguments + - **hidden_units**:list of positive integer, the attention net layer number and units in each layer. + + - **activation**: Activation function to use in attention net. + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix of attention net. + + - **dropout_rate**: float in [0,1). Fraction of the units to dropout in attention net. + + - **use_bn**: bool. Whether use BatchNormalization before activation or not in attention net. + + - **seed**: A Python integer to use as random seed. + + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """ + + def __init__(self, hidden_units=(64, 32), activation='sigmoid', l2_reg=0, dropout_rate=0, use_bn=False, seed=1024, + **kwargs): + self.hidden_units = hidden_units + self.activation = activation + self.l2_reg = l2_reg + self.dropout_rate = dropout_rate + self.use_bn = use_bn + self.seed = seed + super(LocalActivationUnit, self).__init__(**kwargs) + self.supports_masking = True + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) != 2: + raise ValueError('A `LocalActivationUnit` layer should be called ' + 'on a list of 2 inputs') + + if len(input_shape[0]) != 3 or len(input_shape[1]) != 3: + raise ValueError("Unexpected inputs dimensions %d and %d, expect to be 3 dimensions" % ( + len(input_shape[0]), len(input_shape[1]))) + + if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1: + raise ValueError('A `LocalActivationUnit` layer requires ' + 'inputs of a two inputs with shape (None,1,embedding_size) and (None,T,embedding_size)' + 'Got different shapes: %s,%s' % (input_shape[0], input_shape[1])) + size = 4 * \ + int(input_shape[0][-1] + ) if len(self.hidden_units) == 0 else self.hidden_units[-1] + self.kernel = self.add_weight(shape=(size, 1), + initializer=glorot_normal( + seed=self.seed), + name="kernel") + self.bias = self.add_weight( + shape=(1,), initializer=Zeros(), name="bias") + self.dnn = DNN(self.hidden_units, self.activation, self.l2_reg, self.dropout_rate, self.use_bn, seed=self.seed) + + super(LocalActivationUnit, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, training=None, **kwargs): + + query, keys = inputs + + keys_len = keys.get_shape()[1] + queries = K.repeat_elements(query, keys_len, 1) + + att_input = tf.concat( + [queries, keys, queries - keys, queries * keys], axis=-1) + + att_out = self.dnn(att_input, training=training) + + attention_score = tf.nn.bias_add(tf.tensordot(att_out, self.kernel, axes=(-1, 0)), self.bias) + + return attention_score + + def compute_output_shape(self, input_shape): + return input_shape[1][:2] + (1,) + + def compute_mask(self, inputs, mask): + return mask + + def get_config(self, ): + config = {'activation': self.activation, 'hidden_units': self.hidden_units, + 'l2_reg': self.l2_reg, 'dropout_rate': self.dropout_rate, 'use_bn': self.use_bn, 'seed': self.seed} + base_config = super(LocalActivationUnit, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class DNN(Layer): + """The Multi Layer Percetron + + Input shape + - nD tensor with shape: ``(batch_size, ..., input_dim)``. The most common situation would be a 2D input with shape ``(batch_size, input_dim)``. + + Output shape + - nD tensor with shape: ``(batch_size, ..., hidden_size[-1])``. For instance, for a 2D input with shape ``(batch_size, input_dim)``, the output would have shape ``(batch_size, hidden_size[-1])``. + + Arguments + - **hidden_units**:list of positive integer, the layer number and units in each layer. + + - **activation**: Activation function to use. + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix. + + - **dropout_rate**: float in [0,1). Fraction of the units to dropout. + + - **use_bn**: bool. Whether use BatchNormalization before activation or not. + + - **output_activation**: Activation function to use in the last layer.If ``None``,it will be same as ``activation``. 
+ + - **seed**: A Python integer to use as random seed. + """ + + def __init__(self, hidden_units, activation='relu', l2_reg=0, dropout_rate=0, use_bn=False, output_activation=None, + seed=1024, **kwargs): + self.hidden_units = hidden_units + self.activation = activation + self.l2_reg = l2_reg + self.dropout_rate = dropout_rate + self.use_bn = use_bn + self.output_activation = output_activation + self.seed = seed + + super(DNN, self).__init__(**kwargs) + + def build(self, input_shape): + # if len(self.hidden_units) == 0: + # raise ValueError("hidden_units is empty") + input_size = input_shape[-1] + hidden_units = [int(input_size)] + list(self.hidden_units) + self.kernels = [self.add_weight(name='kernel' + str(i), + shape=( + hidden_units[i], hidden_units[i + 1]), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(len(self.hidden_units))] + self.bias = [self.add_weight(name='bias' + str(i), + shape=(self.hidden_units[i],), + initializer=Zeros(), + trainable=True) for i in range(len(self.hidden_units))] + if self.use_bn: + self.bn_layers = [BatchNormalization() for _ in range(len(self.hidden_units))] + + self.dropout_layers = [Dropout(self.dropout_rate, seed=self.seed + i) for i in + range(len(self.hidden_units))] + + self.activation_layers = [activation_layer(self.activation) for _ in range(len(self.hidden_units))] + + if self.output_activation: + self.activation_layers[-1] = activation_layer(self.output_activation) + + super(DNN, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, training=None, **kwargs): + + deep_input = inputs + + for i in range(len(self.hidden_units)): + fc = tf.nn.bias_add(tf.tensordot( + deep_input, self.kernels[i], axes=(-1, 0)), self.bias[i]) + + if self.use_bn: + fc = self.bn_layers[i](fc, training=training) + try: + fc = self.activation_layers[i](fc, training=training) + except TypeError as e: # TypeError: call() got an unexpected keyword argument 'training' + print("make sure the activation function use training flag properly", e) + fc = self.activation_layers[i](fc) + + fc = self.dropout_layers[i](fc, training=training) + deep_input = fc + + return deep_input + + def compute_output_shape(self, input_shape): + if len(self.hidden_units) > 0: + shape = input_shape[:-1] + (self.hidden_units[-1],) + else: + shape = input_shape + + return tuple(shape) + + def get_config(self, ): + config = {'activation': self.activation, 'hidden_units': self.hidden_units, + 'l2_reg': self.l2_reg, 'use_bn': self.use_bn, 'dropout_rate': self.dropout_rate, + 'output_activation': self.output_activation, 'seed': self.seed} + base_config = super(DNN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class PredictionLayer(Layer): + """ + Arguments + - **task**: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss + + - **use_bias**: bool.Whether add bias term or not. + """ + + def __init__(self, task='binary', use_bias=True, **kwargs): + if task not in ["binary", "multiclass", "regression"]: + raise ValueError("task must be binary,multiclass or regression") + self.task = task + self.use_bias = use_bias + super(PredictionLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if self.use_bias: + self.global_bias = self.add_weight( + shape=(1,), initializer=Zeros(), name="global_bias") + + # Be sure to call this somewhere! 
+        super(PredictionLayer, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        x = inputs
+        if self.use_bias:
+            x = tf.nn.bias_add(x, self.global_bias, data_format='NHWC')
+        if self.task == "binary":
+            x = tf.sigmoid(x)
+
+        output = tf.reshape(x, (-1, 1))
+
+        return output
+
+    def compute_output_shape(self, input_shape):
+        return (None, 1)
+
+    def get_config(self, ):
+        config = {'task': self.task, 'use_bias': self.use_bias}
+        base_config = super(PredictionLayer, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/modelzoo/FwFM/script/layers/interaction.py b/modelzoo/FwFM/script/layers/interaction.py
new file mode 100644
index 00000000000..f19be14be9c
--- /dev/null
+++ b/modelzoo/FwFM/script/layers/interaction.py
@@ -0,0 +1,1492 @@
+# -*- coding:utf-8 -*-
+"""
+
+Authors:
+    Weichen Shen, weichenswc@163.com,
+    Harshit Pande
+
+"""
+
+import itertools
+
+import tensorflow as tf
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras.backend import batch_dot
+
+try:
+    from tensorflow.python.ops.init_ops import Zeros, Ones, Constant, TruncatedNormal, \
+        glorot_normal_initializer as glorot_normal, \
+        glorot_uniform_initializer as glorot_uniform
+except ImportError:
+    from tensorflow.python.ops.init_ops_v2 import Zeros, Ones, Constant, TruncatedNormal, glorot_normal, glorot_uniform
+
+from tensorflow.python.keras.layers import Layer, MaxPooling2D, Conv2D, Dropout, Lambda, Dense, Flatten
+from tensorflow.python.keras.regularizers import l2
+from tensorflow.python.layers import utils
+
+from .activation import activation_layer
+from .utils import concat_func, reduce_sum, softmax, reduce_mean
+
+
+class AFMLayer(Layer):
+    """Attentional Factorization Machine models pairwise (order-2) feature
+    interactions without linear term and bias.
+
+    Input shape
+        - A list of 3D tensors with shape: ``(batch_size,1,embedding_size)``.
+
+    Output shape
+        - 2D tensor with shape: ``(batch_size, 1)``.
+
+    Arguments
+        - **attention_factor** : Positive integer, dimensionality of the
+          attention network output space.
+
+        - **l2_reg_w** : float between 0 and 1. L2 regularizer strength
+          applied to the attention network.
+
+        - **dropout_rate** : float in [0,1). Fraction of the attention net output units to dropout.
+
+        - **seed** : A Python integer to use as random seed.
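+
+    The attention weight of each pairwise interaction ``v_i * v_j``
+    (element-wise product) is computed in ``call`` below as
+    ``softmax(h^T relu(W (v_i * v_j) + b))`` over all pairs, where ``W``,
+    ``b`` and ``h`` are the ``attention_W``, ``attention_b`` and
+    ``projection_h`` weights created in ``build``.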
+ + References + - [Attentional Factorization Machines : Learning the Weight of Feature + Interactions via Attention Networks](https://arxiv.org/pdf/1708.04617.pdf) + """ + + def __init__(self, attention_factor=4, l2_reg_w=0, dropout_rate=0, seed=1024, **kwargs): + self.attention_factor = attention_factor + self.l2_reg_w = l2_reg_w + self.dropout_rate = dropout_rate + self.seed = seed + super(AFMLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + # input_shape = input_shape[0] + # if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `AttentionalFM` layer should be called ' + 'on a list of at least 2 inputs') + + shape_set = set() + reduced_input_shape = [shape.as_list() for shape in input_shape] + for i in range(len(input_shape)): + shape_set.add(tuple(reduced_input_shape[i])) + + if len(shape_set) > 1: + raise ValueError('A `AttentionalFM` layer requires ' + 'inputs with same shapes ' + 'Got different shapes: %s' % (shape_set)) + + if len(input_shape[0]) != 3 or input_shape[0][1] != 1: + raise ValueError('A `AttentionalFM` layer requires ' + 'inputs of a list with same shape tensor like\ + (None, 1, embedding_size)' + 'Got different shapes: %s' % (input_shape[0])) + + embedding_size = int(input_shape[0][-1]) + + self.attention_W = self.add_weight(shape=(embedding_size, + self.attention_factor), initializer=glorot_normal(seed=self.seed), + regularizer=l2(self.l2_reg_w), name="attention_W") + self.attention_b = self.add_weight( + shape=(self.attention_factor,), initializer=Zeros(), name="attention_b") + self.projection_h = self.add_weight(shape=(self.attention_factor, 1), + initializer=glorot_normal(seed=self.seed), name="projection_h") + self.projection_p = self.add_weight(shape=( + embedding_size, 1), initializer=glorot_normal(seed=self.seed), name="projection_p") + self.dropout = Dropout( + self.dropout_rate, seed=self.seed) + + self.tensordot = Lambda( + lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0))) + + # Be sure to call this somewhere! 
+ super(AFMLayer, self).build(input_shape) + + def call(self, inputs, training=None, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embeds_vec_list = inputs + row = [] + col = [] + + for r, c in itertools.combinations(embeds_vec_list, 2): + row.append(r) + col.append(c) + + p = tf.concat(row, axis=1) + q = tf.concat(col, axis=1) + inner_product = p * q + + bi_interaction = inner_product + attention_temp = tf.nn.relu(tf.nn.bias_add(tf.tensordot( + bi_interaction, self.attention_W, axes=(-1, 0)), self.attention_b)) + # Dense(self.attention_factor,'relu',kernel_regularizer=l2(self.l2_reg_w))(bi_interaction) + self.normalized_att_score = softmax(tf.tensordot( + attention_temp, self.projection_h, axes=(-1, 0)), dim=1) + attention_output = reduce_sum( + self.normalized_att_score * bi_interaction, axis=1) + + attention_output = self.dropout(attention_output, training=training) # training + + afm_out = self.tensordot([attention_output, self.projection_p]) + return afm_out + + def compute_output_shape(self, input_shape): + + if not isinstance(input_shape, list): + raise ValueError('A `AFMLayer` layer should be called ' + 'on a list of inputs.') + return (None, 1) + + def get_config(self, ): + config = {'attention_factor': self.attention_factor, + 'l2_reg_w': self.l2_reg_w, 'dropout_rate': self.dropout_rate, 'seed': self.seed} + base_config = super(AFMLayer, self).get_config() + base_config.update(config) + return base_config + + +class BiInteractionPooling(Layer): + """Bi-Interaction Layer used in Neural FM,compress the + pairwise element-wise product of features into one single vector. + + Input shape + - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``. + + Output shape + - 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + References + - [He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364.](http://arxiv.org/abs/1708.05027) + """ + + def __init__(self, **kwargs): + + super(BiInteractionPooling, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + + super(BiInteractionPooling, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + concated_embeds_value = inputs + square_of_sum = tf.square(reduce_sum( + concated_embeds_value, axis=1, keep_dims=True)) + sum_of_square = reduce_sum( + concated_embeds_value * concated_embeds_value, axis=1, keep_dims=True) + cross_term = 0.5 * (square_of_sum - sum_of_square) + + return cross_term + + def compute_output_shape(self, input_shape): + return (None, 1, input_shape[-1]) + + +class CIN(Layer): + """Compressed Interaction Network used in xDeepFM.This implemention is + adapted from code that the author of the paper published on https://github.com/Leavingseason/xDeepFM. + + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size, featuremap_num)`` ``featuremap_num = sum(self.layer_size[:-1]) // 2 + self.layer_size[-1]`` if ``split_half=True``,else ``sum(layer_size)`` . 
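+      For example, with the default ``layer_size=(128, 128)`` and
+      ``split_half=True``, ``featuremap_num = 128 // 2 + 128 = 192``.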
+
+    Arguments
+        - **layer_size** : list of int. Feature maps in each layer.
+
+        - **activation** : activation function used on feature maps.
+
+        - **split_half** : bool. If set to True, half of the feature maps in each hidden layer connect to the
+          output unit and the other half feed the next layer; if False, every feature map does both.
+
+        - **seed** : A Python integer to use as random seed.
+
+    References
+        - [Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.](https://arxiv.org/pdf/1803.05170.pdf)
+    """
+
+    def __init__(self, layer_size=(128, 128), activation='relu', split_half=True, l2_reg=1e-5, seed=1024, **kwargs):
+        if len(layer_size) == 0:
+            raise ValueError(
+                "layer_size must be a list(tuple) of length at least 1")
+        self.layer_size = layer_size
+        self.split_half = split_half
+        self.activation = activation
+        self.l2_reg = l2_reg
+        self.seed = seed
+        super(CIN, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        if len(input_shape) != 3:
+            raise ValueError(
+                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
+
+        self.field_nums = [int(input_shape[1])]
+        self.filters = []
+        self.bias = []
+        for i, size in enumerate(self.layer_size):
+
+            self.filters.append(self.add_weight(name='filter' + str(i),
+                                                shape=[1, self.field_nums[-1]
+                                                       * self.field_nums[0], size],
+                                                dtype=tf.float32, initializer=glorot_uniform(
+                                                    seed=self.seed + i),
+                                                regularizer=l2(self.l2_reg)))
+
+            self.bias.append(self.add_weight(name='bias' + str(i), shape=[size], dtype=tf.float32,
+                                             initializer=Zeros()))
+
+            if self.split_half:
+                if i != len(self.layer_size) - 1 and size % 2 > 0:
+                    raise ValueError(
+                        "layer_size must be an even number except for the last layer when split_half=True")
+
+                self.field_nums.append(size // 2)
+            else:
+                self.field_nums.append(size)
+
+        self.activation_layers = [activation_layer(
+            self.activation) for _ in self.layer_size]
+
+        super(CIN, self).build(input_shape)  # Be sure to call this somewhere!
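+
+    # Sketch of what `call` below computes: for each of the `dim` embedding
+    # slots, it forms the outer product between the original field map
+    # (split_tensor0) and the previous hidden map, then compresses the result
+    # back to `layer_size[idx]` feature maps with the 1-D convolution filters
+    # created in `build`.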
+ + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + dim = int(inputs.get_shape()[-1]) + hidden_nn_layers = [inputs] + final_result = [] + + split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2) + for idx, layer_size in enumerate(self.layer_size): + split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2) + + dot_result_m = tf.matmul( + split_tensor0, split_tensor, transpose_b=True) + + dot_result_o = tf.reshape( + dot_result_m, shape=[dim, -1, self.field_nums[0] * self.field_nums[idx]]) + + dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2]) + + curr_out = tf.nn.conv1d( + dot_result, filters=self.filters[idx], stride=1, padding='VALID') + + curr_out = tf.nn.bias_add(curr_out, self.bias[idx]) + + curr_out = self.activation_layers[idx](curr_out) + + curr_out = tf.transpose(curr_out, perm=[0, 2, 1]) + + if self.split_half: + if idx != len(self.layer_size) - 1: + next_hidden, direct_connect = tf.split( + curr_out, 2 * [layer_size // 2], 1) + else: + direct_connect = curr_out + next_hidden = 0 + else: + direct_connect = curr_out + next_hidden = curr_out + + final_result.append(direct_connect) + hidden_nn_layers.append(next_hidden) + + result = tf.concat(final_result, axis=1) + result = reduce_sum(result, -1, keep_dims=False) + + return result + + def compute_output_shape(self, input_shape): + if self.split_half: + featuremap_num = sum( + self.layer_size[:-1]) // 2 + self.layer_size[-1] + else: + featuremap_num = sum(self.layer_size) + return (None, featuremap_num) + + def get_config(self, ): + + config = {'layer_size': self.layer_size, 'split_half': self.split_half, 'activation': self.activation, + 'seed': self.seed} + base_config = super(CIN, self).get_config() + base_config.update(config) + return base_config + + +class CrossNet(Layer): + """The Cross Network part of Deep&Cross Network model, + which leans both low and high degree cross feature. + + Input shape + - 2D tensor with shape: ``(batch_size, units)``. + + Output shape + - 2D tensor with shape: ``(batch_size, units)``. + + Arguments + - **layer_num**: Positive integer, the cross layer number + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix + + - **parameterization**: string, ``"vector"`` or ``"matrix"`` , way to parameterize the cross network. + + - **seed**: A Python integer to use as random seed. + + References + - [Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. 
ACM, 2017: 12.](https://arxiv.org/abs/1708.05123) + """ + + def __init__(self, layer_num=2, parameterization='vector', l2_reg=0, seed=1024, **kwargs): + self.layer_num = layer_num + self.parameterization = parameterization + self.l2_reg = l2_reg + self.seed = seed + print('CrossNet parameterization:', self.parameterization) + super(CrossNet, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),)) + + dim = int(input_shape[-1]) + if self.parameterization == 'vector': + self.kernels = [self.add_weight(name='kernel' + str(i), + shape=(dim, 1), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + elif self.parameterization == 'matrix': + self.kernels = [self.add_weight(name='kernel' + str(i), + shape=(dim, dim), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + else: # error + raise ValueError("parameterization should be 'vector' or 'matrix'") + self.bias = [self.add_weight(name='bias' + str(i), + shape=(dim, 1), + initializer=Zeros(), + trainable=True) for i in range(self.layer_num)] + # Be sure to call this somewhere! + super(CrossNet, self).build(input_shape) + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs))) + + x_0 = tf.expand_dims(inputs, axis=2) + x_l = x_0 + for i in range(self.layer_num): + if self.parameterization == 'vector': + xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0)) + dot_ = tf.matmul(x_0, xl_w) + x_l = dot_ + self.bias[i] + x_l + elif self.parameterization == 'matrix': + xl_w = tf.einsum('ij,bjk->bik', self.kernels[i], x_l) # W * xi (bs, dim, 1) + dot_ = xl_w + self.bias[i] # W * xi + b + x_l = x_0 * dot_ + x_l # x0 · (W * xi + b) +xl Hadamard-product + else: # error + raise ValueError("parameterization should be 'vector' or 'matrix'") + x_l = tf.squeeze(x_l, axis=2) + return x_l + + def get_config(self, ): + + config = {'layer_num': self.layer_num, 'parameterization': self.parameterization, + 'l2_reg': self.l2_reg, 'seed': self.seed} + base_config = super(CrossNet, self).get_config() + base_config.update(config) + return base_config + + def compute_output_shape(self, input_shape): + return input_shape + + +class CrossNetMix(Layer): + """The Cross Network part of DCN-Mix model, which improves DCN-M by: + 1 add MOE to learn feature interactions in different subspaces + 2 add nonlinear transformations in low-dimensional space + + Input shape + - 2D tensor with shape: ``(batch_size, units)``. + + Output shape + - 2D tensor with shape: ``(batch_size, units)``. + + Arguments + - **low_rank** : Positive integer, dimensionality of low-rank sapce. + + - **num_experts** : Positive integer, number of experts. + + - **layer_num**: Positive integer, the cross layer number + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix + + - **seed**: A Python integer to use as random seed. + + References + - [Wang R, Shivanna R, Cheng D Z, et al. DCN-M: Improved Deep & Cross Network for Feature Cross Learning in Web-scale Learning to Rank Systems[J]. 
2020.](https://arxiv.org/abs/2008.13535) + """ + + def __init__(self, low_rank=32, num_experts=4, layer_num=2, l2_reg=0, seed=1024, **kwargs): + self.low_rank = low_rank + self.num_experts = num_experts + self.layer_num = layer_num + self.l2_reg = l2_reg + self.seed = seed + super(CrossNetMix, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),)) + + dim = int(input_shape[-1]) + + # U: (dim, low_rank) + self.U_list = [self.add_weight(name='U_list' + str(i), + shape=(self.num_experts, dim, self.low_rank), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + # V: (dim, low_rank) + self.V_list = [self.add_weight(name='V_list' + str(i), + shape=(self.num_experts, dim, self.low_rank), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + # C: (low_rank, low_rank) + self.C_list = [self.add_weight(name='C_list' + str(i), + shape=(self.num_experts, self.low_rank, self.low_rank), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + + self.gating = [Dense(1, use_bias=False) for i in range(self.num_experts)] + + self.bias = [self.add_weight(name='bias' + str(i), + shape=(dim, 1), + initializer=Zeros(), + trainable=True) for i in range(self.layer_num)] + # Be sure to call this somewhere! + super(CrossNetMix, self).build(input_shape) + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs))) + + x_0 = tf.expand_dims(inputs, axis=2) + x_l = x_0 + for i in range(self.layer_num): + output_of_experts = [] + gating_score_of_experts = [] + for expert_id in range(self.num_experts): + # (1) G(x_l) + # compute the gating score by x_l + gating_score_of_experts.append(self.gating[expert_id](tf.squeeze(x_l, axis=2))) + + # (2) E(x_l) + # project the input x_l to $\mathbb{R}^{r}$ + v_x = tf.einsum('ij,bjk->bik', tf.transpose(self.V_list[i][expert_id]), x_l) # (bs, low_rank, 1) + + # nonlinear activation in low rank space + v_x = tf.nn.tanh(v_x) + v_x = tf.einsum('ij,bjk->bik', self.C_list[i][expert_id], v_x) # (bs, low_rank, 1) + v_x = tf.nn.tanh(v_x) + + # project back to $\mathbb{R}^{d}$ + uv_x = tf.einsum('ij,bjk->bik', self.U_list[i][expert_id], v_x) # (bs, dim, 1) + + dot_ = uv_x + self.bias[i] + dot_ = x_0 * dot_ # Hadamard-product + + output_of_experts.append(tf.squeeze(dot_, axis=2)) + + # (3) mixture of low-rank experts + output_of_experts = tf.stack(output_of_experts, 2) # (bs, dim, num_experts) + gating_score_of_experts = tf.stack(gating_score_of_experts, 1) # (bs, num_experts, 1) + moe_out = tf.matmul(output_of_experts, tf.nn.softmax(gating_score_of_experts, 1)) + x_l = moe_out + x_l # (bs, dim, 1) + x_l = tf.squeeze(x_l, axis=2) + return x_l + + def get_config(self, ): + + config = {'low_rank': self.low_rank, 'num_experts': self.num_experts, 'layer_num': self.layer_num, + 'l2_reg': self.l2_reg, 'seed': self.seed} + base_config = super(CrossNetMix, self).get_config() + base_config.update(config) + return base_config + + def compute_output_shape(self, input_shape): + return input_shape + + +class FM(Layer): + """Factorization Machine models pairwise (order-2) feature interactions + without linear term and bias. 
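+
+    The pairwise interaction term is computed in ``call`` below with the
+    usual O(N*k) reformulation, "square of sum minus sum of squares" over
+    the field axis: ``0.5 * sum_k((sum_i v_{i,k})**2 - sum_i v_{i,k}**2)``.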
+ + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size, 1)``. + + References + - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) + """ + + def __init__(self, **kwargs): + + super(FM, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError("Unexpected inputs dimensions % d,\ + expect to be 3 dimensions" % (len(input_shape))) + + super(FM, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" + % (K.ndim(inputs))) + + concated_embeds_value = inputs + + square_of_sum = tf.square(reduce_sum( + concated_embeds_value, axis=1, keep_dims=True)) + sum_of_square = reduce_sum( + concated_embeds_value * concated_embeds_value, axis=1, keep_dims=True) + cross_term = square_of_sum - sum_of_square + cross_term = 0.5 * reduce_sum(cross_term, axis=2, keep_dims=False) + + return cross_term + + def compute_output_shape(self, input_shape): + return (None, 1) + + + +class InnerProductLayer(Layer): + """InnerProduct Layer used in PNN that compute the element-wise + product or inner product between feature vectors. + + Input shape + - a list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Output shape + - 3D tensor with shape: ``(batch_size, N*(N-1)/2 ,1)`` if use reduce_sum. or 3D tensor with shape: ``(batch_size, N*(N-1)/2, embedding_size )`` if not use reduce_sum. + + Arguments + - **reduce_sum**: bool. Whether return inner product or element-wise product + + References + - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf) + """ + + def __init__(self, reduce_sum=True, **kwargs): + self.reduce_sum = reduce_sum + super(InnerProductLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `InnerProductLayer` layer should be called ' + 'on a list of at least 2 inputs') + + reduced_inputs_shapes = [shape.as_list() for shape in input_shape] + shape_set = set() + + for i in range(len(input_shape)): + shape_set.add(tuple(reduced_inputs_shapes[i])) + + if len(shape_set) > 1: + raise ValueError('A `InnerProductLayer` layer requires ' + 'inputs with same shapes ' + 'Got different shapes: %s' % (shape_set)) + + if len(input_shape[0]) != 3 or input_shape[0][1] != 1: + raise ValueError('A `InnerProductLayer` layer requires ' + 'inputs of a list with same shape tensor like (None,1,embedding_size)' + 'Got different shapes: %s' % (input_shape[0])) + super(InnerProductLayer, self).build( + input_shape) # Be sure to call this somewhere! 
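+
+    # `call` below enumerates all N*(N-1)/2 field pairs (i, j), concatenates
+    # the left/right embeddings into p and q, and multiplies them
+    # element-wise; with reduce_sum=True the embedding axis is then summed,
+    # yielding one inner product per pair.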
+ + def call(self, inputs, **kwargs): + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embed_list = inputs + row = [] + col = [] + num_inputs = len(embed_list) + + for i in range(num_inputs - 1): + for j in range(i + 1, num_inputs): + row.append(i) + col.append(j) + p = tf.concat([embed_list[idx] + for idx in row], axis=1) # batch num_pairs k + q = tf.concat([embed_list[idx] + for idx in col], axis=1) + + inner_product = p * q + if self.reduce_sum: + inner_product = reduce_sum( + inner_product, axis=2, keep_dims=True) + return inner_product + + def compute_output_shape(self, input_shape): + num_inputs = len(input_shape) + num_pairs = int(num_inputs * (num_inputs - 1) / 2) + input_shape = input_shape[0] + embed_size = input_shape[-1] + if self.reduce_sum: + return (input_shape[0], num_pairs, 1) + else: + return (input_shape[0], num_pairs, embed_size) + + def get_config(self, ): + config = {'reduce_sum': self.reduce_sum, } + base_config = super(InnerProductLayer, self).get_config() + base_config.update(config) + return base_config + + +class InteractingLayer(Layer): + """A Layer used in AutoInt that model the correlations between different feature fields by multi-head self-attention mechanism. + + Input shape + - A 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 3D tensor with shape:``(batch_size,field_size,att_embedding_size * head_num)``. + + + Arguments + - **att_embedding_size**: int.The embedding size in multi-head self-attention network. + - **head_num**: int.The head number in multi-head self-attention network. + - **use_res**: bool.Whether or not use standard residual connections before output. + - **seed**: A Python integer to use as random seed. + + References + - [Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.](https://arxiv.org/abs/1810.11921) + """ + + def __init__(self, att_embedding_size=8, head_num=2, use_res=True, scaling=False, seed=1024, **kwargs): + if head_num <= 0: + raise ValueError('head_num must be a int > 0') + self.att_embedding_size = att_embedding_size + self.head_num = head_num + self.use_res = use_res + self.seed = seed + self.scaling = scaling + super(InteractingLayer, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + embedding_size = int(input_shape[-1]) + self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed)) + self.W_key = self.add_weight(name='key', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 1)) + self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 2)) + if self.use_res: + self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed)) + + # Be sure to call this somewhere! 
+ super(InteractingLayer, self).build(input_shape) + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + querys = tf.tensordot(inputs, self.W_Query, + axes=(-1, 0)) # None F D*head_num + keys = tf.tensordot(inputs, self.W_key, axes=(-1, 0)) + values = tf.tensordot(inputs, self.W_Value, axes=(-1, 0)) + + # head_num None F D + querys = tf.stack(tf.split(querys, self.head_num, axis=2)) + keys = tf.stack(tf.split(keys, self.head_num, axis=2)) + values = tf.stack(tf.split(values, self.head_num, axis=2)) + + inner_product = tf.matmul( + querys, keys, transpose_b=True) # head_num None F F + if self.scaling: + inner_product /= self.att_embedding_size ** 0.5 + self.normalized_att_scores = softmax(inner_product) + + result = tf.matmul(self.normalized_att_scores, + values) # head_num None F D + result = tf.concat(tf.split(result, self.head_num, ), axis=-1) + result = tf.squeeze(result, axis=0) # None F D*head_num + + if self.use_res: + result += tf.tensordot(inputs, self.W_Res, axes=(-1, 0)) + result = tf.nn.relu(result) + + return result + + def compute_output_shape(self, input_shape): + + return (None, input_shape[1], self.att_embedding_size * self.head_num) + + def get_config(self, ): + config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num, 'use_res': self.use_res, + 'seed': self.seed} + base_config = super(InteractingLayer, self).get_config() + base_config.update(config) + return base_config + + +class OutterProductLayer(Layer): + """OutterProduct Layer used in PNN.This implemention is + adapted from code that the author of the paper published on https://github.com/Atomu2014/product-nets. + + Input shape + - A list of N 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Output shape + - 2D tensor with shape:``(batch_size,N*(N-1)/2 )``. + + Arguments + - **kernel_type**: str. The kernel weight matrix type to use,can be mat,vec or num + + - **seed**: A Python integer to use as random seed. + + References + - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. 
IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf) + """ + + def __init__(self, kernel_type='mat', seed=1024, **kwargs): + if kernel_type not in ['mat', 'vec', 'num']: + raise ValueError("kernel_type must be mat,vec or num") + self.kernel_type = kernel_type + self.seed = seed + super(OutterProductLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `OutterProductLayer` layer should be called ' + 'on a list of at least 2 inputs') + + reduced_inputs_shapes = [shape.as_list() for shape in input_shape] + shape_set = set() + + for i in range(len(input_shape)): + shape_set.add(tuple(reduced_inputs_shapes[i])) + + if len(shape_set) > 1: + raise ValueError('A `OutterProductLayer` layer requires ' + 'inputs with same shapes ' + 'Got different shapes: %s' % (shape_set)) + + if len(input_shape[0]) != 3 or input_shape[0][1] != 1: + raise ValueError('A `OutterProductLayer` layer requires ' + 'inputs of a list with same shape tensor like (None,1,embedding_size)' + 'Got different shapes: %s' % (input_shape[0])) + num_inputs = len(input_shape) + num_pairs = int(num_inputs * (num_inputs - 1) / 2) + input_shape = input_shape[0] + embed_size = int(input_shape[-1]) + if self.kernel_type == 'mat': + + self.kernel = self.add_weight(shape=(embed_size, num_pairs, embed_size), + initializer=glorot_uniform( + seed=self.seed), + name='kernel') + elif self.kernel_type == 'vec': + self.kernel = self.add_weight(shape=(num_pairs, embed_size,), initializer=glorot_uniform(self.seed), + name='kernel' + ) + elif self.kernel_type == 'num': + self.kernel = self.add_weight( + shape=(num_pairs, 1), initializer=glorot_uniform(self.seed), name='kernel') + + super(OutterProductLayer, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embed_list = inputs + row = [] + col = [] + num_inputs = len(embed_list) + for i in range(num_inputs - 1): + for j in range(i + 1, num_inputs): + row.append(i) + col.append(j) + p = tf.concat([embed_list[idx] + for idx in row], axis=1) # batch num_pairs k + # Reshape([num_pairs, self.embedding_size]) + q = tf.concat([embed_list[idx] for idx in col], axis=1) + + # ------------------------- + if self.kernel_type == 'mat': + p = tf.expand_dims(p, 1) + # k k* pair* k + # batch * pair + kp = reduce_sum( + + # batch * pair * k + + tf.multiply( + + # batch * pair * k + + tf.transpose( + + # batch * k * pair + + reduce_sum( + + # batch * k * pair * k + + tf.multiply( + + p, self.kernel), + + -1), + + [0, 2, 1]), + + q), + + -1) + else: + # 1 * pair * (k or 1) + + k = tf.expand_dims(self.kernel, 0) + + # batch * pair + + kp = reduce_sum(p * q * k, -1) + + # p q # b * p * k + + return kp + + def compute_output_shape(self, input_shape): + num_inputs = len(input_shape) + num_pairs = int(num_inputs * (num_inputs - 1) / 2) + return (None, num_pairs) + + def get_config(self, ): + config = {'kernel_type': self.kernel_type, 'seed': self.seed} + base_config = super(OutterProductLayer, self).get_config() + base_config.update(config) + return base_config + + +class FGCNNLayer(Layer): + """Feature Generation Layer used in FGCNN,including Convolution,MaxPooling and Recombination. + + Input shape + - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``. 
+
+    Output shape
+      - 3D tensor with shape: ``(batch_size,new_feature_num,embedding_size)``.
+
+    References
+      - [Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019.](https://arxiv.org/pdf/1904.04447)
+
+    """
+
+    def __init__(self, filters=(14, 16,), kernel_width=(7, 7,), new_maps=(3, 3,), pooling_width=(2, 2),
+                 **kwargs):
+        if not (len(filters) == len(kernel_width) == len(new_maps) == len(pooling_width)):
+            raise ValueError("filters, kernel_width, new_maps and pooling_width must have the same length")
+        self.filters = filters
+        self.kernel_width = kernel_width
+        self.new_maps = new_maps
+        self.pooling_width = pooling_width
+
+        super(FGCNNLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+
+        if len(input_shape) != 3:
+            raise ValueError(
+                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
+        self.conv_layers = []
+        self.pooling_layers = []
+        self.dense_layers = []
+        pooling_shape = input_shape.as_list() + [1, ]
+        embedding_size = int(input_shape[-1])
+        for i in range(1, len(self.filters) + 1):
+            filters = self.filters[i - 1]
+            width = self.kernel_width[i - 1]
+            new_filters = self.new_maps[i - 1]
+            pooling_width = self.pooling_width[i - 1]
+            conv_output_shape = self._conv_output_shape(
+                pooling_shape, (width, 1))
+            pooling_shape = self._pooling_output_shape(
+                conv_output_shape, (pooling_width, 1))
+            self.conv_layers.append(Conv2D(filters=filters, kernel_size=(width, 1), strides=(1, 1),
+                                           padding='same',
+                                           activation='tanh', use_bias=True, ))
+            self.pooling_layers.append(
+                MaxPooling2D(pool_size=(pooling_width, 1)))
+            self.dense_layers.append(Dense(pooling_shape[1] * embedding_size * new_filters,
+                                           activation='tanh', use_bias=True))
+
+        self.flatten = Flatten()
+
+        super(FGCNNLayer, self).build(
+            input_shape)  # Be sure to call this somewhere!
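+
+    # Shape bookkeeping sketch (assumes the default arguments and a
+    # hypothetical (None, 20, 8) input): the 'same'-padded conv keeps 20
+    # rows, MaxPooling2D((2, 1)) halves them to 10, so the first Dense
+    # emits 10 * 8 * 3 = 240 units that call() reshapes into 30 new
+    # features of size 8; stage two pools 10 -> 5 and adds 15 more,
+    # matching the arithmetic in compute_output_shape below.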
+ + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embedding_size = int(inputs.shape[-1]) + pooling_result = tf.expand_dims(inputs, axis=3) + + new_feature_list = [] + + for i in range(1, len(self.filters) + 1): + new_filters = self.new_maps[i - 1] + + conv_result = self.conv_layers[i - 1](pooling_result) + + pooling_result = self.pooling_layers[i - 1](conv_result) + + flatten_result = self.flatten(pooling_result) + + new_result = self.dense_layers[i - 1](flatten_result) + + new_feature_list.append( + tf.reshape(new_result, (-1, int(pooling_result.shape[1]) * new_filters, embedding_size))) + + new_features = concat_func(new_feature_list, axis=1) + return new_features + + def compute_output_shape(self, input_shape): + + new_features_num = 0 + features_num = input_shape[1] + + for i in range(0, len(self.kernel_width)): + pooled_features_num = features_num // self.pooling_width[i] + new_features_num += self.new_maps[i] * pooled_features_num + features_num = pooled_features_num + + return (None, new_features_num, input_shape[-1]) + + def get_config(self, ): + config = {'kernel_width': self.kernel_width, 'filters': self.filters, 'new_maps': self.new_maps, + 'pooling_width': self.pooling_width} + base_config = super(FGCNNLayer, self).get_config() + base_config.update(config) + return base_config + + def _conv_output_shape(self, input_shape, kernel_size): + # channels_last + space = input_shape[1:-1] + new_space = [] + for i in range(len(space)): + new_dim = utils.conv_output_length( + space[i], + kernel_size[i], + padding='same', + stride=1, + dilation=1) + new_space.append(new_dim) + return ([input_shape[0]] + new_space + [self.filters]) + + def _pooling_output_shape(self, input_shape, pool_size): + # channels_last + + rows = input_shape[1] + cols = input_shape[2] + rows = utils.conv_output_length(rows, pool_size[0], 'valid', + pool_size[0]) + cols = utils.conv_output_length(cols, pool_size[1], 'valid', + pool_size[1]) + return [input_shape[0], rows, cols, input_shape[3]] + + +class SENETLayer(Layer): + """SENETLayer used in FiBiNET. + + Input shape + - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Output shape + - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Arguments + - **reduction_ratio** : Positive integer, dimensionality of the + attention network output space. + + - **seed** : A Python integer to use as random seed. 
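+
+        The layer squeezes each field embedding to its mean, passes the
+        resulting field-level vector through two ReLU projections (W_1,
+        then W_2), and rescales every field embedding by its attention
+        weight, exactly the Z / A_1 / A_2 / V steps in ``call`` below.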
+ + References + - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) + """ + + def __init__(self, reduction_ratio=3, seed=1024, **kwargs): + self.reduction_ratio = reduction_ratio + + self.seed = seed + super(SENETLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `AttentionalFM` layer should be called ' + 'on a list of at least 2 inputs') + + self.filed_size = len(input_shape) + self.embedding_size = input_shape[0][-1] + reduction_size = max(1, self.filed_size // self.reduction_ratio) + + self.W_1 = self.add_weight(shape=( + self.filed_size, reduction_size), initializer=glorot_normal(seed=self.seed), name="W_1") + self.W_2 = self.add_weight(shape=( + reduction_size, self.filed_size), initializer=glorot_normal(seed=self.seed), name="W_2") + + self.tensordot = Lambda( + lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0))) + + # Be sure to call this somewhere! + super(SENETLayer, self).build(input_shape) + + def call(self, inputs, training=None, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + inputs = concat_func(inputs, axis=1) + Z = reduce_mean(inputs, axis=-1, ) + + A_1 = tf.nn.relu(self.tensordot([Z, self.W_1])) + A_2 = tf.nn.relu(self.tensordot([A_1, self.W_2])) + V = tf.multiply(inputs, tf.expand_dims(A_2, axis=2)) + + return tf.split(V, self.filed_size, axis=1) + + def compute_output_shape(self, input_shape): + + return input_shape + + def compute_mask(self, inputs, mask=None): + return [None] * self.filed_size + + def get_config(self, ): + config = {'reduction_ratio': self.reduction_ratio, 'seed': self.seed} + base_config = super(SENETLayer, self).get_config() + base_config.update(config) + return base_config + + +class BilinearInteraction(Layer): + """BilinearInteraction Layer used in FiBiNET. + + Input shape + - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. Its length is ``filed_size``. + + Output shape + - 3D tensor with shape: ``(batch_size,filed_size*(filed_size-1)/2,embedding_size)``. + + Arguments + - **bilinear_type** : String, types of bilinear functions used in this layer. + + - **seed** : A Python integer to use as random seed. 
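+
+        With ``all`` a single weight W is shared across every field pair,
+        ``each`` learns one W_i per field, and ``interaction`` learns one
+        W_ij per field pair; in every case each pair contributes the
+        element-wise product ``(v_i . W) * v_j``, as in ``call`` below.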
+ + References + - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) + + """ + + def __init__(self, bilinear_type="interaction", seed=1024, **kwargs): + self.bilinear_type = bilinear_type + self.seed = seed + + super(BilinearInteraction, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `AttentionalFM` layer should be called ' + 'on a list of at least 2 inputs') + embedding_size = int(input_shape[0][-1]) + + if self.bilinear_type == "all": + self.W = self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal( + seed=self.seed), name="bilinear_weight") + elif self.bilinear_type == "each": + self.W_list = [self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal( + seed=self.seed), name="bilinear_weight" + str(i)) for i in range(len(input_shape) - 1)] + elif self.bilinear_type == "interaction": + self.W_list = [self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal( + seed=self.seed), name="bilinear_weight" + str(i) + '_' + str(j)) for i, j in + itertools.combinations(range(len(input_shape)), 2)] + else: + raise NotImplementedError + + super(BilinearInteraction, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + n = len(inputs) + if self.bilinear_type == "all": + vidots = [tf.tensordot(inputs[i], self.W, axes=(-1, 0)) for i in range(n)] + p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)] + elif self.bilinear_type == "each": + vidots = [tf.tensordot(inputs[i], self.W_list[i], axes=(-1, 0)) for i in range(n - 1)] + p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)] + elif self.bilinear_type == "interaction": + p = [tf.multiply(tf.tensordot(v[0], w, axes=(-1, 0)), v[1]) + for v, w in zip(itertools.combinations(inputs, 2), self.W_list)] + else: + raise NotImplementedError + output = concat_func(p, axis=1) + return output + + def compute_output_shape(self, input_shape): + filed_size = len(input_shape) + embedding_size = input_shape[0][-1] + + return (None, filed_size * (filed_size - 1) // 2, embedding_size) + + def get_config(self, ): + config = {'bilinear_type': self.bilinear_type, 'seed': self.seed} + base_config = super(BilinearInteraction, self).get_config() + base_config.update(config) + return base_config + + +class FieldWiseBiInteraction(Layer): + """Field-Wise Bi-Interaction Layer used in FLEN,compress the + pairwise element-wise product of features into one single vector. + + Input shape + - A list of 3D tensor with shape:``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size,embedding_size)``. + + Arguments + - **use_bias** : Boolean, if use bias. + - **seed** : A Python integer to use as random seed. 
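+
+        The output is the sum of two parts: an MF-style term built from
+        pairwise products of the per-field summed vectors (weighted by
+        ``kernel_mf``) and an FM-style square-of-sum minus sum-of-square
+        term computed within each field (weighted by ``kernel_fm``); see
+        the MF/FM modules in ``call`` below.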
+ + References + - [FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690) + + """ + + def __init__(self, use_bias=True, seed=1024, **kwargs): + self.use_bias = use_bias + self.seed = seed + + super(FieldWiseBiInteraction, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError( + 'A `Field-Wise Bi-Interaction` layer should be called ' + 'on a list of at least 2 inputs') + + self.num_fields = len(input_shape) + embedding_size = input_shape[0][-1] + + self.kernel_mf = self.add_weight( + name='kernel_mf', + shape=(int(self.num_fields * (self.num_fields - 1) / 2), 1), + initializer=Ones(), + regularizer=None, + trainable=True) + + self.kernel_fm = self.add_weight( + name='kernel_fm', + shape=(self.num_fields, 1), + initializer=Constant(value=0.5), + regularizer=None, + trainable=True) + if self.use_bias: + self.bias_mf = self.add_weight(name='bias_mf', + shape=(embedding_size), + initializer=Zeros()) + self.bias_fm = self.add_weight(name='bias_fm', + shape=(embedding_size), + initializer=Zeros()) + + super(FieldWiseBiInteraction, + self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % + (K.ndim(inputs))) + + field_wise_embeds_list = inputs + + # MF module + field_wise_vectors = tf.concat([ + reduce_sum(field_i_vectors, axis=1, keep_dims=True) + for field_i_vectors in field_wise_embeds_list + ], 1) + + left = [] + right = [] + + for i, j in itertools.combinations(list(range(self.num_fields)), 2): + left.append(i) + right.append(j) + + embeddings_left = tf.gather(params=field_wise_vectors, + indices=left, + axis=1) + embeddings_right = tf.gather(params=field_wise_vectors, + indices=right, + axis=1) + + embeddings_prod = embeddings_left * embeddings_right + field_weighted_embedding = embeddings_prod * self.kernel_mf + h_mf = reduce_sum(field_weighted_embedding, axis=1) + if self.use_bias: + h_mf = tf.nn.bias_add(h_mf, self.bias_mf) + + # FM module + square_of_sum_list = [ + tf.square(reduce_sum(field_i_vectors, axis=1, keep_dims=True)) + for field_i_vectors in field_wise_embeds_list + ] + sum_of_square_list = [ + reduce_sum(field_i_vectors * field_i_vectors, + axis=1, + keep_dims=True) + for field_i_vectors in field_wise_embeds_list + ] + + field_fm = tf.concat([ + square_of_sum - sum_of_square for square_of_sum, sum_of_square in + zip(square_of_sum_list, sum_of_square_list) + ], 1) + + h_fm = reduce_sum(field_fm * self.kernel_fm, axis=1) + if self.use_bias: + h_fm = tf.nn.bias_add(h_fm, self.bias_fm) + + return h_mf + h_fm + + def compute_output_shape(self, input_shape): + return (None, input_shape[0][-1]) + + def get_config(self, ): + config = {'use_bias': self.use_bias, 'seed': self.seed} + base_config = super(FieldWiseBiInteraction, self).get_config() + base_config.update(config) + return base_config + + +class FwFMLayer(Layer): + """Field-weighted Factorization Machines + + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size, 1)``. 
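+
+    FwFM scores every field pair with a learned scalar strength
+    ``r_{i,j}``; the logit is ``sum_{i<j} r_{i,j} * <v_i, v_j>``, which is
+    what the pairwise loop in ``call`` below computes.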
+ + Arguments + - **num_fields** : integer for number of fields + - **regularizer** : L2 regularizer weight for the field strength parameters of PNN + + References + - [Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising] + https://arxiv.org/pdf/1806.03514.pdf + """ + + def __init__(self, num_fields=4, regularizer=0.000001, **kwargs): + self.num_fields = num_fields + self.regularizer = regularizer + super(FwFMLayer, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError("Unexpected inputs dimensions % d,\ + expect to be 3 dimensions" % (len(input_shape))) + + if input_shape[1] != self.num_fields: + raise ValueError("Mismatch in number of fields {} and \ + concatenated embeddings dims {}".format(self.num_fields, input_shape[1])) + + self.field_strengths = self.add_weight(name='field_pair_strengths', + shape=(self.num_fields, self.num_fields), + initializer=TruncatedNormal(), + regularizer=l2(self.regularizer), + trainable=True) + + super(FwFMLayer, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" + % (K.ndim(inputs))) + + if inputs.shape[1] != self.num_fields: + raise ValueError("Mismatch in number of fields {} and \ + concatenated embeddings dims {}".format(self.num_fields, inputs.shape[1])) + + pairwise_inner_prods = [] + for fi, fj in itertools.combinations(range(self.num_fields), 2): + # get field strength for pair fi and fj + r_ij = self.field_strengths[fi, fj] + + # get embeddings for the features of both the fields + feat_embed_i = tf.squeeze(inputs[0:, fi:fi + 1, 0:], axis=1) + feat_embed_j = tf.squeeze(inputs[0:, fj:fj + 1, 0:], axis=1) + + f = tf.scalar_mul(r_ij, batch_dot(feat_embed_i, feat_embed_j, axes=1)) + pairwise_inner_prods.append(f) + + sum_ = tf.add_n(pairwise_inner_prods) + return sum_ + + def compute_output_shape(self, input_shape): + return (None, 1) + + def get_config(self): + config = super(FwFMLayer, self).get_config().copy() + config.update({ + 'num_fields': self.num_fields, + 'regularizer': self.regularizer + }) + return config + + +class FEFMLayer(Layer): + """Field-Embedded Factorization Machines + + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. 
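+
+    Each field pair (i, j) owns a matrix embedding ``W_ij``; the layer
+    emits one interaction ``v_i^T (W_ij + W_ij^T) v_j`` per pair, matching
+    the symmetrized matmul in ``call`` below.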
+
+    Output shape
+      - 2D tensor with shape:
+      ``(batch_size, (num_fields * (num_fields-1))/2)`` # concatenated FEFM interaction embeddings
+
+    Arguments
+        - **regularizer** : L2 regularizer weight for the field pair matrix embeddings parameters of FEFM
+
+    References
+        - [Field-Embedded Factorization Machines for Click-through Rate Prediction](https://arxiv.org/pdf/2009.09931.pdf)
+    """
+
+    def __init__(self, regularizer, **kwargs):
+        self.regularizer = regularizer
+        super(FEFMLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        if len(input_shape) != 3:
+            raise ValueError("Unexpected inputs dimensions %d, "
+                             "expect to be 3 dimensions" % (len(input_shape)))
+
+        self.num_fields = int(input_shape[1])
+        embedding_size = int(input_shape[2])
+
+        self.field_embeddings = {}
+        for fi, fj in itertools.combinations(range(self.num_fields), 2):
+            field_pair_id = str(fi) + "-" + str(fj)
+            self.field_embeddings[field_pair_id] = self.add_weight(name='field_embeddings' + field_pair_id,
+                                                                   shape=(embedding_size, embedding_size),
+                                                                   initializer=TruncatedNormal(),
+                                                                   regularizer=l2(self.regularizer),
+                                                                   trainable=True)
+
+        super(FEFMLayer, self).build(input_shape)  # Be sure to call this somewhere!
+
+    def call(self, inputs, **kwargs):
+        if K.ndim(inputs) != 3:
+            raise ValueError(
+                "Unexpected inputs dimensions %d, expect to be 3 dimensions"
+                % (K.ndim(inputs)))
+
+        pairwise_inner_prods = []
+        for fi, fj in itertools.combinations(range(self.num_fields), 2):
+            field_pair_id = str(fi) + "-" + str(fj)
+            feat_embed_i = tf.squeeze(inputs[0:, fi:fi + 1, 0:], axis=1)
+            feat_embed_j = tf.squeeze(inputs[0:, fj:fj + 1, 0:], axis=1)
+            field_pair_embed_ij = self.field_embeddings[field_pair_id]
+
+            feat_embed_i_tr = tf.matmul(feat_embed_i, field_pair_embed_ij + tf.transpose(field_pair_embed_ij))
+
+            f = batch_dot(feat_embed_i_tr, feat_embed_j, axes=1)
+            pairwise_inner_prods.append(f)
+
+        concat_vec = tf.concat(pairwise_inner_prods, axis=1)
+        return concat_vec
+
+    def compute_output_shape(self, input_shape):
+        num_fields = int(input_shape[1])
+        # integer division so the inferred dimension is an int, not a float
+        return (None, (num_fields * (num_fields - 1)) // 2)
+
+    def get_config(self):
+        config = super(FEFMLayer, self).get_config().copy()
+        config.update({
+            'regularizer': self.regularizer,
+        })
+        return config
diff --git a/modelzoo/FwFM/script/layers/normalization.py b/modelzoo/FwFM/script/layers/normalization.py
new file mode 100644
index 00000000000..3fceb1257d8
--- /dev/null
+++ b/modelzoo/FwFM/script/layers/normalization.py
@@ -0,0 +1,51 @@
+# -*- coding:utf-8 -*-
+"""
+
+Author:
+    Weichen Shen,weichenswc@163.com
+
+"""
+
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras.layers import Layer
+
+try:
+    from tensorflow.python.ops.init_ops import Zeros, Ones
+except ImportError:
+    from tensorflow.python.ops.init_ops_v2 import Zeros, Ones
+
+
+class LayerNormalization(Layer):
+    def __init__(self, axis=-1, eps=1e-9, center=True,
+                 scale=True, **kwargs):
+        self.axis = axis
+        self.eps = eps
+        self.center = center
+        self.scale = scale
+        super(LayerNormalization, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:],
+                                     initializer=Ones(), trainable=True)
+        self.beta = self.add_weight(name='beta', shape=input_shape[-1:],
+                                    initializer=Zeros(), trainable=True)
+        super(LayerNormalization, self).build(input_shape)
+
+    def call(self, inputs):
+        mean = K.mean(inputs, axis=self.axis, keepdims=True)
+        # use self.axis for the variance too (it was hard-coded to -1)
+        variance = K.mean(K.square(inputs - mean), axis=self.axis, keepdims=True)
+        std = K.sqrt(variance + self.eps)
+        outputs = (inputs - mean) / std
+        if self.scale:
+            outputs *= self.gamma
+        if self.center:
+            outputs += self.beta
+        return outputs
+
+    def compute_output_shape(self, input_shape):
+        return input_shape
+
+    def get_config(self, ):
+        config = {'axis': self.axis, 'eps': self.eps, 'center': self.center, 'scale': self.scale}
+        base_config = super(LayerNormalization, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/modelzoo/FwFM/script/layers/sequence.py b/modelzoo/FwFM/script/layers/sequence.py
new file mode 100644
index 00000000000..45a65915c22
--- /dev/null
+++ b/modelzoo/FwFM/script/layers/sequence.py
@@ -0,0 +1,901 @@
+# -*- coding:utf-8 -*-
+"""
+
+Author:
+    Weichen Shen,weichenswc@163.com
+
+"""
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.keras import backend as K
+
+try:
+    from tensorflow.python.ops.init_ops import TruncatedNormal, glorot_uniform_initializer as glorot_uniform, \
+        identity_initializer as identity
+except ImportError:
+    from tensorflow.python.ops.init_ops_v2 import TruncatedNormal, glorot_uniform, identity
+
+from tensorflow.python.keras.layers import LSTM, Lambda, Layer, Dropout
+
+from .core import LocalActivationUnit
+from .normalization import LayerNormalization
+
+if tf.__version__ >= '2.0.0':
+    from ..contrib.rnn_v2 import dynamic_rnn
+else:
+    from ..contrib.rnn import dynamic_rnn
+from ..contrib.utils import QAAttGRUCell, VecAttGRUCell
+from .utils import reduce_sum, reduce_max, div, softmax, reduce_mean
+
+
+class SequencePoolingLayer(Layer):
+    """The SequencePoolingLayer is used to apply a pooling operation (sum, mean or max) on variable-length sequence features/multi-value features.
+
+      Input shape
+        - A list of two tensors [seq_value, seq_len]
+
+        - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)``
+
+        - seq_len is a 2D tensor with shape : ``(batch_size, 1)``, indicating the valid length of each sequence.
+
+      Output shape
+        - 3D tensor with shape: ``(batch_size, 1, embedding_size)``.
+
+      Arguments
+        - **mode**: str. Pooling operation to be used, can be sum, mean or max.
+
+        - **supports_masking**: If True, the input needs to support masking.
+    """
+
+    def __init__(self, mode='mean', supports_masking=False, **kwargs):
+
+        if mode not in ['sum', 'mean', 'max']:
+            raise ValueError("mode must be sum, mean or max")
+        self.mode = mode
+        self.eps = tf.constant(1e-8, tf.float32)
+        super(SequencePoolingLayer, self).__init__(**kwargs)
+
+        self.supports_masking = supports_masking
+
+    def build(self, input_shape):
+        if not self.supports_masking:
+            self.seq_len_max = int(input_shape[0][1])
+        super(SequencePoolingLayer, self).build(
+            input_shape)  # Be sure to call this somewhere!
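+
+    # Minimal usage sketch (illustrative shapes): pool a padded behavior
+    # sequence of up to T=10 items into a single vector.
+    #
+    #   seq = tf.keras.layers.Input(shape=(10, 8))   # (batch_size, T, emb)
+    #   seq_len = tf.keras.layers.Input(shape=(1,))  # valid length per sample
+    #   pooled = SequencePoolingLayer(mode='mean')([seq, seq_len])
+    #   # pooled shape: (batch_size, 1, 8)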
+ + def call(self, seq_value_len_list, mask=None, **kwargs): + if self.supports_masking: + if mask is None: + raise ValueError( + "When supports_masking=True,input must support masking") + uiseq_embed_list = seq_value_len_list + mask = tf.cast(mask, tf.float32) # tf.to_float(mask) + user_behavior_length = reduce_sum(mask, axis=-1, keep_dims=True) + mask = tf.expand_dims(mask, axis=2) + else: + uiseq_embed_list, user_behavior_length = seq_value_len_list + + mask = tf.sequence_mask(user_behavior_length, + self.seq_len_max, dtype=tf.float32) + mask = tf.transpose(mask, (0, 2, 1)) + + embedding_size = uiseq_embed_list.shape[-1] + + mask = tf.tile(mask, [1, 1, embedding_size]) + + if self.mode == "max": + hist = uiseq_embed_list - (1 - mask) * 1e9 + return reduce_max(hist, 1, keep_dims=True) + + hist = reduce_sum(uiseq_embed_list * mask, 1, keep_dims=False) + + if self.mode == "mean": + hist = div(hist, tf.cast(user_behavior_length, tf.float32) + self.eps) + + hist = tf.expand_dims(hist, axis=1) + return hist + + def compute_output_shape(self, input_shape): + if self.supports_masking: + return (None, 1, input_shape[-1]) + else: + return (None, 1, input_shape[0][-1]) + + def compute_mask(self, inputs, mask): + return None + + def get_config(self, ): + config = {'mode': self.mode, 'supports_masking': self.supports_masking} + base_config = super(SequencePoolingLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class WeightedSequenceLayer(Layer): + """The WeightedSequenceLayer is used to apply weight score on variable-length sequence feature/multi-value feature. + + Input shape + - A list of two tensor [seq_value,seq_len,seq_weight] + + - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` + + - seq_len is a 2D tensor with shape : ``(batch_size, 1)``,indicate valid length of each sequence. + + - seq_weight is a 3D tensor with shape: ``(batch_size, T, 1)`` + + Output shape + - 3D tensor with shape: ``(batch_size, T, embedding_size)``. + + Arguments + - **weight_normalization**: bool.Whether normalize the weight score before applying to sequence. + + - **supports_masking**:If True,the input need to support masking. + """ + + def __init__(self, weight_normalization=True, supports_masking=False, **kwargs): + super(WeightedSequenceLayer, self).__init__(**kwargs) + self.weight_normalization = weight_normalization + self.supports_masking = supports_masking + + def build(self, input_shape): + if not self.supports_masking: + self.seq_len_max = int(input_shape[0][1]) + super(WeightedSequenceLayer, self).build( + input_shape) # Be sure to call this somewhere! 
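+
+    # Minimal usage sketch (illustrative shapes): rescale each valid step
+    # by a per-step score before any downstream pooling.
+    #
+    #   seq = tf.keras.layers.Input(shape=(10, 8))
+    #   seq_len = tf.keras.layers.Input(shape=(1,))
+    #   score = tf.keras.layers.Input(shape=(10, 1))
+    #   out = WeightedSequenceLayer()([seq, seq_len, score])
+    #   # out shape: (batch_size, 10, 8)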
+ + def call(self, input_list, mask=None, **kwargs): + if self.supports_masking: + if mask is None: + raise ValueError( + "When supports_masking=True,input must support masking") + key_input, value_input = input_list + mask = tf.expand_dims(mask[0], axis=2) + else: + key_input, key_length_input, value_input = input_list + mask = tf.sequence_mask(key_length_input, + self.seq_len_max, dtype=tf.bool) + mask = tf.transpose(mask, (0, 2, 1)) + + embedding_size = key_input.shape[-1] + + if self.weight_normalization: + paddings = tf.ones_like(value_input) * (-2 ** 32 + 1) + else: + paddings = tf.zeros_like(value_input) + value_input = tf.where(mask, value_input, paddings) + + if self.weight_normalization: + value_input = softmax(value_input, dim=1) + + if len(value_input.shape) == 2: + value_input = tf.expand_dims(value_input, axis=2) + value_input = tf.tile(value_input, [1, 1, embedding_size]) + + return tf.multiply(key_input, value_input) + + def compute_output_shape(self, input_shape): + return input_shape[0] + + def compute_mask(self, inputs, mask): + if self.supports_masking: + return mask[0] + else: + return None + + def get_config(self, ): + config = {'weight_normalization': self.weight_normalization, 'supports_masking': self.supports_masking} + base_config = super(WeightedSequenceLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class AttentionSequencePoolingLayer(Layer): + """The Attentional sequence pooling operation used in DIN. + + Input shape + - A list of three tensor: [query,keys,keys_length] + + - query is a 3D tensor with shape: ``(batch_size, 1, embedding_size)`` + + - keys is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` + + - keys_length is a 2D tensor with shape: ``(batch_size, 1)`` + + Output shape + - 3D tensor with shape: ``(batch_size, 1, embedding_size)``. + + Arguments + - **att_hidden_units**:list of positive integer, the attention net layer number and units in each layer. + + - **att_activation**: Activation function to use in attention net. + + - **weight_normalization**: bool.Whether normalize the attention score of local activation unit. + + - **supports_masking**:If True,the input need to support masking. + + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """ + + def __init__(self, att_hidden_units=(80, 40), att_activation='sigmoid', weight_normalization=False, + return_score=False, + supports_masking=False, **kwargs): + + self.att_hidden_units = att_hidden_units + self.att_activation = att_activation + self.weight_normalization = weight_normalization + self.return_score = return_score + super(AttentionSequencePoolingLayer, self).__init__(**kwargs) + self.supports_masking = supports_masking + + def build(self, input_shape): + if not self.supports_masking: + if not isinstance(input_shape, list) or len(input_shape) != 3: + raise ValueError('A `AttentionSequencePoolingLayer` layer should be called ' + 'on a list of 3 inputs') + + if len(input_shape[0]) != 3 or len(input_shape[1]) != 3 or len(input_shape[2]) != 2: + raise ValueError( + "Unexpected inputs dimensions,the 3 tensor dimensions are %d,%d and %d , expect to be 3,3 and 2" % ( + len(input_shape[0]), len(input_shape[1]), len(input_shape[2]))) + + if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1 or input_shape[2][1] != 1: + raise ValueError('A `AttentionSequencePoolingLayer` layer requires ' + 'inputs of a 3 tensor with shape (None,1,embedding_size),(None,T,embedding_size) and (None,1)' + 'Got different shapes: %s' % (input_shape)) + else: + pass + self.local_att = LocalActivationUnit( + self.att_hidden_units, self.att_activation, l2_reg=0, dropout_rate=0, use_bn=False, seed=1024, ) + super(AttentionSequencePoolingLayer, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, mask=None, training=None, **kwargs): + + if self.supports_masking: + if mask is None: + raise ValueError( + "When supports_masking=True,input must support masking") + queries, keys = inputs + key_masks = tf.expand_dims(mask[-1], axis=1) + + else: + + queries, keys, keys_length = inputs + hist_len = keys.get_shape()[1] + key_masks = tf.sequence_mask(keys_length, hist_len) + + attention_score = self.local_att([queries, keys], training=training) + + outputs = tf.transpose(attention_score, (0, 2, 1)) + + if self.weight_normalization: + paddings = tf.ones_like(outputs) * (-2 ** 32 + 1) + else: + paddings = tf.zeros_like(outputs) + + outputs = tf.where(key_masks, outputs, paddings) + + if self.weight_normalization: + outputs = softmax(outputs) + + if not self.return_score: + outputs = tf.matmul(outputs, keys) + + if tf.__version__ < '1.13.0': + outputs._uses_learning_phase = attention_score._uses_learning_phase + else: + outputs._uses_learning_phase = training is not None + + return outputs + + def compute_output_shape(self, input_shape): + if self.return_score: + return (None, 1, input_shape[1][1]) + else: + return (None, 1, input_shape[0][-1]) + + def compute_mask(self, inputs, mask): + return None + + def get_config(self, ): + + config = {'att_hidden_units': self.att_hidden_units, 'att_activation': self.att_activation, + 'weight_normalization': self.weight_normalization, 'return_score': self.return_score, + 'supports_masking': self.supports_masking} + base_config = super(AttentionSequencePoolingLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class BiLSTM(Layer): + """A multiple layer Bidirectional Residual LSTM Layer. + + Input shape + - 3D tensor with shape ``(batch_size, timesteps, input_dim)``. + + Output shape + - 3D tensor with shape: ``(batch_size, timesteps, units)``. + + Arguments + - **units**: Positive integer, dimensionality of the output space. 
+ + - **layers**:Positive integer, number of LSTM layers to stacked. + + - **res_layers**: Positive integer, number of residual connection to used in last ``res_layers``. + + - **dropout_rate**: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. + + - **merge_mode**: merge_mode: Mode by which outputs of the forward and backward RNNs will be combined. One of { ``'fw'`` , ``'bw'`` , ``'sum'`` , ``'mul'`` , ``'concat'`` , ``'ave'`` , ``None`` }. If None, the outputs will not be combined, they will be returned as a list. + + + """ + + def __init__(self, units, layers=2, res_layers=0, dropout_rate=0.2, merge_mode='ave', **kwargs): + + if merge_mode not in ['fw', 'bw', 'sum', 'mul', 'ave', 'concat', None]: + raise ValueError('Invalid merge mode. ' + 'Merge mode should be one of ' + '{"fw","bw","sum", "mul", "ave", "concat", None}') + + self.units = units + self.layers = layers + self.res_layers = res_layers + self.dropout_rate = dropout_rate + self.merge_mode = merge_mode + + super(BiLSTM, self).__init__(**kwargs) + self.supports_masking = True + + def build(self, input_shape): + + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + self.fw_lstm = [] + self.bw_lstm = [] + for _ in range(self.layers): + self.fw_lstm.append( + LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True, + unroll=True)) + self.bw_lstm.append( + LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True, + go_backwards=True, unroll=True)) + + super(BiLSTM, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, mask=None, **kwargs): + + input_fw = inputs + input_bw = inputs + for i in range(self.layers): + output_fw = self.fw_lstm[i](input_fw) + output_bw = self.bw_lstm[i](input_bw) + output_bw = Lambda(lambda x: K.reverse( + x, 1), mask=lambda inputs, mask: mask)(output_bw) + + if i >= self.layers - self.res_layers: + output_fw += input_fw + output_bw += input_bw + input_fw = output_fw + input_bw = output_bw + + output_fw = input_fw + output_bw = input_bw + + if self.merge_mode == "fw": + output = output_fw + elif self.merge_mode == "bw": + output = output_bw + elif self.merge_mode == 'concat': + output = K.concatenate([output_fw, output_bw]) + elif self.merge_mode == 'sum': + output = output_fw + output_bw + elif self.merge_mode == 'ave': + output = (output_fw + output_bw) / 2 + elif self.merge_mode == 'mul': + output = output_fw * output_bw + elif self.merge_mode is None: + output = [output_fw, output_bw] + + return output + + def compute_output_shape(self, input_shape): + print(self.merge_mode) + if self.merge_mode is None: + return [input_shape, input_shape] + elif self.merge_mode == 'concat': + return input_shape[:-1] + (input_shape[-1] * 2,) + else: + return input_shape + + def compute_mask(self, inputs, mask): + return mask + + def get_config(self, ): + + config = {'units': self.units, 'layers': self.layers, + 'res_layers': self.res_layers, 'dropout_rate': self.dropout_rate, 'merge_mode': self.merge_mode} + base_config = super(BiLSTM, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class Transformer(Layer): + """ Simplified version of Transformer proposed in 《Attention is all you need》 + + Input shape + - a list of two 3D tensor with shape ``(batch_size, timesteps, input_dim)`` if ``supports_masking=True`` . 
+ - a list of two 4 tensors, first two tensors with shape ``(batch_size, timesteps, input_dim)``,last two tensors with shape ``(batch_size, 1)`` if ``supports_masking=False`` . + + + Output shape + - 3D tensor with shape: ``(batch_size, 1, input_dim)`` if ``output_type='mean'`` or ``output_type='sum'`` , else ``(batch_size, timesteps, input_dim)`` . + + + Arguments + - **att_embedding_size**: int.The embedding size in multi-head self-attention network. + - **head_num**: int.The head number in multi-head self-attention network. + - **dropout_rate**: float between 0 and 1. Fraction of the units to drop. + - **use_positional_encoding**: bool. Whether or not use positional_encoding + - **use_res**: bool. Whether or not use standard residual connections before output. + - **use_feed_forward**: bool. Whether or not use pointwise feed foward network. + - **use_layer_norm**: bool. Whether or not use Layer Normalization. + - **blinding**: bool. Whether or not use blinding. + - **seed**: A Python integer to use as random seed. + - **supports_masking**:bool. Whether or not support masking. + - **attention_type**: str, Type of attention, the value must be one of { ``'scaled_dot_product'`` , ``'additive'`` }. + - **output_type**: ``'mean'`` , ``'sum'`` or `None`. Whether or not use average/sum pooling for output. + + References + - [Vaswani, Ashish, et al. "Attention is all you need." Advances in Neural Information Processing Systems. 2017.](https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf) + """ + + def __init__(self, att_embedding_size=1, head_num=8, dropout_rate=0.0, use_positional_encoding=True, use_res=True, + use_feed_forward=True, use_layer_norm=False, blinding=True, seed=1024, supports_masking=False, + attention_type="scaled_dot_product", output_type="mean", **kwargs): + if head_num <= 0: + raise ValueError('head_num must be a int > 0') + self.att_embedding_size = att_embedding_size + self.head_num = head_num + self.num_units = att_embedding_size * head_num + self.use_res = use_res + self.use_feed_forward = use_feed_forward + self.seed = seed + self.use_positional_encoding = use_positional_encoding + self.dropout_rate = dropout_rate + self.use_layer_norm = use_layer_norm + self.blinding = blinding + self.attention_type = attention_type + self.output_type = output_type + super(Transformer, self).__init__(**kwargs) + self.supports_masking = supports_masking + + def build(self, input_shape): + embedding_size = int(input_shape[0][-1]) + if self.num_units != embedding_size: + raise ValueError( + "att_embedding_size * head_num must equal the last dimension size of inputs,got %d * %d != %d" % ( + self.att_embedding_size, self.head_num, embedding_size)) + self.seq_len_max = int(input_shape[0][-2]) + self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed)) + self.W_key = self.add_weight(name='key', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 1)) + self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 2)) + if self.attention_type == "additive": + self.b = self.add_weight('b', shape=[self.att_embedding_size], dtype=tf.float32, + initializer=glorot_uniform(seed=self.seed)) + self.v = self.add_weight('v', shape=[self.att_embedding_size], dtype=tf.float32, + 
initializer=glorot_uniform(seed=self.seed))
+        # if self.use_res:
+        #     self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num], dtype=tf.float32,
+        #                                  initializer=TruncatedNormal(seed=self.seed))
+        if self.use_feed_forward:
+            self.fw1 = self.add_weight('fw1', shape=[self.num_units, 4 * self.num_units], dtype=tf.float32,
+                                       initializer=glorot_uniform(seed=self.seed))
+            self.fw2 = self.add_weight('fw2', shape=[4 * self.num_units, self.num_units], dtype=tf.float32,
+                                       initializer=glorot_uniform(seed=self.seed))
+
+        self.dropout = Dropout(
+            self.dropout_rate, seed=self.seed)
+        self.ln = LayerNormalization()
+        if self.use_positional_encoding:
+            self.query_pe = PositionEncoding()
+            self.key_pe = PositionEncoding()
+        # Be sure to call this somewhere!
+        super(Transformer, self).build(input_shape)
+
+    def call(self, inputs, mask=None, training=None, **kwargs):
+
+        if self.supports_masking:
+            queries, keys = inputs
+            query_masks, key_masks = mask
+            query_masks = tf.cast(query_masks, tf.float32)
+            key_masks = tf.cast(key_masks, tf.float32)
+        else:
+            queries, keys, query_masks, key_masks = inputs
+
+            query_masks = tf.sequence_mask(
+                query_masks, self.seq_len_max, dtype=tf.float32)
+            key_masks = tf.sequence_mask(
+                key_masks, self.seq_len_max, dtype=tf.float32)
+            query_masks = tf.squeeze(query_masks, axis=1)
+            key_masks = tf.squeeze(key_masks, axis=1)
+
+        if self.use_positional_encoding:
+            queries = self.query_pe(queries)
+            keys = self.key_pe(keys)  # encode the keys, not the queries
+
+        querys = tf.tensordot(queries, self.W_Query,
+                              axes=(-1, 0))  # None T_q D*head_num
+        # project the values from the raw keys before `keys` is overwritten
+        # by its own projection
+        values = tf.tensordot(keys, self.W_Value, axes=(-1, 0))
+        keys = tf.tensordot(keys, self.W_key, axes=(-1, 0))
+
+        # head_num*None T_q D
+        querys = tf.concat(tf.split(querys, self.head_num, axis=2), axis=0)
+        keys = tf.concat(tf.split(keys, self.head_num, axis=2), axis=0)
+        values = tf.concat(tf.split(values, self.head_num, axis=2), axis=0)
+
+        if self.attention_type == "scaled_dot_product":
+            # head_num*None T_q T_k
+            outputs = tf.matmul(querys, keys, transpose_b=True)
+
+            outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5)
+        elif self.attention_type == "additive":
+            querys_reshaped = tf.expand_dims(querys, axis=-2)
+            keys_reshaped = tf.expand_dims(keys, axis=-3)
+            outputs = tf.tanh(tf.nn.bias_add(querys_reshaped + keys_reshaped, self.b))
+            outputs = tf.squeeze(tf.tensordot(outputs, tf.expand_dims(self.v, axis=-1), axes=[-1, 0]), axis=-1)
+        else:
+            raise ValueError("attention_type must be scaled_dot_product or additive")
+
+        key_masks = tf.tile(key_masks, [self.head_num, 1])
+
+        # (h*N, T_q, T_k)
+        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
+                            [1, tf.shape(queries)[1], 1])
+
+        paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
+
+        # (h*N, T_q, T_k)
+
+        outputs = tf.where(tf.equal(key_masks, 1), outputs, paddings, )
+        if self.blinding:
+            try:
+                outputs = tf.matrix_set_diag(outputs, tf.ones_like(outputs)[
+                                                      :, :, 0] * (-2 ** 32 + 1))
+            except AttributeError:
+                outputs = tf.compat.v1.matrix_set_diag(outputs, tf.ones_like(outputs)[
+                                                                :, :, 0] * (-2 ** 32 + 1))
+
+        outputs -= reduce_max(outputs, axis=-1, keep_dims=True)
+        outputs = softmax(outputs)
+        query_masks = tf.tile(query_masks, [self.head_num, 1])  # (h*N, T_q)
+        # (h*N, T_q, T_k)
+        query_masks = tf.tile(tf.expand_dims(
+            query_masks, -1), [1, 1, tf.shape(keys)[1]])
+
+        outputs *= query_masks
+
+        outputs = self.dropout(outputs, training=training)
+        # Weighted sum
+        # ( h*N, T_q, C/h)
+        result = tf.matmul(outputs, values)
+        result = tf.concat(tf.split(result,
self.head_num, axis=0), axis=2) + + if self.use_res: + # tf.tensordot(queries, self.W_Res, axes=(-1, 0)) + result += queries + if self.use_layer_norm: + result = self.ln(result) + + if self.use_feed_forward: + fw1 = tf.nn.relu(tf.tensordot(result, self.fw1, axes=[-1, 0])) + fw1 = self.dropout(fw1, training=training) + fw2 = tf.tensordot(fw1, self.fw2, axes=[-1, 0]) + if self.use_res: + result += fw2 + if self.use_layer_norm: + result = self.ln(result) + + if self.output_type == "mean": + return reduce_mean(result, axis=1, keep_dims=True) + elif self.output_type == "sum": + return reduce_sum(result, axis=1, keep_dims=True) + else: + return result + + def compute_output_shape(self, input_shape): + + return (None, 1, self.att_embedding_size * self.head_num) + + def compute_mask(self, inputs, mask=None): + return None + + def get_config(self, ): + config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num, + 'dropout_rate': self.dropout_rate, 'use_res': self.use_res, + 'use_positional_encoding': self.use_positional_encoding, 'use_feed_forward': self.use_feed_forward, + 'use_layer_norm': self.use_layer_norm, 'seed': self.seed, 'supports_masking': self.supports_masking, + 'blinding': self.blinding, 'attention_type': self.attention_type, 'output_type': self.output_type} + base_config = super(Transformer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class PositionEncoding(Layer): + def __init__(self, pos_embedding_trainable=True, + zero_pad=False, + scale=True, **kwargs): + self.pos_embedding_trainable = pos_embedding_trainable + self.zero_pad = zero_pad + self.scale = scale + super(PositionEncoding, self).__init__(**kwargs) + + def build(self, input_shape): + # Create a trainable weight variable for this layer. + _, T, num_units = input_shape.as_list() # inputs.get_shape().as_list() + # First part of the PE function: sin and cos argument + position_enc = np.array([ + [pos / np.power(10000, 2. * (i // 2) / num_units) for i in range(num_units)] + for pos in range(T)]) + + # Second part, apply the cosine to even columns and sin to odds. + position_enc[:, 0::2] = np.sin(position_enc[:, 0::2]) # dim 2i + position_enc[:, 1::2] = np.cos(position_enc[:, 1::2]) # dim 2i+1 + if self.zero_pad: + position_enc[0, :] = np.zeros(num_units) + self.lookup_table = self.add_weight("lookup_table", (T, num_units), + initializer=identity(position_enc), + trainable=self.pos_embedding_trainable) + + # Be sure to call this somewhere! + super(PositionEncoding, self).build(input_shape) + + def call(self, inputs, mask=None): + _, T, num_units = inputs.get_shape().as_list() + position_ind = tf.expand_dims(tf.range(T), 0) + outputs = tf.nn.embedding_lookup(self.lookup_table, position_ind) + if self.scale: + outputs = outputs * num_units ** 0.5 + return outputs + inputs + + def compute_output_shape(self, input_shape): + + return input_shape + + def compute_mask(self, inputs, mask=None): + return mask + + def get_config(self, ): + + config = {'pos_embedding_trainable': self.pos_embedding_trainable, 'zero_pad': self.zero_pad, + 'scale': self.scale} + base_config = super(PositionEncoding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class BiasEncoding(Layer): + def __init__(self, sess_max_count, seed=1024, **kwargs): + self.sess_max_count = sess_max_count + self.seed = seed + super(BiasEncoding, self).__init__(**kwargs) + + def build(self, input_shape): + # Create a trainable weight variable for this layer. 
+ + if self.sess_max_count == 1: + embed_size = input_shape[2].value + seq_len_max = input_shape[1].value + else: + try: + embed_size = input_shape[0][2].value + seq_len_max = input_shape[0][1].value + except AttributeError: + embed_size = input_shape[0][2] + seq_len_max = input_shape[0][1] + + self.sess_bias_embedding = self.add_weight('sess_bias_embedding', shape=(self.sess_max_count, 1, 1), + initializer=TruncatedNormal( + mean=0.0, stddev=0.0001, seed=self.seed)) + self.seq_bias_embedding = self.add_weight('seq_bias_embedding', shape=(1, seq_len_max, 1), + initializer=TruncatedNormal( + mean=0.0, stddev=0.0001, seed=self.seed)) + self.item_bias_embedding = self.add_weight('item_bias_embedding', shape=(1, 1, embed_size), + initializer=TruncatedNormal( + mean=0.0, stddev=0.0001, seed=self.seed)) + + # Be sure to call this somewhere! + super(BiasEncoding, self).build(input_shape) + + def call(self, inputs, mask=None): + """ + :param concated_embeds_value: None * field_size * embedding_size + :return: None*1 + """ + transformer_out = [] + for i in range(self.sess_max_count): + transformer_out.append( + inputs[i] + self.item_bias_embedding + self.seq_bias_embedding + self.sess_bias_embedding[i]) + return transformer_out + + def compute_output_shape(self, input_shape): + + return input_shape + + def compute_mask(self, inputs, mask=None): + return mask + + def get_config(self, ): + + config = {'sess_max_count': self.sess_max_count, 'seed': self.seed, } + base_config = super(BiasEncoding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class DynamicGRU(Layer): + def __init__(self, num_units=None, gru_type='GRU', return_sequence=True, **kwargs): + + self.num_units = num_units + self.return_sequence = return_sequence + self.gru_type = gru_type + super(DynamicGRU, self).__init__(**kwargs) + + def build(self, input_shape): + # Create a trainable weight variable for this layer. + input_seq_shape = input_shape[0] + if self.num_units is None: + self.num_units = input_seq_shape.as_list()[-1] + if self.gru_type == "AGRU": + self.gru_cell = QAAttGRUCell(self.num_units) + elif self.gru_type == "AUGRU": + self.gru_cell = VecAttGRUCell(self.num_units) + else: + try: + self.gru_cell = tf.nn.rnn_cell.GRUCell(self.num_units) # GRUCell + except AttributeError: + self.gru_cell = tf.compat.v1.nn.rnn_cell.GRUCell(self.num_units) + + # Be sure to call this somewhere! 
+ super(DynamicGRU, self).build(input_shape) + + def call(self, input_list): + """ + :param concated_embeds_value: None * field_size * embedding_size + :return: None*1 + """ + if self.gru_type == "GRU" or self.gru_type == "AIGRU": + rnn_input, sequence_length = input_list + att_score = None + else: + rnn_input, sequence_length, att_score = input_list + + rnn_output, hidden_state = dynamic_rnn(self.gru_cell, inputs=rnn_input, att_scores=att_score, + sequence_length=tf.squeeze(sequence_length, + ), dtype=tf.float32, scope=self.name) + if self.return_sequence: + return rnn_output + else: + return tf.expand_dims(hidden_state, axis=1) + + def compute_output_shape(self, input_shape): + rnn_input_shape = input_shape[0] + if self.return_sequence: + return rnn_input_shape + else: + return (None, 1, rnn_input_shape[2]) + + def get_config(self, ): + config = {'num_units': self.num_units, 'gru_type': self.gru_type, 'return_sequence': self.return_sequence} + base_config = super(DynamicGRU, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class KMaxPooling(Layer): + """K Max pooling that selects the k biggest value along the specific axis. + + Input shape + - nD tensor with shape: ``(batch_size, ..., input_dim)``. + + Output shape + - nD tensor with shape: ``(batch_size, ..., output_dim)``. + + Arguments + - **k**: positive integer, number of top elements to look for along the ``axis`` dimension. + + - **axis**: positive integer, the dimension to look for elements. + + """ + + def __init__(self, k=1, axis=-1, **kwargs): + + self.k = k + self.axis = axis + super(KMaxPooling, self).__init__(**kwargs) + + def build(self, input_shape): + + if self.axis < 1 or self.axis > len(input_shape): + raise ValueError("axis must be 1~%d,now is %d" % + (len(input_shape), self.axis)) + + if self.k < 1 or self.k > input_shape[self.axis]: + raise ValueError("k must be in 1 ~ %d,now k is %d" % + (input_shape[self.axis], self.k)) + self.dims = len(input_shape) + # Be sure to call this somewhere! + super(KMaxPooling, self).build(input_shape) + + def call(self, inputs): + + # swap the last and the axis dimensions since top_k will be applied along the last dimension + perm = list(range(self.dims)) + perm[-1], perm[self.axis] = perm[self.axis], perm[-1] + shifted_input = tf.transpose(inputs, perm) + + # extract top_k, returns two tensors [values, indices] + top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0] + output = tf.transpose(top_k, perm) + + return output + + def compute_output_shape(self, input_shape): + output_shape = list(input_shape) + output_shape[self.axis] = self.k + return tuple(output_shape) + + def get_config(self, ): + config = {'k': self.k, 'axis': self.axis} + base_config = super(KMaxPooling, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + +# def positional_encoding(inputs, +# pos_embedding_trainable=True, +# zero_pad=False, +# scale=True, +# ): +# '''Sinusoidal Positional_Encoding. +# +# Args: +# +# - inputs: A 2d Tensor with shape of (N, T). +# - num_units: Output dimensionality +# - zero_pad: Boolean. If True, all the values of the first row (id = 0) should be constant zero +# - scale: Boolean. If True, the output will be multiplied by sqrt num_units(check details from paper) +# - scope: Optional scope for `variable_scope`. +# - reuse: Boolean, whether to reuse the weights of a previous layer by the same name. 
+#
+# Returns:
+#
+#     - A 'Tensor' with one more rank than the input's, whose last dimensionality should be 'num_units'
+# '''
+#
+#     _, T, num_units = inputs.get_shape().as_list()
+#     # with tf.variable_scope(scope, reuse=reuse):
+#     position_ind = tf.expand_dims(tf.range(T), 0)
+#     # First part of the PE function: sin and cos argument
+#     position_enc = np.array([
+#         [pos / np.power(10000, 2. * i / num_units)
+#          for i in range(num_units)]
+#         for pos in range(T)])
+#
+#     # Second part, apply the cosine to even columns and sin to odds.
+#     position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])  # dim 2i
+#     position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])  # dim 2i+1
+#
+#     # Convert to a tensor
+#
+#     if pos_embedding_trainable:
+#         lookup_table = K.variable(position_enc, dtype=tf.float32)
+#
+#     if zero_pad:
+#         lookup_table = tf.concat((tf.zeros(shape=[1, num_units]),
+#                                   lookup_table[1:, :]), 0)
+#
+#     outputs = tf.nn.embedding_lookup(lookup_table, position_ind)
+#
+#     if scale:
+#         outputs = outputs * num_units ** 0.5
+#     return outputs + inputs
diff --git a/modelzoo/FwFM/script/layers/utils.py b/modelzoo/FwFM/script/layers/utils.py
new file mode 100644
index 00000000000..2be8f3fe5ef
--- /dev/null
+++ b/modelzoo/FwFM/script/layers/utils.py
@@ -0,0 +1,302 @@
+# -*- coding:utf-8 -*-
+"""
+
+Author:
+    Weichen Shen, weichenswc@163.com
+
+"""
+import tensorflow as tf
+from tensorflow.python.keras.layers import Flatten, Concatenate, Layer, Add
+from tensorflow.python.ops.lookup_ops import TextFileInitializer
+
+try:
+    from tensorflow.python.ops.init_ops import Zeros, glorot_normal_initializer as glorot_normal
+except ImportError:
+    from tensorflow.python.ops.init_ops_v2 import Zeros, glorot_normal
+
+from tensorflow.python.keras.regularizers import l2
+
+try:
+    from tensorflow.python.ops.lookup_ops import StaticHashTable
+except ImportError:
+    from tensorflow.python.ops.lookup_ops import HashTable as StaticHashTable
+
+
+class NoMask(Layer):
+    def __init__(self, **kwargs):
+        super(NoMask, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        # Be sure to call this somewhere!
+        super(NoMask, self).build(input_shape)
+
+    def call(self, x, mask=None, **kwargs):
+        return x
+
+    def compute_mask(self, inputs, mask):
+        return None
+
+
+class Hash(Layer):
+    """Looks up keys in a table when `vocabulary_path` is set, and outputs the corresponding values.
+    If `vocabulary_path` is not set, `Hash` will hash the input to [0,num_buckets). When `mask_zero` = True,
+    input value `0` or `0.0` will be set to `0`, and other values will be set in range [1,num_buckets).
+
+    The following snippet initializes a `Hash` with a `vocabulary_path` file whose first column holds the
+    values and whose second column holds the keys:
+
+    * `1,emerson`
+    * `2,lake`
+    * `3,palmer`
+
+    >>> hash = Hash(
+    ...     num_buckets=3+1,
+    ...     vocabulary_path=filename,
+    ...     default_value=0)
+    >>> hash(tf.constant('lake')).numpy()
+    2
+    >>> hash(tf.constant('lakeemerson')).numpy()
+    0
+
+    Args:
+        num_buckets: An `int` that is >= 1. The number of buckets, or the vocabulary size + 1
+            when `vocabulary_path` is set.
+        mask_zero: default is False. The `Hash` value will hash input `0` or `0.0` to value `0` when
+            `mask_zero` is `True`. `mask_zero` is not used when `vocabulary_path` is set.
+        vocabulary_path: default `None`. The `CSV` text file path of the vocabulary hash, which contains
+            two columns separated by delimiter `comma`; the first column is the value and the second is
+            the key. The key data type is `string`, the value data type is `int`.
The path must + be accessible from wherever `Hash` is initialized. + default_value: default '0'. The default value if a key is missing in the table. + **kwargs: Additional keyword arguments. + """ + + def __init__(self, num_buckets, mask_zero=False, vocabulary_path=None, default_value=0, **kwargs): + self.num_buckets = num_buckets + self.mask_zero = mask_zero + self.vocabulary_path = vocabulary_path + self.default_value = default_value + if self.vocabulary_path: + initializer = TextFileInitializer(vocabulary_path, 'string', 1, 'int64', 0, delimiter=',') + self.hash_table = StaticHashTable(initializer, default_value=self.default_value) + super(Hash, self).__init__(**kwargs) + + def build(self, input_shape): + # Be sure to call this somewhere! + super(Hash, self).build(input_shape) + + def call(self, x, mask=None, **kwargs): + + if x.dtype != tf.string: + zero = tf.as_string(tf.zeros([1], dtype=x.dtype)) + x = tf.as_string(x, ) + else: + zero = tf.as_string(tf.zeros([1], dtype='int32')) + + if self.vocabulary_path: + hash_x = self.hash_table.lookup(x) + return hash_x + + num_buckets = self.num_buckets if not self.mask_zero else self.num_buckets - 1 + try: + hash_x = tf.string_to_hash_bucket_fast(x, num_buckets, + name=None) # weak hash + except AttributeError: + hash_x = tf.strings.to_hash_bucket_fast(x, num_buckets, + name=None) # weak hash + if self.mask_zero: + mask = tf.cast(tf.not_equal(x, zero), dtype='int64') + hash_x = (hash_x + 1) * mask + + return hash_x + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self, ): + config = {'num_buckets': self.num_buckets, 'mask_zero': self.mask_zero, 'vocabulary_path': self.vocabulary_path, + 'default_value': self.default_value} + base_config = super(Hash, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class Linear(Layer): + + def __init__(self, l2_reg=0.0, mode=0, use_bias=False, seed=1024, **kwargs): + + self.l2_reg = l2_reg + # self.l2_reg = tf.contrib.layers.l2_regularizer(float(l2_reg_linear)) + if mode not in [0, 1, 2]: + raise ValueError("mode must be 0,1 or 2") + self.mode = mode + self.use_bias = use_bias + self.seed = seed + super(Linear, self).__init__(**kwargs) + + def build(self, input_shape): + if self.use_bias: + self.bias = self.add_weight(name='linear_bias', + shape=(1,), + initializer=Zeros(), + trainable=True) + if self.mode == 1: + self.kernel = self.add_weight( + 'linear_kernel', + shape=[int(input_shape[-1]), 1], + initializer=glorot_normal(self.seed), + regularizer=l2(self.l2_reg), + trainable=True) + elif self.mode == 2: + self.kernel = self.add_weight( + 'linear_kernel', + shape=[int(input_shape[1][-1]), 1], + initializer=glorot_normal(self.seed), + regularizer=l2(self.l2_reg), + trainable=True) + + super(Linear, self).build(input_shape) # Be sure to call this somewhere! 
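+    # Mode semantics, matching build() above and call() below: mode 0 sums a
+    # sparse embedding tensor, mode 1 applies a kernel to dense features, and
+    # mode 2 expects a [sparse, dense] pair and adds both logits. A minimal
+    # sketch (tensor names are illustrative assumptions only):
+    #   logit0 = Linear(mode=0)(sparse_embeds)
+    #   logit1 = Linear(mode=1, l2_reg=1e-5)(dense_values)
+    #   logit2 = Linear(mode=2)([sparse_embeds, dense_values])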
+ + def call(self, inputs, **kwargs): + if self.mode == 0: + sparse_input = inputs + linear_logit = reduce_sum(sparse_input, axis=-1, keep_dims=True) + elif self.mode == 1: + dense_input = inputs + fc = tf.tensordot(dense_input, self.kernel, axes=(-1, 0)) + linear_logit = fc + else: + sparse_input, dense_input = inputs + fc = tf.tensordot(dense_input, self.kernel, axes=(-1, 0)) + linear_logit = reduce_sum(sparse_input, axis=-1, keep_dims=False) + fc + if self.use_bias: + linear_logit += self.bias + + return linear_logit + + def compute_output_shape(self, input_shape): + return (None, 1) + + def compute_mask(self, inputs, mask): + return None + + def get_config(self, ): + config = {'mode': self.mode, 'l2_reg': self.l2_reg, 'use_bias': self.use_bias, 'seed': self.seed} + base_config = super(Linear, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def concat_func(inputs, axis=-1, mask=False): + if not mask: + inputs = list(map(NoMask(), inputs)) + if len(inputs) == 1: + return inputs[0] + else: + return Concatenate(axis=axis)(inputs) + + +def reduce_mean(input_tensor, + axis=None, + keep_dims=False, + name=None, + reduction_indices=None): + try: + return tf.reduce_mean(input_tensor, + axis=axis, + keep_dims=keep_dims, + name=name, + reduction_indices=reduction_indices) + except TypeError: + return tf.reduce_mean(input_tensor, + axis=axis, + keepdims=keep_dims, + name=name) + + +def reduce_sum(input_tensor, + axis=None, + keep_dims=False, + name=None, + reduction_indices=None): + try: + return tf.reduce_sum(input_tensor, + axis=axis, + keep_dims=keep_dims, + name=name, + reduction_indices=reduction_indices) + except TypeError: + return tf.reduce_sum(input_tensor, + axis=axis, + keepdims=keep_dims, + name=name) + + +def reduce_max(input_tensor, + axis=None, + keep_dims=False, + name=None, + reduction_indices=None): + try: + return tf.reduce_max(input_tensor, + axis=axis, + keep_dims=keep_dims, + name=name, + reduction_indices=reduction_indices) + except TypeError: + return tf.reduce_max(input_tensor, + axis=axis, + keepdims=keep_dims, + name=name) + + +def div(x, y, name=None): + try: + return tf.div(x, y, name=name) + except AttributeError: + return tf.divide(x, y, name=name) + + +def softmax(logits, dim=-1, name=None): + try: + return tf.nn.softmax(logits, dim=dim, name=name) + except TypeError: + return tf.nn.softmax(logits, axis=dim, name=name) + + +class _Add(Layer): + def __init__(self, **kwargs): + super(_Add, self).__init__(**kwargs) + + def build(self, input_shape): + # Be sure to call this somewhere! 
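+        # _Add sums a list of logit tensors, falling back to a constant [[0.0]]
+        # for an empty list; add_func below additionally passes single tensors
+        # and non-list inputs straight through, so callers can always write,
+        # e.g. (names are illustrative): add_func([linear_logit, fwfm_logit]).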
+        super(_Add, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        # if not isinstance(inputs, list):
+        #     return inputs
+        # if len(inputs) == 1:
+        #     return inputs[0]
+        if len(inputs) == 0:
+            return tf.constant([[0.0]])
+
+        return Add()(inputs)
+
+
+def add_func(inputs):
+    if not isinstance(inputs, list):
+        return inputs
+    if len(inputs) == 1:
+        return inputs[0]
+    return _Add()(inputs)
+
+
+def combined_dnn_input(sparse_embedding_list, dense_value_list):
+    if len(sparse_embedding_list) > 0 and len(dense_value_list) > 0:
+        sparse_dnn_input = Flatten()(concat_func(sparse_embedding_list))
+        dense_dnn_input = Flatten()(concat_func(dense_value_list))
+        return concat_func([sparse_dnn_input, dense_dnn_input])
+    elif len(sparse_embedding_list) > 0:
+        return Flatten()(concat_func(sparse_embedding_list))
+    elif len(dense_value_list) > 0:
+        return Flatten()(concat_func(dense_value_list))
+    else:
+        raise NotImplementedError("dnn_feature_columns can not be an empty list")
diff --git a/modelzoo/FwFM/script/models/__init__.py b/modelzoo/FwFM/script/models/__init__.py
new file mode 100644
index 00000000000..87868a82459
--- /dev/null
+++ b/modelzoo/FwFM/script/models/__init__.py
@@ -0,0 +1,3 @@
+from .fwfm import FwFM
+
+__all__ = ["FwFM"]
diff --git a/modelzoo/FwFM/script/models/fwfm.py b/modelzoo/FwFM/script/models/fwfm.py
new file mode 100644
index 00000000000..3646d8f1b0c
--- /dev/null
+++ b/modelzoo/FwFM/script/models/fwfm.py
@@ -0,0 +1,72 @@
+# -*- coding:utf-8 -*-
+"""
+Author:
+    Harshit Pande
+
+Reference:
+    [1] Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising
+    (https://arxiv.org/pdf/1806.03514.pdf)
+
+"""
+
+from itertools import chain
+
+from tensorflow.python.keras.models import Model
+from tensorflow.python.keras.layers import Dense
+
+from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns
+from ..layers.core import PredictionLayer, DNN
+from ..layers.interaction import FwFMLayer
+from ..layers.utils import concat_func, add_func, combined_dnn_input
+
+
+def FwFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64),
+         l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0,
+         seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary'):
+    """Instantiates the FwFM Network architecture.
+
+    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
+    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
+    :param fm_group: list, group_name of features that will be used to do feature interactions.
+    :param dnn_hidden_units: list of positive integers (or an empty list to disable the DNN), the layer number
+        and units in each layer of the DNN
+    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
+    :param l2_reg_field_strength: float. L2 regularizer strength applied to the field pair strength parameters
+    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors
+    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
+    :param seed: integer, to use as random seed.
+    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
+    :param dnn_activation: Activation function to use in the DNN
+    :param dnn_use_bn: bool.
Whether use BatchNormalization before activation or not in DNN + :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss + :return: A Keras model instance. + """ + + features = build_input_features(linear_feature_columns + dnn_feature_columns) + + inputs_list = list(features.values()) + + linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', + l2_reg=l2_reg_linear) + + group_embedding_dict, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, + l2_reg_embedding, seed, + support_group=True) + + fwfm_logit = add_func([FwFMLayer(num_fields=len(v), regularizer=l2_reg_field_strength) + (concat_func(v, axis=1)) for k, v in group_embedding_dict.items() if k in fm_group]) + + final_logit_components = [linear_logit, fwfm_logit] + + if dnn_hidden_units: + dnn_input = combined_dnn_input(list(chain.from_iterable( + group_embedding_dict.values())), dense_value_list) + dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input) + dnn_logit = Dense(1, use_bias=False)(dnn_output) + final_logit_components.append(dnn_logit) + + final_logit = add_func(final_logit_components) + + output = PredictionLayer(task)(final_logit) + model = Model(inputs=inputs_list, outputs=output) + return model diff --git a/modelzoo/FwFM/script/utils.py b/modelzoo/FwFM/script/utils.py new file mode 100644 index 00000000000..7fe3b25a518 --- /dev/null +++ b/modelzoo/FwFM/script/utils.py @@ -0,0 +1,46 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import json +import logging +from threading import Thread + +import requests + +try: + from packaging.version import parse +except ImportError: + from pip._vendor.packaging.version import parse + + +def check_version(version): + """Return version of package on pypi.python.org using json.""" + + def check(version): + try: + url_pattern = 'https://pypi.python.org/pypi/deepctr/json' + req = requests.get(url_pattern) + latest_version = parse('0') + version = parse(version) + if req.status_code == requests.codes.ok: + j = json.loads(req.text.encode('utf-8')) + releases = j.get('releases', []) + for release in releases: + ver = parse(release) + if ver.is_prerelease or ver.is_postrelease: + continue + latest_version = max(latest_version, ver) + if latest_version > version: + logging.warning( + '\nDeepCTR version {0} detected. 
Your version is {1}.\nUse `pip install -U deepctr` to upgrade. Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format(
+                        latest_version, version))
+        except Exception:
+            print("Please check the latest version manually on https://pypi.org/project/deepctr/#history")
+            return
+
+    Thread(target=check, args=(version,)).start()
diff --git a/modelzoo/FwFM/train.py b/modelzoo/FwFM/train.py
new file mode 100644
index 00000000000..bdfb56e983c
--- /dev/null
+++ b/modelzoo/FwFM/train.py
@@ -0,0 +1,255 @@
+import os
+import sys
+import argparse
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.losses import binary_crossentropy
+from script.models.fwfm import FwFM
+from script.feature_column import SparseFeat, DenseFeat, get_feature_names, VarLenSparseFeat
+
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+UNSEQ_COLUMNS = ['UID', 'ITEM', 'CATEGORY']
+LABEL_COLUMN = ['CLICKED']
+TRAIN_DATA_COLUMNS = LABEL_COLUMN + UNSEQ_COLUMNS
+
+EMBEDDING_DIM = 8
+
+def split(x):
+    key_ans = x.split(',')
+    for key in key_ans:
+        if key not in key2index:
+            key2index[key] = len(key2index) + 1
+    return list(map(lambda x: key2index[x], key_ans))
+
+
+# Bin continuous variables into quantile-based buckets
+def BinMap(data, acc):
+    if acc >= 1 or acc <= 0:
+        print('acc must be greater than 0 and less than 1')
+        return
+    max = data.max()
+    min = data.min()
+    rangelist = [i + 1 for i in range(int(1 / acc))]
+    length = len(data) - 1
+    data1 = data.sort_index()
+    bin_res = np.array([0] * data.shape[-1], dtype=int)
+    for r in rangelist:
+        if r == 1:
+            lower = min
+        else:
+            lower = data1[int(length * ((r - 1) * acc))]
+        rank = r * acc
+        i = int(length * rank)
+        # x = data[np.where(data>=lower) + np.where(data=lower) & (data<=max)].index
+        else:
+            mask = data.loc[(data >= lower) & (data

Date: Wed, 12 Oct 2022 15:57:26 +0800
Subject: [PATCH 8/8] [ModelZoo] Support PNN

---
 modelzoo/PNN/README.md | 85 +
 modelzoo/PNN/data/README.md | 4 +
 modelzoo/PNN/data/prepare_data.sh | 15 +
 modelzoo/PNN/data/script/data2labelencode.py | 54 +
 modelzoo/PNN/data/script/generate_neg.py | 63 +
 modelzoo/PNN/data/script/generate_voc.py | 66 +
 .../PNN/data/script/history_behavior_list.py | 41 +
 modelzoo/PNN/data/script/item_map.py | 29 +
 modelzoo/PNN/data/script/local_aggretor.py | 47 +
 modelzoo/PNN/data/script/pick2txt.py | 14 +
 modelzoo/PNN/data/script/process_data.py | 108 ++
 modelzoo/PNN/data/script/split_by_user.py | 18 +
 modelzoo/PNN/result/README.md | 2 +
 modelzoo/PNN/script/__init__.py | 0
 modelzoo/PNN/script/contrib/__init__.py | 0
 modelzoo/PNN/script/contrib/rnn.py | 1153 +++++++++++++
 modelzoo/PNN/script/contrib/rnn_v2.py | 1452 ++++++++++++++++
 modelzoo/PNN/script/contrib/utils.py | 378 +++++
 modelzoo/PNN/script/estimator/__init__.py | 1 +
 .../PNN/script/estimator/feature_column.py | 52 +
 modelzoo/PNN/script/estimator/inputs.py | 52 +
 .../PNN/script/estimator/models/__init__.py | 13 +
 modelzoo/PNN/script/estimator/models/pnn.py | 93 +
 modelzoo/PNN/script/estimator/utils.py | 217 +++
 modelzoo/PNN/script/feature_column.py | 249 +++
 modelzoo/PNN/script/inputs.py | 155 ++
 modelzoo/PNN/script/layers/__init__.py | 52 +
 modelzoo/PNN/script/layers/activation.py | 85 +
 modelzoo/PNN/script/layers/core.py | 267 +++
 modelzoo/PNN/script/layers/interaction.py | 1492 +++++++++++++++++
 modelzoo/PNN/script/layers/normalization.py | 51 +
 modelzoo/PNN/script/layers/sequence.py | 901 ++++++++++
 modelzoo/PNN/script/layers/utils.py | 302 ++++
 modelzoo/PNN/script/models/__init__.py | 3 +
 modelzoo/PNN/script/models/pnn.py | 72 +
 modelzoo/PNN/script/utils.py | 46 +
 modelzoo/PNN/train.py | 259 +++
 37 files changed, 7891 insertions(+)
 create mode 100644 modelzoo/PNN/README.md
 create mode 100644 modelzoo/PNN/data/README.md
 create mode 100644 modelzoo/PNN/data/prepare_data.sh
 create mode 100644 modelzoo/PNN/data/script/data2labelencode.py
 create mode 100644 modelzoo/PNN/data/script/generate_neg.py
 create mode 100644 modelzoo/PNN/data/script/generate_voc.py
 create mode 100644 modelzoo/PNN/data/script/history_behavior_list.py
 create mode 100644 modelzoo/PNN/data/script/item_map.py
 create mode 100644 modelzoo/PNN/data/script/local_aggretor.py
 create mode 100644 modelzoo/PNN/data/script/pick2txt.py
 create mode 100644 modelzoo/PNN/data/script/process_data.py
 create mode 100644 modelzoo/PNN/data/script/split_by_user.py
 create mode 100644 modelzoo/PNN/result/README.md
 create mode 100644 modelzoo/PNN/script/__init__.py
 create mode 100644 modelzoo/PNN/script/contrib/__init__.py
 create mode 100644 modelzoo/PNN/script/contrib/rnn.py
 create mode 100644 modelzoo/PNN/script/contrib/rnn_v2.py
 create mode 100644 modelzoo/PNN/script/contrib/utils.py
 create mode 100644 modelzoo/PNN/script/estimator/__init__.py
 create mode 100644 modelzoo/PNN/script/estimator/feature_column.py
 create mode 100644 modelzoo/PNN/script/estimator/inputs.py
 create mode 100644 modelzoo/PNN/script/estimator/models/__init__.py
 create mode 100644 modelzoo/PNN/script/estimator/models/pnn.py
 create mode 100644 modelzoo/PNN/script/estimator/utils.py
 create mode 100644 modelzoo/PNN/script/feature_column.py
 create mode 100644 modelzoo/PNN/script/inputs.py
 create mode 100644 modelzoo/PNN/script/layers/__init__.py
 create mode 100644 modelzoo/PNN/script/layers/activation.py
 create mode 100644 modelzoo/PNN/script/layers/core.py
 create mode 100644 modelzoo/PNN/script/layers/interaction.py
 create mode 100644 modelzoo/PNN/script/layers/normalization.py
 create mode 100644 modelzoo/PNN/script/layers/sequence.py
 create mode 100644 modelzoo/PNN/script/layers/utils.py
 create mode 100644 modelzoo/PNN/script/models/__init__.py
 create mode 100644 modelzoo/PNN/script/models/pnn.py
 create mode 100644 modelzoo/PNN/script/utils.py
 create mode 100644 modelzoo/PNN/train.py

diff --git a/modelzoo/PNN/README.md b/modelzoo/PNN/README.md
new file mode 100644
index 00000000000..0d02cde5540
--- /dev/null
+++ b/modelzoo/PNN/README.md
@@ -0,0 +1,85 @@
+# PNN
+
+The following is a brief directory structure and description for this example:
+
+```
+├── data                                  # Data set directory
+│   ├── prepare_data.sh                   # Shell script to download and process dataset
+│   ├── README.md                         # Documentation describing how to prepare dataset
+│   └── script                            # Directory containing scripts to process dataset
+│       ├── data2labelencode.py           # Convert data to csv file
+│       ├── generate_neg.py               # Create negative samples
+│       ├── generate_voc.py               # Create a list of features
+│       ├── history_behavior_list.py      # Count user's history behaviors
+│       ├── item_map.py                   # Create a map between item id and cate
+│       ├── local_aggretor.py             # Generate sample data
+│       ├── pick2txt.py                   # Convert voc's format
+│       ├── process_data.py               # Parse raw json data
+│       └── split_by_user.py              # Divide the dataset
+├── script                                # Model set directory
+│   ├── contrib                           # Directory containing rnn implementations
+│   ├── estimator                         # Directory containing the estimator pipeline
+│   ├── layers                            # Directory containing layers of the model
+│   ├── models                            # Directory containing the PNN model
+│   ├── feature_column.py                 # Feature marker
+│   ├── inputs.py                         # Construction of the input layer
+│   └── utils.py
+├── train.py                              # Training script
+└── README.md                             # Documentation
+```
+
+## Content
+
+[TOC]
+
+## Model Structure
+
+Implementation of the paper "Product-based Neural Networks for User Response Prediction".
+
+## Usage
+
+### Stand-alone Training
+
+1. Please prepare the data set and the DeepRec environment.
+
+   1. Manually
+
+      - Follow [dataset preparation](https://github.com/alibaba/DeepRec/tree/main/modelzoo/DIEN#prepare) to prepare the data set.
+      - Download the code by `git clone https://github.com/alibaba/DeepRec`
+      - Follow [How to Build](https://github.com/alibaba/DeepRec#how-to-build) to build the DeepRec whl package and install it by `pip install $DEEPREC_WHL`.
+
+   2. Docker (Recommended)
+
+      ```
+      docker pull alideeprec/deeprec-release-modelzoo:latest
+      docker run -it alideeprec/deeprec-release-modelzoo:latest /bin/bash
+
+      # In docker container
+      cd /root/modelzoo/PNN
+      ```
+
+2. Train.
+
+   ```
+   python train.py
+   ```
+
+## Dataset
+
+The Amazon Books dataset is used as the benchmark dataset.
+
+### Prepare
+
+For details of data download, see [Data Preparation](https://github.com/Atomu2014/make-ipinyou-data)
diff --git a/modelzoo/PNN/data/README.md b/modelzoo/PNN/data/README.md
new file mode 100644
index 00000000000..15a0bc61c8d
--- /dev/null
+++ b/modelzoo/PNN/data/README.md
@@ -0,0 +1,4 @@
+make-ipinyou-data
+=================
+
+For details of data download, see [Data Preparation](https://github.com/Atomu2014/make-ipinyou-data)
diff --git a/modelzoo/PNN/data/prepare_data.sh b/modelzoo/PNN/data/prepare_data.sh
new file mode 100644
index 00000000000..49fdb9a0da1
--- /dev/null
+++ b/modelzoo/PNN/data/prepare_data.sh
@@ -0,0 +1,15 @@
+wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Books.json.gz
+wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz
+gunzip reviews_Books.json.gz
+gunzip meta_Books.json.gz
+
+python script/process_data.py meta_Books.json reviews_Books.json
+python script/local_aggretor.py
+python script/split_by_user.py
+python script/generate_voc.py
+
+python script/item_map.py
+python script/history_behavior_list.py
+python script/generate_neg.py
+
+python script/data2labelencode.py
\ No newline at end of file
diff --git a/modelzoo/PNN/data/script/data2labelencode.py b/modelzoo/PNN/data/script/data2labelencode.py
new file mode 100644
index 00000000000..04daba5e28a
--- /dev/null
+++ b/modelzoo/PNN/data/script/data2labelencode.py
@@ -0,0 +1,54 @@
+import pandas as pd
+import numpy as np
+import pickle
+
+UNSEQ_COLUMNS = ['UID', 'ITEM', 'CATEGORY']
+HIS_COLUMNS = ['HISTORY_ITEM', 'HISTORY_CATEGORY']
+SEQ_COLUMNS = HIS_COLUMNS
+LABEL_COLUMN = ['CLICKED']
+TRAIN_DATA_COLUMNS = LABEL_COLUMN + UNSEQ_COLUMNS + SEQ_COLUMNS
+
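+# Label encoding maps each raw id string to a dense integer index ordered by
+# frequency, which is what encoder_dict below implements. A standalone sketch
+# of the idea (values here are illustrative only):
+#   counts = pd.Series(['a', 'b', 'a']).value_counts()
+#   mapping = pd.Series(np.arange(len(counts)), index=counts.index).to_dict()
+#   # -> {'a': 0, 'b': 1}
+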
+def inputs_to_labelencode(filename):
+    def encoder_dict(data, category_col):
+        category_dict = data[category_col].value_counts()
+        category_dict = pd.Series(np.arange(0, len(category_dict)), index=category_dict.index).to_dict()
+        data[category_col + '_encode'] = data[category_col].map(category_dict).astype('int32')
+        return data
+
+    uid_file = '../CAN/data/uid_voc.txt'
+    mid_file = '../CAN/data/mid_voc.txt'
+    cat_file = '../CAN/data/cat_voc.txt'
+
+    uid_data = pd.read_csv(uid_file, encoding="utf-8", header=None, names=['UID'])
+    mid_data = pd.read_csv(mid_file, encoding="utf-8", header=None, names=['ITEM'])
+    cat_data = pd.read_csv(cat_file, encoding="utf-8", header=None, names=['CATEGORY'])
+
+    uid_data = encoder_dict(uid_data, 'UID')
+    mid_data = encoder_dict(mid_data, 'ITEM')
+    cat_data = encoder_dict(cat_data, 'CATEGORY')
+
+    dataset = pd.read_csv(filename, encoding="utf-8",
+                          header=None, names=TRAIN_DATA_COLUMNS, sep="\t", low_memory=False)
+    for key in ['UID', 'ITEM', 'CATEGORY']:
+        if key == 'UID':
+            dataset = pd.merge(dataset, uid_data, on=key, how='inner')
+        elif key == 'ITEM':
+            dataset = pd.merge(dataset, mid_data, on=key, how='inner')
+        else:
+            dataset = pd.merge(dataset, cat_data, on=key, how='inner')
+
+    dataset = dataset.drop(UNSEQ_COLUMNS + SEQ_COLUMNS, axis=1)
+
+    dataset.to_csv(filename + '_to_labelencode.txt', index=False, header=False)
+    uid_data.to_csv('dataset/uid_labelencode.csv', index=False)
+    mid_data.to_csv('dataset/mid_labelencode.csv', index=False)
+    cat_data.to_csv('dataset/cat_labelencode.csv', index=False)
+
+
+if __name__ == '__main__':
+    inputs_to_labelencode('../CAN/data/local_train_splitByUser')
+    inputs_to_labelencode('../CAN/data/local_test_splitByUser')
diff --git a/modelzoo/PNN/data/script/generate_neg.py b/modelzoo/PNN/data/script/generate_neg.py
new file mode 100644
index 00000000000..a10ef919e13
--- /dev/null
+++ b/modelzoo/PNN/data/script/generate_neg.py
@@ -0,0 +1,63 @@
+import random
+
+NEG_SEQ_LENGTH_FOR_EACH_HISTORY_ITEM = 1
+
+
+def createNegData(file):
+    with open(file, 'r') as f_raw:
+        with open(file + '_neg', 'w') as f_out:
+            FirstLine = True
+            for line in f_raw:
+                linelist = line.strip().split('\t')
+                uid = linelist[1]
+
+                if uid not in user_history_behavior:
+                    neg_line = '\t'
+                else:
+                    his_items = linelist[4].split('\x02')
+                    neg_items_str = ''
+                    neg_cates_str = ''
+                    for pos in his_items:
+                        tmp_items_str = ''
+                        tmp_cates_str = ''
+                        tmp_items = []
+                        tmp_cates = []
+                        neg_length = 0
+                        while (True):
+                            index = random.randint(
+                                0,
+                                len(user_history_behavior[uid][0]) - 1)
+                            if user_history_behavior[uid][0][index] != pos:
+                                tmp_items.append(
+                                    user_history_behavior[uid][0][index])
+                                tmp_cates.append(
+                                    user_history_behavior[uid][1][index])
+                                neg_length += 1
+                            if neg_length >= NEG_SEQ_LENGTH_FOR_EACH_HISTORY_ITEM:
+                                break
+                        for item in tmp_items:
+                            tmp_items_str += (item + '\x03')
+                        for cate in tmp_cates:
+                            tmp_cates_str += (cate + '\x03')
+                        neg_items_str += (tmp_items_str[:-1] + '\x02')
+                        neg_cates_str += (tmp_cates_str[:-1] + '\x02')
+                    neg_line = neg_items_str[:-1] + '\t' + neg_cates_str[:-1]
+                if FirstLine:
+                    f_out.write(neg_line)
+                    FirstLine = False
+                else:
+                    f_out.write('\n' + neg_line)
+
+
+user_history_behavior = {}
+with open('user_history_behavior.txt', 'r') as f:
+    for line in f:
+        linelist = line.strip().split('\t')
+        uid = linelist[0]
+        items = linelist[1].split('\x02')
+        cates = linelist[2].split('\x02')
+        user_history_behavior[uid] = [items, cates]
+
+data_file = ['local_test_splitByUser', 'local_train_splitByUser']
+for file in data_file:
+    createNegData(file)
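+# The emitted *_neg files mirror the sample files line by line: for each history
+# position they carry NEG_SEQ_LENGTH_FOR_EACH_HISTORY_ITEM negative items/cates
+# sampled from the same user's history, excluding the positive item at that slot.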
diff --git a/modelzoo/PNN/data/script/generate_voc.py b/modelzoo/PNN/data/script/generate_voc.py
new file mode 100644
index 00000000000..447fe6393b7
--- /dev/null
+++ b/modelzoo/PNN/data/script/generate_voc.py
@@ -0,0 +1,66 @@
+# import cPickle
+import pickle as cPickle
+
+f_train = open("local_train_splitByUser", "r")
+uid_dict = {}
+mid_dict = {}
+cat_dict = {}
+
+iddd = 0
+for line in f_train:
+    arr = line.strip("\n").split("\t")
+    clk = arr[0]
+    uid = arr[1]
+    mid = arr[2]
+    cat = arr[3]
+    mid_list = arr[4]
+    cat_list = arr[5]
+    if uid not in uid_dict:
+        uid_dict[uid] = 0
+    uid_dict[uid] += 1
+    if mid not in mid_dict:
+        mid_dict[mid] = 0
+    mid_dict[mid] += 1
+    if cat not in cat_dict:
+        cat_dict[cat] = 0
+    cat_dict[cat] += 1
+    if len(mid_list) == 0:
+        continue
+    for m in mid_list.split("\x02"):
+        if m not in mid_dict:
+            mid_dict[m] = 0
+        mid_dict[m] += 1
+    #print iddd
+    iddd += 1
+    for c in cat_list.split("\x02"):
+        if c not in cat_dict:
+            cat_dict[c] = 0
+        cat_dict[c] += 1
+
+sorted_uid_dict = sorted(uid_dict.items(), key=lambda x: x[1], reverse=True)
+sorted_mid_dict = sorted(mid_dict.items(), key=lambda x: x[1], reverse=True)
+sorted_cat_dict = sorted(cat_dict.items(), key=lambda x: x[1], reverse=True)
+
+uid_voc = {}
+index = 0
+for key, value in sorted_uid_dict:
+    uid_voc[key] = index
+    index += 1
+
+mid_voc = {}
+mid_voc["default_mid"] = 0
+index = 1
+for key, value in sorted_mid_dict:
+    mid_voc[key] = index
+    index += 1
+
+cat_voc = {}
+cat_voc["default_cat"] = 0
+index = 1
+for key, value in sorted_cat_dict:
+    cat_voc[key] = index
+    index += 1
+
+cPickle.dump(uid_voc, open("uid_voc.pkl", "wb"))
+cPickle.dump(mid_voc, open("mid_voc.pkl", "wb"))
+cPickle.dump(cat_voc, open("cat_voc.pkl", "wb"))
diff --git a/modelzoo/PNN/data/script/history_behavior_list.py b/modelzoo/PNN/data/script/history_behavior_list.py
new file mode 100644
index 00000000000..6adaf398cef
--- /dev/null
+++ b/modelzoo/PNN/data/script/history_behavior_list.py
@@ -0,0 +1,41 @@
+item_to_cate_map = {}
+with open('item2catmap.txt', 'r') as f:
+    for line in f:
+        linelist = line.strip().split('\t')
+        item = linelist[0]
+        cate = linelist[1]
+        item_to_cate_map[item] = cate
+
+user_history_behavior = {}
+with open('reviews-info', 'r') as f:
+    for line in f:
+        linelist = line.strip().split('\t')
+        uid = linelist[0]
+        item = linelist[1]
+        if uid not in user_history_behavior:
+            user_history_behavior[uid] = [item]
+        else:
+            if item not in user_history_behavior[uid]:
+                user_history_behavior[uid].append(item)
+
+FirstLine = True
+with open('user_history_behavior.txt', 'w') as f:
+    for uid, items in user_history_behavior.items():
+        itemstr = ''
+        catestr = ''
+        for i in items:
+            if i in item_to_cate_map:
+                c = item_to_cate_map[i]
+            else:
+                c = 'Unknown'
+            if not itemstr:
+                itemstr += i
+                catestr += c
+            else:
+                itemstr += ('\x02' + i)
+                catestr += ('\x02' + c)
+        if FirstLine:
+            f.write(uid + '\t' + itemstr + '\t' + catestr)
+            FirstLine = False
+        else:
+            f.write('\n' + uid + '\t' + itemstr + '\t' + catestr)
diff --git a/modelzoo/PNN/data/script/item_map.py b/modelzoo/PNN/data/script/item_map.py
new file mode 100644
index 00000000000..94bebee5184
--- /dev/null
+++ b/modelzoo/PNN/data/script/item_map.py
@@ -0,0 +1,29 @@
+import sys
+from tqdm import tqdm
+
+data_file = ['local_test_splitByUser', 'local_train_splitByUser']
+
+item_to_cate_map = {}
+# 367983
+for file_name in data_file:
+    with open(file_name, 'r') as f:
+        for line in f:
+            linelist = line.strip().split('\t')
+            items = linelist[4].split('\x02')
+            cates = linelist[5].split('\x02')
+            items.append(linelist[2])
+            cates.append(linelist[3])
+            # print(items)
+            # print(cates)
+            for index, item in enumerate(items):
+                if item not in item_to_cate_map:
+                    item_to_cate_map[item] = cates[index]
+
+with open('item2catmap.txt', 'w') as f:
+    firstline = True
+    for item, cate in item_to_cate_map.items():
+        if firstline:
+            f.write(item + '\t' + cate)
+            firstline = False
+        else:
+            f.write('\n' + item + '\t' + cate)
diff --git a/modelzoo/PNN/data/script/local_aggretor.py b/modelzoo/PNN/data/script/local_aggretor.py
new file mode 100644
index 00000000000..1fd8aceb32c
--- /dev/null
+++ b/modelzoo/PNN/data/script/local_aggretor.py
@@ -0,0 +1,47 @@
+import sys
+import hashlib
+import random
+
+fin = open("jointed-new-split-info", "r")
+ftrain = open("local_train", "w")
+ftest = open("local_test", "w")
+
+last_user = "0"
+common_fea = ""
+line_idx = 0
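+# Each input line is one (ds, clk, user, movie_id, cat1) event, grouped by user.
+# For every event after a user's first, the loop below emits a sample whose
+# history is the user's previously clicked items, so histories grow
+# incrementally; clicked events (clk == 1) are appended to the running history.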
+for line in fin:
+    items = line.strip().split("\t")
+    ds = items[0]
+    clk = int(items[1])
+    user = items[2]
+    movie_id = items[3]
+    dt = items[5]
+    cat1 = items[6]
+
+    if ds == "20180118":
+        fo = ftrain
+    else:
+        fo = ftest
+    if user != last_user:
+        movie_id_list = []
+        cate1_list = []
+        #print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 + "\t" + "" + "\t" + ""
+    else:
+        history_clk_num = len(movie_id_list)
+        cat_str = ""
+        mid_str = ""
+        for c1 in cate1_list:
+            cat_str += c1 + "\x02"
+        for mid in movie_id_list:
+            mid_str += mid + "\x02"
+        if len(cat_str) > 0: cat_str = cat_str[:-1]
+        if len(mid_str) > 0: mid_str = mid_str[:-1]
+        if history_clk_num >= 1:  # 8 is the average length of user behavior
+            print(items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +
+                  "\t" + mid_str + "\t" + cat_str,
+                  file=fo)
+    last_user = user
+    if clk:
+        movie_id_list.append(movie_id)
+        cate1_list.append(cat1)
+    line_idx += 1
diff --git a/modelzoo/PNN/data/script/pick2txt.py b/modelzoo/PNN/data/script/pick2txt.py
new file mode 100644
index 00000000000..b7c129ffbe0
--- /dev/null
+++ b/modelzoo/PNN/data/script/pick2txt.py
@@ -0,0 +1,14 @@
+import pickle
+
+def pkl2txt(filename):
+    pklfile = pickle.load(open(filename + '.pkl', 'rb'))
+    with open(filename + '.txt', 'w') as f:
+        f.write('\n'.join(pklfile))
+
+
+if __name__ == '__main__':
+    pkl2txt('uid_voc')
+    pkl2txt('mid_voc')
+    pkl2txt('cat_voc')
\ No newline at end of file
diff --git a/modelzoo/PNN/data/script/process_data.py b/modelzoo/PNN/data/script/process_data.py
new file mode 100644
index 00000000000..0bff64f30bd
--- /dev/null
+++ b/modelzoo/PNN/data/script/process_data.py
@@ -0,0 +1,108 @@
+import sys
+import random
+import time
+
+
+def process_meta(file):
+    fi = open(file, "r")
+    fo = open("item-info", "w")
+    for line in fi:
+        obj = eval(line)
+        cat = obj["categories"][0][-1]
+        print(obj["asin"] + "\t" + cat, file=fo)
+
+
+def process_reviews(file):
+    fi = open(file, "r")
+    user_map = {}
+    fo = open("reviews-info", "w")
+    for line in fi:
+        obj = eval(line)
+        userID = obj["reviewerID"]
+        itemID = obj["asin"]
+        rating = obj["overall"]
+        time = obj["unixReviewTime"]
+        print(userID + "\t" + itemID + "\t" + str(rating) + "\t" + str(time),
+              file=fo)
+
+
+def manual_join():
+    f_rev = open("reviews-info", "r")
+    user_map = {}
+    item_list = []
+    for line in f_rev:
+        line = line.strip()
+        items = line.split("\t")
+        #loctime = time.localtime(float(items[-1]))
+        #items[-1] = time.strftime('%Y-%m-%d', loctime)
+        if items[0] not in user_map:
+            user_map[items[0]] = []
+        user_map[items[0]].append(("\t".join(items), float(items[-1])))
+        item_list.append(items[1])
+    f_meta = open("item-info", "r")
+    meta_map = {}
+    for line in f_meta:
+        arr = line.strip().split("\t")
+        if arr[0] not in meta_map:
+            meta_map[arr[0]] = arr[1]
+            arr = line.strip().split("\t")
+    fo = open("jointed-new", "w")
+    for key in user_map:
+        sorted_user_bh = sorted(user_map[key], key=lambda x: x[1])
+        for line, t in sorted_user_bh:
+            items = line.split("\t")
+            asin = items[1]
+            j = 0
+            while True:
+                asin_neg_index = random.randint(0, len(item_list) - 1)
+                asin_neg = item_list[asin_neg_index]
+                if asin_neg == asin:
+                    continue
+                items[1] = asin_neg
+                print("0" + "\t" + "\t".join(items) + "\t" +
+                      meta_map[asin_neg],
+                      file=fo)
+                j += 1
+                if j == 1:  # negative sampling frequency
+                    break
+            if asin in meta_map:
+                print("1" + "\t" + line + "\t" + meta_map[asin], file=fo)
+            else:
+                print("1" + "\t" + line + "\t" + "default_cat", file=fo)
+
+
+def split_test():
+    fi = open("jointed-new", "r")
+    fo = 
open("jointed-new-split-info", "w") + user_count = {} + for line in fi: + line = line.strip() + user = line.split("\t")[1] + if user not in user_count: + user_count[user] = 0 + user_count[user] += 1 + fi.seek(0) + i = 0 + last_user = "A26ZDKC53OP6JD" + for line in fi: + line = line.strip() + user = line.split("\t")[1] + if user == last_user: + if i < user_count[user] - 2: # 1 + negative samples + print("20180118" + "\t" + line, file=fo) + else: + print("20190119" + "\t" + line, file=fo) + else: + last_user = user + i = 0 + if i < user_count[user] - 2: + print("20180118" + "\t" + line, file=fo) + else: + print("20190119" + "\t" + line, file=fo) + i += 1 + + +process_meta(sys.argv[1]) +process_reviews(sys.argv[2]) +manual_join() +split_test() diff --git a/modelzoo/PNN/data/script/split_by_user.py b/modelzoo/PNN/data/script/split_by_user.py new file mode 100644 index 00000000000..cc7988c6601 --- /dev/null +++ b/modelzoo/PNN/data/script/split_by_user.py @@ -0,0 +1,18 @@ +import random + +fi = open("local_test", "r") +ftrain = open("local_train_splitByUser", "w") +ftest = open("local_test_splitByUser", "w") + +while True: + rand_int = random.randint(1, 10) + noclk_line = fi.readline().strip() + clk_line = fi.readline().strip() + if noclk_line == "" or clk_line == "": + break + if rand_int == 2: + print(noclk_line, file=ftest) + print(clk_line, file=ftest) + else: + print(noclk_line, file=ftrain) + print(clk_line, file=ftrain) diff --git a/modelzoo/PNN/result/README.md b/modelzoo/PNN/result/README.md new file mode 100644 index 00000000000..ccec44eb9a5 --- /dev/null +++ b/modelzoo/PNN/result/README.md @@ -0,0 +1,2 @@ +# Result +Checkpoint & timeline file are default saved in this folder. diff --git a/modelzoo/PNN/script/__init__.py b/modelzoo/PNN/script/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/modelzoo/PNN/script/contrib/__init__.py b/modelzoo/PNN/script/contrib/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/modelzoo/PNN/script/contrib/rnn.py b/modelzoo/PNN/script/contrib/rnn.py new file mode 100644 index 00000000000..b3554993063 --- /dev/null +++ b/modelzoo/PNN/script/contrib/rnn.py @@ -0,0 +1,1153 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +# + +# Licensed under the Apache License, Version 2.0 (the "License"); + +# you may not use this file except in compliance with the License. + +# You may obtain a copy of the License at + +# + +# http://www.apache.org/licenses/LICENSE-2.0 + +# + +# Unless required by applicable law or agreed to in writing, software + +# distributed under the License is distributed on an "AS IS" BASIS, + +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +# See the License for the specific language governing permissions and + +# limitations under the License. + +# ============================================================================== + + +"""RNN helpers for TensorFlow models. 
+@@bidirectional_dynamic_rnn +@@dynamic_rnn +@@raw_rnn +@@static_rnn +@@static_state_saving_rnn +@@static_bidirectional_rnn +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util import nest +import tensorflow as tf + + +def _like_rnncell_(cell): + """Checks that a given object is an RNNCell by using duck typing.""" + + conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"), + + hasattr(cell, "zero_state"), callable(cell)] + + return all(conditions) + + +# pylint: disable=protected-access + +_concat = rnn_cell_impl._concat +try: + _like_rnncell = rnn_cell_impl._like_rnncell +except Exception as e: + _like_rnncell = _like_rnncell_ + + +# pylint: enable=protected-access + + +def _transpose_batch_time(x): + """Transpose the batch and time dimensions of a Tensor. + Retains as much of the static shape information as possible. + Args: + x: A tensor of rank 2 or higher. + Returns: + x transposed along the first two dimensions. + Raises: + ValueError: if `x` is rank 1 or lower. + """ + + x_static_shape = x.get_shape() + + if x_static_shape.ndims is not None and x_static_shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2, but saw shape: %s" % + + (x, x_static_shape)) + + x_rank = array_ops.rank(x) + + x_t = array_ops.transpose( + + x, array_ops.concat( + + ([1, 0], math_ops.range(2, x_rank)), axis=0)) + + x_t.set_shape( + + tensor_shape.TensorShape([ + + x_static_shape[1].value, x_static_shape[0].value + + ]).concatenate(x_static_shape[2:])) + + return x_t + + +def _best_effort_input_batch_size(flat_input): + """Get static input batch size if available, with fallback to the dynamic one. + Args: + flat_input: An iterable of time major input Tensors of shape [max_time, + batch_size, ...]. All inputs should have compatible batch sizes. + Returns: + The batch size in Python integer if available, or a scalar Tensor otherwise. + Raises: + ValueError: if there is any input with an invalid shape. + """ + + for input_ in flat_input: + + shape = input_.shape + + if shape.ndims is None: + continue + + if shape.ndims < 2: + raise ValueError( + + "Expected input tensor %s to have rank at least 2" % input_) + + batch_size = shape[1].value + + if batch_size is not None: + return batch_size + + # Fallback to the dynamic batch size of the first input. + + return array_ops.shape(flat_input[0])[1] + + +def _infer_state_dtype(explicit_dtype, state): + """Infer the dtype of an RNN state. + Args: + explicit_dtype: explicitly declared dtype or None. + state: RNN's hidden state. Must be a Tensor or a nested iterable containing + Tensors. + Returns: + dtype: inferred dtype of hidden state. + Raises: + ValueError: if `state` has heterogeneous dtypes or is empty. 
+ """ + + if explicit_dtype is not None: + + return explicit_dtype + + elif nest.is_sequence(state): + + inferred_dtypes = [element.dtype for element in nest.flatten(state)] + + if not inferred_dtypes: + raise ValueError("Unable to infer dtype from empty state.") + + all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) + + if not all_same: + raise ValueError( + + "State has tensors of different inferred_dtypes. Unable to infer a " + + "single representative dtype.") + + return inferred_dtypes[0] + + else: + + return state.dtype + + +# pylint: disable=unused-argument + +def _rnn_step( + + time, sequence_length, min_sequence_length, max_sequence_length, + + zero_output, state, call_cell, state_size, skip_conditionals=False): + """Calculate one step of a dynamic RNN minibatch. + Returns an (output, state) pair conditioned on the sequence_lengths. + When skip_conditionals=False, the pseudocode is something like: + if t >= max_sequence_length: + return (zero_output, state) + if t < min_sequence_length: + return call_cell() + # Selectively output zeros or output, old state or new state depending + # on if we've finished calculating each row. + new_output, new_state = call_cell() + final_output = np.vstack([ + zero_output if time >= sequence_lengths[r] else new_output_r + for r, new_output_r in enumerate(new_output) + ]) + final_state = np.vstack([ + state[r] if time >= sequence_lengths[r] else new_state_r + for r, new_state_r in enumerate(new_state) + ]) + return (final_output, final_state) + Args: + time: Python int, the current time step + sequence_length: int32 `Tensor` vector of size [batch_size] + min_sequence_length: int32 `Tensor` scalar, min of sequence_length + max_sequence_length: int32 `Tensor` scalar, max of sequence_length + zero_output: `Tensor` vector of shape [output_size] + state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`, + or a list/tuple of such tensors. + call_cell: lambda returning tuple of (new_output, new_state) where + new_output is a `Tensor` matrix of shape `[batch_size, output_size]`. + new_state is a `Tensor` matrix of shape `[batch_size, state_size]`. + state_size: The `cell.state_size` associated with the state. + skip_conditionals: Python bool, whether to skip using the conditional + calculations. This is useful for `dynamic_rnn`, where the input tensor + matches `max_sequence_length`, and using conditionals just slows + everything down. + Returns: + A tuple of (`final_output`, `final_state`) as given by the pseudocode above: + final_output is a `Tensor` matrix of shape [batch_size, output_size] + final_state is either a single `Tensor` matrix, or a tuple of such + matrices (matching length and shapes of input `state`). + Raises: + ValueError: If the cell returns a state tuple whose length does not match + that returned by `state_size`. + """ + + # Convert state to a list for ease of use + + flat_state = nest.flatten(state) + + flat_zero_output = nest.flatten(zero_output) + + def _copy_one_through(output, new_output): + + # If the state contains a scalar value we simply pass it through. + + if output.shape.ndims == 0: + return new_output + + copy_cond = (time >= sequence_length) + + with ops.colocate_with(new_output): + return array_ops.where(copy_cond, output, new_output) + + def _copy_some_through(flat_new_output, flat_new_state): + + # Use broadcasting select to determine which values should get + + # the previous state & zero output, and which values should get + + # a calculated state & output. 
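+        # For example, with sequence_length = [2, 5] and time = 3, batch row 0 is
+        # already past its length, so it keeps the zero output / old state, while
+        # batch row 1 still receives the freshly computed output and state.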
+ + flat_new_output = [ + + _copy_one_through(zero_output, new_output) + + for zero_output, new_output in zip(flat_zero_output, flat_new_output)] + + flat_new_state = [ + + _copy_one_through(state, new_state) + + for state, new_state in zip(flat_state, flat_new_state)] + + return flat_new_output + flat_new_state + + def _maybe_copy_some_through(): + + """Run RNN step. Pass through either no or some past state.""" + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + flat_new_state = nest.flatten(new_state) + + flat_new_output = nest.flatten(new_output) + + return control_flow_ops.cond( + + # if t < min_seq_len: calculate and return everything + + time < min_sequence_length, lambda: flat_new_output + flat_new_state, + + # else copy some of it through + + lambda: _copy_some_through(flat_new_output, flat_new_state)) + + # TODO(ebrevdo): skipping these conditionals may cause a slowdown, + + # but benefits from removing cond() and its gradient. We should + + # profile with and without this switch here. + + if skip_conditionals: + + # Instead of using conditionals, perform the selective copy at all time + + # steps. This is faster when max_seq_len is equal to the number of unrolls + + # (which is typical for dynamic_rnn). + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + new_state = nest.flatten(new_state) + + new_output = nest.flatten(new_output) + + final_output_and_state = _copy_some_through(new_output, new_state) + + else: + + empty_update = lambda: flat_zero_output + flat_state + + final_output_and_state = control_flow_ops.cond( + + # if t >= max_seq_len: copy all state through, output zeros + + time >= max_sequence_length, empty_update, + + # otherwise calculation is required: copy some or all of it through + + _maybe_copy_some_through) + + if len(final_output_and_state) != len(flat_zero_output) + len(flat_state): + raise ValueError("Internal error: state and output were not concatenated " + + "correctly.") + + final_output = final_output_and_state[:len(flat_zero_output)] + + final_state = final_output_and_state[len(flat_zero_output):] + + for output, flat_output in zip(final_output, flat_zero_output): + output.set_shape(flat_output.get_shape()) + + for substate, flat_substate in zip(final_state, flat_state): + substate.set_shape(flat_substate.get_shape()) + + final_output = nest.pack_sequence_as( + + structure=zero_output, flat_sequence=final_output) + + final_state = nest.pack_sequence_as( + + structure=state, flat_sequence=final_state) + + return final_output, final_state + + +def _reverse_seq(input_seq, lengths): + """Reverse a list of Tensors up to specified lengths. + Args: + input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) + or nested tuples of tensors. + lengths: A `Tensor` of dimension batch_size, containing lengths for each + sequence in the batch. If "None" is specified, simply reverses + the list. 
+ Returns: + time-reversed sequence + """ + + if lengths is None: + return list(reversed(input_seq)) + + flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) + + flat_results = [[] for _ in range(len(input_seq))] + + for sequence in zip(*flat_input_seq): + + input_shape = tensor_shape.unknown_shape( + + ndims=sequence[0].get_shape().ndims) + + for input_ in sequence: + input_shape.merge_with(input_.get_shape()) + + input_.set_shape(input_shape) + + # Join into (time, batch_size, depth) + + s_joined = array_ops.stack(sequence) + + # Reverse along dimension 0 + + s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) + + # Split again into list + + result = array_ops.unstack(s_reversed) + + for r, flat_result in zip(result, flat_results): + r.set_shape(input_shape) + + flat_result.append(r) + + results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) + + for input_, flat_result in zip(input_seq, flat_results)] + + return results + + +# +# def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, +# +# initial_state_fw=None, initial_state_bw=None, +# +# dtype=None, parallel_iterations=None, +# +# swap_memory=False, time_major=False, scope=None): +# +# """Creates a dynamic version of bidirectional recurrent neural network. +# +# +# +# Takes input and builds independent forward and backward RNNs. The input_size +# +# of forward and backward cell must match. The initial state for both directions +# +# is zero by default (but can be set optionally) and no intermediate states are +# +# ever returned -- the network is fully unrolled for the given (passed in) +# +# length(s) of the sequence(s) or completely unrolled if length(s) is not +# +# given. +# +# +# +# Args: +# +# cell_fw: An instance of RNNCell, to be used for forward direction. +# +# cell_bw: An instance of RNNCell, to be used for backward direction. +# +# inputs: The RNN inputs. +# +# If time_major == False (default), this must be a tensor of shape: +# +# `[batch_size, max_time, ...]`, or a nested tuple of such elements. +# +# If time_major == True, this must be a tensor of shape: +# +# `[max_time, batch_size, ...]`, or a nested tuple of such elements. +# +# sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, +# +# containing the actual lengths for each of the sequences in the batch. +# +# If not provided, all batch entries are assumed to be full sequences; and +# +# time reversal is applied from time `0` to `max_time` for each sequence. +# +# initial_state_fw: (optional) An initial state for the forward RNN. +# +# This must be a tensor of appropriate type and shape +# +# `[batch_size, cell_fw.state_size]`. +# +# If `cell_fw.state_size` is a tuple, this should be a tuple of +# +# tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. +# +# initial_state_bw: (optional) Same as for `initial_state_fw`, but using +# +# the corresponding properties of `cell_bw`. +# +# dtype: (optional) The data type for the initial states and expected output. +# +# Required if initial_states are not provided or RNN states have a +# +# heterogeneous dtype. +# +# parallel_iterations: (Default: 32). The number of iterations to run in +# +# parallel. Those operations which do not have any temporal dependency +# +# and can be run in parallel, will be. This parameter trades off +# +# time for space. Values >> 1 use more memory but take less time, +# +# while smaller values use less memory but computations take longer. 
+# +# swap_memory: Transparently swap the tensors produced in forward inference +# +# but needed for back prop from GPU to CPU. This allows training RNNs +# +# which would typically not fit on a single GPU, with very minimal (or no) +# +# performance penalty. +# +# time_major: The shape format of the `inputs` and `outputs` Tensors. +# +# If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. +# +# If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. +# +# Using `time_major = True` is a bit more efficient because it avoids +# +# transposes at the beginning and end of the RNN calculation. However, +# +# most TensorFlow data is batch-major, so by default this function +# +# accepts input and emits output in batch-major form. +# +# scope: VariableScope for the created subgraph; defaults to +# +# "bidirectional_rnn" +# +# +# +# Returns: +# +# A tuple (outputs, output_states) where: +# +# outputs: A tuple (output_fw, output_bw) containing the forward and +# +# the backward rnn output `Tensor`. +# +# If time_major == False (default), +# +# output_fw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_bw.output_size]`. +# +# If time_major == True, +# +# output_fw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_bw.output_size]`. +# +# It returns a tuple instead of a single concatenated `Tensor`, unlike +# +# in the `bidirectional_rnn`. If the concatenated one is preferred, +# +# the forward and backward outputs can be concatenated as +# +# `tf.concat(outputs, 2)`. +# +# output_states: A tuple (output_state_fw, output_state_bw) containing +# +# the forward and the backward final states of bidirectional rnn. +# +# +# +# Raises: +# +# TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. 
+# +# """ +# +# +# +# if not _like_rnncell(cell_fw): +# +# raise TypeError("cell_fw must be an instance of RNNCell") +# +# if not _like_rnncell(cell_bw): +# +# raise TypeError("cell_bw must be an instance of RNNCell") +# +# +# +# with vs.variable_scope(scope or "bidirectional_rnn"): +# +# # Forward direction +# +# with vs.variable_scope("fw") as fw_scope: +# +# output_fw, output_state_fw = dynamic_rnn( +# +# cell=cell_fw, inputs=inputs, sequence_length=sequence_length, +# +# initial_state=initial_state_fw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=fw_scope) +# +# +# +# # Backward direction +# +# if not time_major: +# +# time_dim = 1 +# +# batch_dim = 0 +# +# else: +# +# time_dim = 0 +# +# batch_dim = 1 +# +# +# +# def _reverse(input_, seq_lengths, seq_dim, batch_dim): +# +# if seq_lengths is not None: +# +# return array_ops.reverse_sequence( +# +# input=input_, seq_lengths=seq_lengths, +# +# seq_dim=seq_dim, batch_dim=batch_dim) +# +# else: +# +# return array_ops.reverse(input_, axis=[seq_dim]) +# +# +# +# with vs.variable_scope("bw") as bw_scope: +# +# inputs_reverse = _reverse( +# +# inputs, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# tmp, output_state_bw = dynamic_rnn( +# +# cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, +# +# initial_state=initial_state_bw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=bw_scope) +# +# +# +# output_bw = _reverse( +# +# tmp, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# +# +# outputs = (output_fw, output_bw) +# +# output_states = (output_state_fw, output_state_bw) +# +# +# +# return (outputs, output_states) +# + + +def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None, + + dtype=None, parallel_iterations=None, swap_memory=False, + + time_major=False, scope=None): + """Creates a recurrent neural network specified by RNNCell `cell`. + Performs fully dynamic unrolling of `inputs`. + Example: + ```python + # create a BasicRNNCell + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size] + # defining initial state + initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) + # 'state' is a tensor of shape [batch_size, cell_state_size] + outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data, + initial_state=initial_state, + dtype=tf.float32) + ``` + ```python + # create 2 LSTMCells + rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]] + # create a RNN cell composed sequentially of a number of RNNCells + multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers) + # 'outputs' is a tensor of shape [batch_size, max_time, 256] + # 'state' is a N-tuple where N is the number of LSTMCells containing a + # tf.contrib.rnn.LSTMStateTuple for each cell + outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, + inputs=data, + dtype=tf.float32) + ``` + Args: + cell: An instance of RNNCell. + inputs: The RNN inputs. + If `time_major == False` (default), this must be a `Tensor` of shape: + `[batch_size, max_time, ...]`, or a nested tuple of such + elements. + If `time_major == True`, this must be a `Tensor` of shape: + `[max_time, batch_size, ...]`, or a nested tuple of such + elements. + This may also be a (possibly nested) tuple of Tensors satisfying + this property. 
The first two dimensions must match across all the inputs, + but otherwise the ranks and other shape components may differ. + In this case, input to `cell` at each time-step will replicate the + structure of these tuples, except for the time dimension (from which the + time is taken). + The input to `cell` at each time step will be a `Tensor` or (possibly + nested) tuple of Tensors each with dimensions `[batch_size, ...]`. + sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. + Used to copy-through state and zero-out outputs when past a batch + element's sequence length. So it's more for correctness than performance. + initial_state: (optional) An initial state for the RNN. + If `cell.state_size` is an integer, this must be + a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. + If `cell.state_size` is a tuple, this should be a tuple of + tensors having shapes `[batch_size, s] for s in cell.state_size`. + dtype: (optional) The data type for the initial state and expected output. + Required if initial_state is not provided or RNN state has a heterogeneous + dtype. + parallel_iterations: (Default: 32). The number of iterations to run in + parallel. Those operations which do not have any temporal dependency + and can be run in parallel, will be. This parameter trades off + time for space. Values >> 1 use more memory but take less time, + while smaller values use less memory but computations take longer. + swap_memory: Transparently swap the tensors produced in forward inference + but needed for back prop from GPU to CPU. This allows training RNNs + which would typically not fit on a single GPU, with very minimal (or no) + performance penalty. + time_major: The shape format of the `inputs` and `outputs` Tensors. + If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. + If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. + Using `time_major = True` is a bit more efficient because it avoids + transposes at the beginning and end of the RNN calculation. However, + most TensorFlow data is batch-major, so by default this function + accepts input and emits output in batch-major form. + scope: VariableScope for the created subgraph; defaults to "rnn". + Returns: + A pair (outputs, state) where: + outputs: The RNN output `Tensor`. + If time_major == False (default), this will be a `Tensor` shaped: + `[batch_size, max_time, cell.output_size]`. + If time_major == True, this will be a `Tensor` shaped: + `[max_time, batch_size, cell.output_size]`. + Note, if `cell.output_size` is a (possibly nested) tuple of integers + or `TensorShape` objects, then `outputs` will be a tuple having the + same structure as `cell.output_size`, containing Tensors having shapes + corresponding to the shape data in `cell.output_size`. + state: The final state. If `cell.state_size` is an int, this + will be shaped `[batch_size, cell.state_size]`. If it is a + `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. + If it is a (possibly nested) tuple of ints or `TensorShape`, this will + be a tuple having the corresponding shapes. If cells are `LSTMCells` + `state` will be a tuple containing a `LSTMStateTuple` for each cell. + Raises: + TypeError: If `cell` is not an instance of RNNCell. + ValueError: If inputs is None or an empty list. 
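+
+  Example (attention-aware variant):
+    `att_scores` is this repo's addition to the stock TensorFlow signature; a
+    minimal sketch of driving it, where `seq_emb`, `seq_len` and `att_scores`
+    are illustrative tensors and the cell is assumed to be one that accepts a
+    third `att_score` argument (e.g. the `VecAttGRUCell` shipped with this
+    model zoo):
+    ```python
+    cell = VecAttGRUCell(hidden_size)
+    # att_scores: [batch_size, max_time, 1] attention weights
+    outputs, state = dynamic_rnn(cell, inputs=seq_emb,
+                                 att_scores=att_scores,
+                                 sequence_length=seq_len,
+                                 dtype=tf.float32)
+    ```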
+ """ + + if not _like_rnncell(cell): + raise TypeError("cell must be an instance of RNNCell") + + # By default, time_major==False and inputs are batch-major: shaped + + # [batch, time, depth] + + # For internal calculations, we transpose to [time, batch, depth] + + flat_input = nest.flatten(inputs) + + if not time_major: + # (B,T,D) => (T,B,D) + + flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input] + + flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input) + + parallel_iterations = parallel_iterations or 32 + + if sequence_length is not None: + + sequence_length = math_ops.to_int32(sequence_length) + + if sequence_length.get_shape().ndims not in (None, 1): + raise ValueError( + + "sequence_length must be a vector of length batch_size, " + + "but saw shape: %s" % sequence_length.get_shape()) + + sequence_length = array_ops.identity( # Just to find it in the graph. + + sequence_length, name="sequence_length") + + # Create a new scope in which the caching device is either + + # determined by the parent scope, or is set to place the cached + + # Variable using the same placement as for the rest of the RNN. + + with vs.variable_scope(scope or "rnn",reuse=tf.AUTO_REUSE) as varscope:#TODO:user defined reuse + + if varscope.caching_device is None: + varscope.set_caching_device(lambda op: op.device) + + batch_size = _best_effort_input_batch_size(flat_input) + + if initial_state is not None: + + state = initial_state + + else: + + if not dtype: + raise ValueError("If there is no initial_state, you must give a dtype.") + + state = cell.zero_state(batch_size, dtype) + + def _assert_has_shape(x, shape): + + x_shape = array_ops.shape(x) + + packed_shape = array_ops.stack(shape) + + return control_flow_ops.Assert( + + math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), + + ["Expected shape for Tensor %s is " % x.name, + + packed_shape, " but saw shape: ", x_shape]) + + if sequence_length is not None: + # Perform some shape validation + + with ops.control_dependencies( + + [_assert_has_shape(sequence_length, [batch_size])]): + sequence_length = array_ops.identity( + + sequence_length, name="CheckSeqLen") + + inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input) + + (outputs, final_state) = _dynamic_rnn_loop( + + cell, + + inputs, + + state, + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory, + + att_scores=att_scores, + + sequence_length=sequence_length, + + dtype=dtype) + + # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth]. + + # If we are performing batch-major calculations, transpose output back + + # to shape [batch, time, depth] + + if not time_major: + # (T,B,D) => (B,T,D) + + outputs = nest.map_structure(_transpose_batch_time, outputs) + + return (outputs, final_state) + + +def _dynamic_rnn_loop(cell, + + inputs, + + initial_state, + + parallel_iterations, + + swap_memory, + + att_scores=None, + + sequence_length=None, + + dtype=None): + """Internal implementation of Dynamic RNN. + Args: + cell: An instance of RNNCell. + inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested + tuple of such elements. + initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if + `cell.state_size` is a tuple, then this should be a tuple of + tensors having shapes `[batch_size, s] for s in cell.state_size`. + parallel_iterations: Positive Python int. + swap_memory: A Python boolean + sequence_length: (optional) An `int32` `Tensor` of shape [batch_size]. 
+    dtype: (optional) Expected dtype of output. If not specified, inferred from
+      initial_state.
+  Returns:
+    Tuple `(final_outputs, final_state)`.
+    final_outputs:
+      A `Tensor` of shape `[time, batch_size, cell.output_size]`. If
+      `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
+      objects, then this returns a (possibly nested) tuple of Tensors matching
+      the corresponding shapes.
+    final_state:
+      A `Tensor`, or possibly nested tuple of Tensors, matching in length
+      and shapes to `initial_state`.
+  Raises:
+    ValueError: If the input depth cannot be inferred via shape inference
+      from the inputs.
+  """
+
+  state = initial_state
+
+  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"
+
+  state_size = cell.state_size
+
+  flat_input = nest.flatten(inputs)
+
+  flat_output_size = nest.flatten(cell.output_size)
+
+  # Construct an initial output
+
+  input_shape = array_ops.shape(flat_input[0])
+
+  time_steps = input_shape[0]
+
+  batch_size = _best_effort_input_batch_size(flat_input)
+
+  inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
+
+                           for input_ in flat_input)
+
+  const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]
+
+  for shape in inputs_got_shape:
+
+    if not shape[2:].is_fully_defined():
+      raise ValueError(
+
+          "Input size (depth of inputs) must be accessible via shape inference,"
+
+          " but saw value None.")
+
+    got_time_steps = shape[0].value
+
+    got_batch_size = shape[1].value
+
+    if const_time_steps != got_time_steps:
+      raise ValueError(
+
+          "Time steps is not the same for all the elements in the input in a "
+
+          "batch.")
+
+    if const_batch_size != got_batch_size:
+      raise ValueError(
+
+          "Batch_size is not the same for all the elements in the input.")
+
+  # Prepare dynamic conditional copying of state & output
+
+  def _create_zero_arrays(size):
+
+    size = _concat(batch_size, size)
+
+    return array_ops.zeros(
+
+        array_ops.stack(size), _infer_state_dtype(dtype, state))
+
+  flat_zero_output = tuple(_create_zero_arrays(output)
+
+                           for output in flat_output_size)
+
+  zero_output = nest.pack_sequence_as(structure=cell.output_size,
+
+                                      flat_sequence=flat_zero_output)
+
+  if sequence_length is not None:
+    min_sequence_length = math_ops.reduce_min(sequence_length)
+
+    max_sequence_length = math_ops.reduce_max(sequence_length)
+
+  time = array_ops.constant(0, dtype=dtypes.int32, name="time")
+
+  with ops.name_scope("dynamic_rnn") as scope:
+
+    base_name = scope
+
+  def _create_ta(name, dtype):
+
+    return tensor_array_ops.TensorArray(dtype=dtype,
+
+                                        size=time_steps,
+
+                                        tensor_array_name=base_name + name)
+
+  output_ta = tuple(_create_ta("output_%d" % i,
+
+                               _infer_state_dtype(dtype, state))
+
+                    for i in range(len(flat_output_size)))
+
+  input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype)
+
+                   for i in range(len(flat_input)))
+
+  input_ta = tuple(ta.unstack(input_)
+
+                   for ta, input_ in zip(input_ta, flat_input))
+
+  def _time_step(time, output_ta_t, state, att_scores=None):
+
+    """Take a time step of the dynamic RNN.
+    Args:
+      time: int32 scalar Tensor.
+      output_ta_t: List of `TensorArray`s that represent the output.
+      state: nested tuple of vector tensors that represent the state.
+    Returns:
+      The tuple (time + 1, output_ta_t with updated flow, new_state).
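+    When `att_scores` is not None, the per-step slice `att_scores[:, time, :]`
+    is forwarded to the cell as its third argument (`att_score`); otherwise
+    the cell is called with `(input_t, state)` only.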
+ """ + + input_t = tuple(ta.read(time) for ta in input_ta) + + # Restore some shape information + + for input_, shape in zip(input_t, inputs_got_shape): + input_.set_shape(shape[1:]) + + input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) + + if att_scores is not None: + + att_score = att_scores[:, time, :] + + call_cell = lambda: cell(input_t, state, att_score) + + else: + + call_cell = lambda: cell(input_t, state) + + if sequence_length is not None: + + (output, new_state) = _rnn_step( + + time=time, + + sequence_length=sequence_length, + + min_sequence_length=min_sequence_length, + + max_sequence_length=max_sequence_length, + + zero_output=zero_output, + + state=state, + + call_cell=call_cell, + + state_size=state_size, + + skip_conditionals=True) + + else: + + (output, new_state) = call_cell() + + # Pack state if using state tuples + + output = nest.flatten(output) + + output_ta_t = tuple( + + ta.write(time, out) for ta, out in zip(output_ta_t, output)) + + if att_scores is not None: + + return (time + 1, output_ta_t, new_state, att_scores) + + else: + + return (time + 1, output_ta_t, new_state) + + if att_scores is not None: + + _, output_final_ta, final_state, _ = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state, att_scores), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + else: + + _, output_final_ta, final_state = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + # Unpack final output if not using output tuples. + + final_outputs = tuple(ta.stack() for ta in output_final_ta) + + # Restore some shape information + + for output, output_size in zip(final_outputs, flat_output_size): + shape = _concat( + + [const_time_steps, const_batch_size], output_size, static=True) + + output.set_shape(shape) + + final_outputs = nest.pack_sequence_as( + + structure=cell.output_size, flat_sequence=final_outputs) + + return (final_outputs, final_state) \ No newline at end of file diff --git a/modelzoo/PNN/script/contrib/rnn_v2.py b/modelzoo/PNN/script/contrib/rnn_v2.py new file mode 100644 index 00000000000..a2bd625cd8b --- /dev/null +++ b/modelzoo/PNN/script/contrib/rnn_v2.py @@ -0,0 +1,1452 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +# + +# Licensed under the Apache License, Version 2.0 (the "License"); + +# you may not use this file except in compliance with the License. + +# You may obtain a copy of the License at + +# + +# http://www.apache.org/licenses/LICENSE-2.0 + +# + +# Unless required by applicable law or agreed to in writing, software + +# distributed under the License is distributed on an "AS IS" BASIS, + +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +# See the License for the specific language governing permissions and + +# limitations under the License. + +# ============================================================================== + + +"""RNN helpers for TensorFlow models. 
+
+
+
+@@bidirectional_dynamic_rnn
+
+@@dynamic_rnn
+
+@@raw_rnn
+
+@@static_rnn
+
+@@static_state_saving_rnn
+
+@@static_bidirectional_rnn
+
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import rnn_cell_impl
+from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.util import nest
+import tensorflow as tf
+
+
+def _like_rnncell_(cell):
+  """Checks that a given object is an RNNCell by using duck typing."""
+
+  conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"),
+
+                hasattr(cell, "zero_state"), callable(cell)]
+
+  return all(conditions)
+
+
+# pylint: disable=protected-access
+
+_concat = rnn_cell_impl._concat
+try:
+  _like_rnncell = rnn_cell_impl._like_rnncell
+except AttributeError:
+  _like_rnncell = _like_rnncell_
+
+
+# pylint: enable=protected-access
+
+
+def _transpose_batch_time(x):
+  """Transpose the batch and time dimensions of a Tensor.
+
+
+
+  Retains as much of the static shape information as possible.
+
+
+
+  Args:
+
+    x: A tensor of rank 2 or higher.
+
+
+
+  Returns:
+
+    x transposed along the first two dimensions.
+
+
+
+  Raises:
+
+    ValueError: if `x` is rank 1 or lower.
+
+  """
+
+  x_static_shape = x.get_shape()
+
+  if x_static_shape.ndims is not None and x_static_shape.ndims < 2:
+    raise ValueError(
+
+        "Expected input tensor %s to have rank at least 2, but saw shape: %s" %
+
+        (x, x_static_shape))
+
+  x_rank = array_ops.rank(x)
+
+  x_t = array_ops.transpose(
+
+      x, array_ops.concat(
+
+          ([1, 0], math_ops.range(2, x_rank)), axis=0))
+
+  x_t.set_shape(
+
+      tensor_shape.TensorShape([
+
+          x_static_shape[1], x_static_shape[0]
+
+      ]).concatenate(x_static_shape[2:]))
+
+  return x_t
+
+
+def _best_effort_input_batch_size(flat_input):
+  """Get static input batch size if available, with fallback to the dynamic one.
+
+
+
+  Args:
+
+    flat_input: An iterable of time major input Tensors of shape [max_time,
+
+      batch_size, ...]. All inputs should have compatible batch sizes.
+
+
+
+  Returns:
+
+    The batch size in Python integer if available, or a scalar Tensor otherwise.
+
+
+
+  Raises:
+
+    ValueError: if there is any input with an invalid shape.
+
+  """
+
+  for input_ in flat_input:
+
+    shape = input_.shape
+
+    if shape.ndims is None:
+      continue
+
+    if shape.ndims < 2:
+      raise ValueError(
+
+          "Expected input tensor %s to have rank at least 2" % input_)
+
+    batch_size = shape[1]
+
+    if batch_size is not None:
+      return batch_size
+
+  # Fallback to the dynamic batch size of the first input.
+
+  return array_ops.shape(flat_input[0])[1]
+
+
+def _infer_state_dtype(explicit_dtype, state):
+  """Infer the dtype of an RNN state.
+
+
+
+  Args:
+
+    explicit_dtype: explicitly declared dtype or None.
+
+    state: RNN's hidden state. Must be a Tensor or a nested iterable containing
+
+      Tensors.
+
+
+
+  Returns:
+
+    dtype: inferred dtype of hidden state.
+
+
+
+  Raises:
+
+    ValueError: if `state` has heterogeneous dtypes or is empty.
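+
+
+
+  For example, `_infer_state_dtype(None, (h, c))` returns the dtype shared by
+
+  `h` and `c`, and raises a `ValueError` if their dtypes differ.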
+ + """ + + if explicit_dtype is not None: + + return explicit_dtype + + elif nest.is_sequence(state): + + inferred_dtypes = [element.dtype for element in nest.flatten(state)] + + if not inferred_dtypes: + raise ValueError("Unable to infer dtype from empty state.") + + all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) + + if not all_same: + raise ValueError( + + "State has tensors of different inferred_dtypes. Unable to infer a " + + "single representative dtype.") + + return inferred_dtypes[0] + + else: + + return state.dtype + + +# pylint: disable=unused-argument + +def _rnn_step( + + time, sequence_length, min_sequence_length, max_sequence_length, + + zero_output, state, call_cell, state_size, skip_conditionals=False): + """Calculate one step of a dynamic RNN minibatch. + + + + Returns an (output, state) pair conditioned on the sequence_lengths. + + When skip_conditionals=False, the pseudocode is something like: + + + + if t >= max_sequence_length: + + return (zero_output, state) + + if t < min_sequence_length: + + return call_cell() + + + + # Selectively output zeros or output, old state or new state depending + + # on if we've finished calculating each row. + + new_output, new_state = call_cell() + + final_output = np.vstack([ + + zero_output if time >= sequence_lengths[r] else new_output_r + + for r, new_output_r in enumerate(new_output) + + ]) + + final_state = np.vstack([ + + state[r] if time >= sequence_lengths[r] else new_state_r + + for r, new_state_r in enumerate(new_state) + + ]) + + return (final_output, final_state) + + + + Args: + + time: Python int, the current time step + + sequence_length: int32 `Tensor` vector of size [batch_size] + + min_sequence_length: int32 `Tensor` scalar, min of sequence_length + + max_sequence_length: int32 `Tensor` scalar, max of sequence_length + + zero_output: `Tensor` vector of shape [output_size] + + state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`, + + or a list/tuple of such tensors. + + call_cell: lambda returning tuple of (new_output, new_state) where + + new_output is a `Tensor` matrix of shape `[batch_size, output_size]`. + + new_state is a `Tensor` matrix of shape `[batch_size, state_size]`. + + state_size: The `cell.state_size` associated with the state. + + skip_conditionals: Python bool, whether to skip using the conditional + + calculations. This is useful for `dynamic_rnn`, where the input tensor + + matches `max_sequence_length`, and using conditionals just slows + + everything down. + + + + Returns: + + A tuple of (`final_output`, `final_state`) as given by the pseudocode above: + + final_output is a `Tensor` matrix of shape [batch_size, output_size] + + final_state is either a single `Tensor` matrix, or a tuple of such + + matrices (matching length and shapes of input `state`). + + + + Raises: + + ValueError: If the cell returns a state tuple whose length does not match + + that returned by `state_size`. + + """ + + # Convert state to a list for ease of use + + flat_state = nest.flatten(state) + + flat_zero_output = nest.flatten(zero_output) + + def _copy_one_through(output, new_output): + + # If the state contains a scalar value we simply pass it through. 
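+
+    # Otherwise, select per batch row below: once `time >= sequence_length[r]`,
+    # row r keeps its previous value (state copy-through / zero output), while
+    # unfinished rows take the freshly computed value.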
+ + if output.shape.ndims == 0: + return new_output + + copy_cond = (time >= sequence_length) + + with ops.colocate_with(new_output): + return array_ops.where(copy_cond, output, new_output) + + def _copy_some_through(flat_new_output, flat_new_state): + + # Use broadcasting select to determine which values should get + + # the previous state & zero output, and which values should get + + # a calculated state & output. + + flat_new_output = [ + + _copy_one_through(zero_output, new_output) + + for zero_output, new_output in zip(flat_zero_output, flat_new_output)] + + flat_new_state = [ + + _copy_one_through(state, new_state) + + for state, new_state in zip(flat_state, flat_new_state)] + + return flat_new_output + flat_new_state + + def _maybe_copy_some_through(): + + """Run RNN step. Pass through either no or some past state.""" + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + flat_new_state = nest.flatten(new_state) + + flat_new_output = nest.flatten(new_output) + + return control_flow_ops.cond( + + # if t < min_seq_len: calculate and return everything + + time < min_sequence_length, lambda: flat_new_output + flat_new_state, + + # else copy some of it through + + lambda: _copy_some_through(flat_new_output, flat_new_state)) + + # TODO(ebrevdo): skipping these conditionals may cause a slowdown, + + # but benefits from removing cond() and its gradient. We should + + # profile with and without this switch here. + + if skip_conditionals: + + # Instead of using conditionals, perform the selective copy at all time + + # steps. This is faster when max_seq_len is equal to the number of unrolls + + # (which is typical for dynamic_rnn). + + new_output, new_state = call_cell() + + nest.assert_same_structure(state, new_state) + + new_state = nest.flatten(new_state) + + new_output = nest.flatten(new_output) + + final_output_and_state = _copy_some_through(new_output, new_state) + + else: + + empty_update = lambda: flat_zero_output + flat_state + + final_output_and_state = control_flow_ops.cond( + + # if t >= max_seq_len: copy all state through, output zeros + + time >= max_sequence_length, empty_update, + + # otherwise calculation is required: copy some or all of it through + + _maybe_copy_some_through) + + if len(final_output_and_state) != len(flat_zero_output) + len(flat_state): + raise ValueError("Internal error: state and output were not concatenated " + + "correctly.") + + final_output = final_output_and_state[:len(flat_zero_output)] + + final_state = final_output_and_state[len(flat_zero_output):] + + for output, flat_output in zip(final_output, flat_zero_output): + output.set_shape(flat_output.get_shape()) + + for substate, flat_substate in zip(final_state, flat_state): + substate.set_shape(flat_substate.get_shape()) + + final_output = nest.pack_sequence_as( + + structure=zero_output, flat_sequence=final_output) + + final_state = nest.pack_sequence_as( + + structure=state, flat_sequence=final_state) + + return final_output, final_state + + +def _reverse_seq(input_seq, lengths): + """Reverse a list of Tensors up to specified lengths. + + + + Args: + + input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) + + or nested tuples of tensors. + + lengths: A `Tensor` of dimension batch_size, containing lengths for each + + sequence in the batch. If "None" is specified, simply reverses + + the list. 
+ + + + Returns: + + time-reversed sequence + + """ + + if lengths is None: + return list(reversed(input_seq)) + + flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) + + flat_results = [[] for _ in range(len(input_seq))] + + for sequence in zip(*flat_input_seq): + + input_shape = tensor_shape.unknown_shape( + + ndims=sequence[0].get_shape().ndims) + + for input_ in sequence: + input_shape.merge_with(input_.get_shape()) + + input_.set_shape(input_shape) + + # Join into (time, batch_size, depth) + + s_joined = array_ops.stack(sequence) + + # Reverse along dimension 0 + + s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) + + # Split again into list + + result = array_ops.unstack(s_reversed) + + for r, flat_result in zip(result, flat_results): + r.set_shape(input_shape) + + flat_result.append(r) + + results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) + + for input_, flat_result in zip(input_seq, flat_results)] + + return results + + +# +# def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, +# +# initial_state_fw=None, initial_state_bw=None, +# +# dtype=None, parallel_iterations=None, +# +# swap_memory=False, time_major=False, scope=None): +# +# """Creates a dynamic version of bidirectional recurrent neural network. +# +# +# +# Takes input and builds independent forward and backward RNNs. The input_size +# +# of forward and backward cell must match. The initial state for both directions +# +# is zero by default (but can be set optionally) and no intermediate states are +# +# ever returned -- the network is fully unrolled for the given (passed in) +# +# length(s) of the sequence(s) or completely unrolled if length(s) is not +# +# given. +# +# +# +# Args: +# +# cell_fw: An instance of RNNCell, to be used for forward direction. +# +# cell_bw: An instance of RNNCell, to be used for backward direction. +# +# inputs: The RNN inputs. +# +# If time_major == False (default), this must be a tensor of shape: +# +# `[batch_size, max_time, ...]`, or a nested tuple of such elements. +# +# If time_major == True, this must be a tensor of shape: +# +# `[max_time, batch_size, ...]`, or a nested tuple of such elements. +# +# sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, +# +# containing the actual lengths for each of the sequences in the batch. +# +# If not provided, all batch entries are assumed to be full sequences; and +# +# time reversal is applied from time `0` to `max_time` for each sequence. +# +# initial_state_fw: (optional) An initial state for the forward RNN. +# +# This must be a tensor of appropriate type and shape +# +# `[batch_size, cell_fw.state_size]`. +# +# If `cell_fw.state_size` is a tuple, this should be a tuple of +# +# tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. +# +# initial_state_bw: (optional) Same as for `initial_state_fw`, but using +# +# the corresponding properties of `cell_bw`. +# +# dtype: (optional) The data type for the initial states and expected output. +# +# Required if initial_states are not provided or RNN states have a +# +# heterogeneous dtype. +# +# parallel_iterations: (Default: 32). The number of iterations to run in +# +# parallel. Those operations which do not have any temporal dependency +# +# and can be run in parallel, will be. This parameter trades off +# +# time for space. Values >> 1 use more memory but take less time, +# +# while smaller values use less memory but computations take longer. 
+# +# swap_memory: Transparently swap the tensors produced in forward inference +# +# but needed for back prop from GPU to CPU. This allows training RNNs +# +# which would typically not fit on a single GPU, with very minimal (or no) +# +# performance penalty. +# +# time_major: The shape format of the `inputs` and `outputs` Tensors. +# +# If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. +# +# If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. +# +# Using `time_major = True` is a bit more efficient because it avoids +# +# transposes at the beginning and end of the RNN calculation. However, +# +# most TensorFlow data is batch-major, so by default this function +# +# accepts input and emits output in batch-major form. +# +# scope: VariableScope for the created subgraph; defaults to +# +# "bidirectional_rnn" +# +# +# +# Returns: +# +# A tuple (outputs, output_states) where: +# +# outputs: A tuple (output_fw, output_bw) containing the forward and +# +# the backward rnn output `Tensor`. +# +# If time_major == False (default), +# +# output_fw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[batch_size, max_time, cell_bw.output_size]`. +# +# If time_major == True, +# +# output_fw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_fw.output_size]` +# +# and output_bw will be a `Tensor` shaped: +# +# `[max_time, batch_size, cell_bw.output_size]`. +# +# It returns a tuple instead of a single concatenated `Tensor`, unlike +# +# in the `bidirectional_rnn`. If the concatenated one is preferred, +# +# the forward and backward outputs can be concatenated as +# +# `tf.concat(outputs, 2)`. +# +# output_states: A tuple (output_state_fw, output_state_bw) containing +# +# the forward and the backward final states of bidirectional rnn. +# +# +# +# Raises: +# +# TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. 
+# +# """ +# +# +# +# if not _like_rnncell(cell_fw): +# +# raise TypeError("cell_fw must be an instance of RNNCell") +# +# if not _like_rnncell(cell_bw): +# +# raise TypeError("cell_bw must be an instance of RNNCell") +# +# +# +# with vs.variable_scope(scope or "bidirectional_rnn"): +# +# # Forward direction +# +# with vs.variable_scope("fw") as fw_scope: +# +# output_fw, output_state_fw = dynamic_rnn( +# +# cell=cell_fw, inputs=inputs, sequence_length=sequence_length, +# +# initial_state=initial_state_fw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=fw_scope) +# +# +# +# # Backward direction +# +# if not time_major: +# +# time_dim = 1 +# +# batch_dim = 0 +# +# else: +# +# time_dim = 0 +# +# batch_dim = 1 +# +# +# +# def _reverse(input_, seq_lengths, seq_dim, batch_dim): +# +# if seq_lengths is not None: +# +# return array_ops.reverse_sequence( +# +# input=input_, seq_lengths=seq_lengths, +# +# seq_dim=seq_dim, batch_dim=batch_dim) +# +# else: +# +# return array_ops.reverse(input_, axis=[seq_dim]) +# +# +# +# with vs.variable_scope("bw") as bw_scope: +# +# inputs_reverse = _reverse( +# +# inputs, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# tmp, output_state_bw = dynamic_rnn( +# +# cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, +# +# initial_state=initial_state_bw, dtype=dtype, +# +# parallel_iterations=parallel_iterations, swap_memory=swap_memory, +# +# time_major=time_major, scope=bw_scope) +# +# +# +# output_bw = _reverse( +# +# tmp, seq_lengths=sequence_length, +# +# seq_dim=time_dim, batch_dim=batch_dim) +# +# +# +# outputs = (output_fw, output_bw) +# +# output_states = (output_state_fw, output_state_bw) +# +# +# +# return (outputs, output_states) +# + + +def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None, + + dtype=None, parallel_iterations=None, swap_memory=False, + + time_major=False, scope=None): + """Creates a recurrent neural network specified by RNNCell `cell`. + + + + Performs fully dynamic unrolling of `inputs`. + + + + Example: + + + + ```python + + # create a BasicRNNCell + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + + + + # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size] + + + + # defining initial state + + initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) + + + + # 'state' is a tensor of shape [batch_size, cell_state_size] + + outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data, + + initial_state=initial_state, + + dtype=tf.float32) + + ``` + + + + ```python + + # create 2 LSTMCells + + rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]] + + + + # create a RNN cell composed sequentially of a number of RNNCells + + multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers) + + + + # 'outputs' is a tensor of shape [batch_size, max_time, 256] + + # 'state' is a N-tuple where N is the number of LSTMCells containing a + + # tf.contrib.rnn.LSTMStateTuple for each cell + + outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, + + inputs=data, + + dtype=tf.float32) + + ``` + + + + + + Args: + + cell: An instance of RNNCell. + + inputs: The RNN inputs. + + If `time_major == False` (default), this must be a `Tensor` of shape: + + `[batch_size, max_time, ...]`, or a nested tuple of such + + elements. + + If `time_major == True`, this must be a `Tensor` of shape: + + `[max_time, batch_size, ...]`, or a nested tuple of such + + elements. 
+ + This may also be a (possibly nested) tuple of Tensors satisfying + + this property. The first two dimensions must match across all the inputs, + + but otherwise the ranks and other shape components may differ. + + In this case, input to `cell` at each time-step will replicate the + + structure of these tuples, except for the time dimension (from which the + + time is taken). + + The input to `cell` at each time step will be a `Tensor` or (possibly + + nested) tuple of Tensors each with dimensions `[batch_size, ...]`. + + sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. + + Used to copy-through state and zero-out outputs when past a batch + + element's sequence length. So it's more for correctness than performance. + + initial_state: (optional) An initial state for the RNN. + + If `cell.state_size` is an integer, this must be + + a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. + + If `cell.state_size` is a tuple, this should be a tuple of + + tensors having shapes `[batch_size, s] for s in cell.state_size`. + + dtype: (optional) The data type for the initial state and expected output. + + Required if initial_state is not provided or RNN state has a heterogeneous + + dtype. + + parallel_iterations: (Default: 32). The number of iterations to run in + + parallel. Those operations which do not have any temporal dependency + + and can be run in parallel, will be. This parameter trades off + + time for space. Values >> 1 use more memory but take less time, + + while smaller values use less memory but computations take longer. + + swap_memory: Transparently swap the tensors produced in forward inference + + but needed for back prop from GPU to CPU. This allows training RNNs + + which would typically not fit on a single GPU, with very minimal (or no) + + performance penalty. + + time_major: The shape format of the `inputs` and `outputs` Tensors. + + If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. + + If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. + + Using `time_major = True` is a bit more efficient because it avoids + + transposes at the beginning and end of the RNN calculation. However, + + most TensorFlow data is batch-major, so by default this function + + accepts input and emits output in batch-major form. + + scope: VariableScope for the created subgraph; defaults to "rnn". + + + + Returns: + + A pair (outputs, state) where: + + + + outputs: The RNN output `Tensor`. + + + + If time_major == False (default), this will be a `Tensor` shaped: + + `[batch_size, max_time, cell.output_size]`. + + + + If time_major == True, this will be a `Tensor` shaped: + + `[max_time, batch_size, cell.output_size]`. + + + + Note, if `cell.output_size` is a (possibly nested) tuple of integers + + or `TensorShape` objects, then `outputs` will be a tuple having the + + same structure as `cell.output_size`, containing Tensors having shapes + + corresponding to the shape data in `cell.output_size`. + + + + state: The final state. If `cell.state_size` is an int, this + + will be shaped `[batch_size, cell.state_size]`. If it is a + + `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. + + If it is a (possibly nested) tuple of ints or `TensorShape`, this will + + be a tuple having the corresponding shapes. If cells are `LSTMCells` + + `state` will be a tuple containing a `LSTMStateTuple` for each cell. + + + + Raises: + + TypeError: If `cell` is not an instance of RNNCell. 
+
+    ValueError: If inputs is None or an empty list.
+
+  """
+
+  if not _like_rnncell(cell):
+    raise TypeError("cell must be an instance of RNNCell")
+
+  # By default, time_major==False and inputs are batch-major: shaped
+
+  #   [batch, time, depth]
+
+  # For internal calculations, we transpose to [time, batch, depth]
+
+  flat_input = nest.flatten(inputs)
+
+  if not time_major:
+    # (B,T,D) => (T,B,D)
+
+    flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
+
+    flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)
+
+  parallel_iterations = parallel_iterations or 32
+
+  if sequence_length is not None:
+
+    sequence_length = math_ops.to_int32(sequence_length)
+
+    if sequence_length.get_shape().ndims not in (None, 1):
+      raise ValueError(
+
+          "sequence_length must be a vector of length batch_size, "
+
+          "but saw shape: %s" % sequence_length.get_shape())
+
+    sequence_length = array_ops.identity(  # Just to find it in the graph.
+
+        sequence_length, name="sequence_length")
+
+  # Create a new scope in which the caching device is either
+
+  # determined by the parent scope, or is set to place the cached
+
+  # Variable using the same placement as for the rest of the RNN.
+
+  try:
+    reuse = tf.AUTO_REUSE
+  except AttributeError:
+    reuse = tf.compat.v1.AUTO_REUSE
+
+  with vs.variable_scope(scope or "rnn", reuse=reuse) as varscope:  # TODO: user-defined reuse
+
+    if varscope.caching_device is None:
+      varscope.set_caching_device(lambda op: op.device)
+
+    batch_size = _best_effort_input_batch_size(flat_input)
+
+    if initial_state is not None:
+
+      state = initial_state
+
+    else:
+
+      if not dtype:
+        raise ValueError("If there is no initial_state, you must give a dtype.")
+
+      state = cell.zero_state(batch_size, dtype)
+
+    def _assert_has_shape(x, shape):
+
+      x_shape = array_ops.shape(x)
+
+      packed_shape = array_ops.stack(shape)
+
+      return control_flow_ops.Assert(
+
+          math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)),
+
+          ["Expected shape for Tensor %s is " % x.name,
+
+           packed_shape, " but saw shape: ", x_shape])
+
+    if sequence_length is not None:
+      # Perform some shape validation
+
+      with ops.control_dependencies(
+
+          [_assert_has_shape(sequence_length, [batch_size])]):
+        sequence_length = array_ops.identity(
+
+            sequence_length, name="CheckSeqLen")
+
+    inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)
+
+    (outputs, final_state) = _dynamic_rnn_loop(
+
+        cell,
+
+        inputs,
+
+        state,
+
+        parallel_iterations=parallel_iterations,
+
+        swap_memory=swap_memory,
+
+        att_scores=att_scores,
+
+        sequence_length=sequence_length,
+
+        dtype=dtype)
+
+    # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
+
+    # If we are performing batch-major calculations, transpose output back
+
+    # to shape [batch, time, depth]
+
+    if not time_major:
+      # (T,B,D) => (B,T,D)
+
+      outputs = nest.map_structure(_transpose_batch_time, outputs)
+
+    return (outputs, final_state)
+
+
+def _dynamic_rnn_loop(cell,
+
+                      inputs,
+
+                      initial_state,
+
+                      parallel_iterations,
+
+                      swap_memory,
+
+                      att_scores=None,
+
+                      sequence_length=None,
+
+                      dtype=None):
+  """Internal implementation of Dynamic RNN.
+
+
+
+  Args:
+
+    cell: An instance of RNNCell.
+
+    inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
+
+      tuple of such elements.
+
+    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
+
+      `cell.state_size` is a tuple, then this should be a tuple of
+
+      tensors having shapes `[batch_size, s] for s in cell.state_size`.
+
+    parallel_iterations: Positive Python int.
+
+    swap_memory: A Python boolean
+
+    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
+
+    dtype: (optional) Expected dtype of output. If not specified, inferred from
+
+      initial_state.
+
+
+
+  Returns:
+
+    Tuple `(final_outputs, final_state)`.
+
+    final_outputs:
+
+      A `Tensor` of shape `[time, batch_size, cell.output_size]`. If
+
+      `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
+
+      objects, then this returns a (possibly nested) tuple of Tensors matching
+
+      the corresponding shapes.
+
+    final_state:
+
+      A `Tensor`, or possibly nested tuple of Tensors, matching in length
+
+      and shapes to `initial_state`.
+
+
+
+  Raises:
+
+    ValueError: If the input depth cannot be inferred via shape inference
+
+      from the inputs.
+
+  """
+
+  state = initial_state
+
+  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"
+
+  state_size = cell.state_size
+
+  flat_input = nest.flatten(inputs)
+
+  flat_output_size = nest.flatten(cell.output_size)
+
+  # Construct an initial output
+
+  input_shape = array_ops.shape(flat_input[0])
+
+  time_steps = input_shape[0]
+
+  batch_size = _best_effort_input_batch_size(flat_input)
+
+  inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
+
+                           for input_ in flat_input)
+
+  const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]
+
+  for shape in inputs_got_shape:
+
+    if not shape[2:].is_fully_defined():
+      raise ValueError(
+
+          "Input size (depth of inputs) must be accessible via shape inference,"
+
+          " but saw value None.")
+
+    got_time_steps = shape[0]
+
+    got_batch_size = shape[1]
+
+    if const_time_steps != got_time_steps:
+      raise ValueError(
+
+          "Time steps is not the same for all the elements in the input in a "
+
+          "batch.")
+
+    if const_batch_size != got_batch_size:
+      raise ValueError(
+
+          "Batch_size is not the same for all the elements in the input.")
+
+  # Prepare dynamic conditional copying of state & output
+
+  def _create_zero_arrays(size):
+
+    size = _concat(batch_size, size)
+
+    return array_ops.zeros(
+
+        array_ops.stack(size), _infer_state_dtype(dtype, state))
+
+  flat_zero_output = tuple(_create_zero_arrays(output)
+
+                           for output in flat_output_size)
+
+  zero_output = nest.pack_sequence_as(structure=cell.output_size,
+
+                                      flat_sequence=flat_zero_output)
+
+  if sequence_length is not None:
+    min_sequence_length = math_ops.reduce_min(sequence_length)
+
+    max_sequence_length = math_ops.reduce_max(sequence_length)
+
+  time = array_ops.constant(0, dtype=dtypes.int32, name="time")
+
+  with ops.name_scope("dynamic_rnn") as scope:
+
+    base_name = scope
+
+  def _create_ta(name, dtype):
+
+    return tensor_array_ops.TensorArray(dtype=dtype,
+
+                                        size=time_steps,
+
+                                        tensor_array_name=base_name + name)
+
+  output_ta = tuple(_create_ta("output_%d" % i,
+
+                               _infer_state_dtype(dtype, state))
+
+                    for i in range(len(flat_output_size)))
+
+  input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype)
+
+                   for i in range(len(flat_input)))
+
+  input_ta = tuple(ta.unstack(input_)
+
+                   for ta, input_ in zip(input_ta, flat_input))
+
+  def _time_step(time, output_ta_t, state, att_scores=None):
+
+    """Take a time step of the dynamic RNN.
+
+
+
+    Args:
+
+      time: int32 scalar Tensor.
+
+      output_ta_t: List of `TensorArray`s that represent the output.
+
+      state: nested tuple of vector tensors that represent the state.
+
+
+
+    Returns:
+
+      The tuple (time + 1, output_ta_t with updated flow, new_state).
+ + """ + + input_t = tuple(ta.read(time) for ta in input_ta) + + # Restore some shape information + + for input_, shape in zip(input_t, inputs_got_shape): + input_.set_shape(shape[1:]) + + input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) + + if att_scores is not None: + + att_score = att_scores[:, time, :] + + call_cell = lambda: cell(input_t, state, att_score) + + else: + + call_cell = lambda: cell(input_t, state) + + if sequence_length is not None: + + (output, new_state) = _rnn_step( + + time=time, + + sequence_length=sequence_length, + + min_sequence_length=min_sequence_length, + + max_sequence_length=max_sequence_length, + + zero_output=zero_output, + + state=state, + + call_cell=call_cell, + + state_size=state_size, + + skip_conditionals=True) + + else: + + (output, new_state) = call_cell() + + # Pack state if using state tuples + + output = nest.flatten(output) + + output_ta_t = tuple( + + ta.write(time, out) for ta, out in zip(output_ta_t, output)) + + if att_scores is not None: + + return (time + 1, output_ta_t, new_state, att_scores) + + else: + + return (time + 1, output_ta_t, new_state) + + if att_scores is not None: + + _, output_final_ta, final_state, _ = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state, att_scores), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + else: + + _, output_final_ta, final_state = control_flow_ops.while_loop( + + cond=lambda time, *_: time < time_steps, + + body=_time_step, + + loop_vars=(time, output_ta, state), + + parallel_iterations=parallel_iterations, + + swap_memory=swap_memory) + + # Unpack final output if not using output tuples. + + final_outputs = tuple(ta.stack() for ta in output_final_ta) + + # Restore some shape information + + for output, output_size in zip(final_outputs, flat_output_size): + shape = _concat( + + [const_time_steps, const_batch_size], output_size, static=True) + + output.set_shape(shape) + + final_outputs = nest.pack_sequence_as( + + structure=cell.output_size, flat_sequence=final_outputs) + + return (final_outputs, final_state) diff --git a/modelzoo/PNN/script/contrib/utils.py b/modelzoo/PNN/script/contrib/utils.py new file mode 100644 index 00000000000..692f4ef6e89 --- /dev/null +++ b/modelzoo/PNN/script/contrib/utils.py @@ -0,0 +1,378 @@ +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.ops.rnn_cell import * +from tensorflow.python.util import nest + +_BIAS_VARIABLE_NAME = "bias" + +_WEIGHTS_VARIABLE_NAME = "kernel" + + +class _Linear_(object): + """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. + + + + Args: + + args: a 2D Tensor or a list of 2D, batch x n, Tensors. + + output_size: int, second dimension of weight variable. + + dtype: data type for variables. + + build_bias: boolean, whether to build a bias variable. + + bias_initializer: starting value to initialize the bias + + (default is all zeros). + + kernel_initializer: starting value to initialize the weight. + + + + Raises: + + ValueError: if inputs_shape is wrong. 
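+
+
+
+  Example (a sketch; `x` and `h` stand for any 2-D `[batch, n]` tensors):
+
+    linear = _Linear_([x, h], output_size=4, build_bias=True)
+
+    y = linear([x, h])  # y: [batch, 4]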
+
+  """
+
+  def __init__(self,
+
+               args,
+
+               output_size,
+
+               build_bias,
+
+               bias_initializer=None,
+
+               kernel_initializer=None):
+
+    self._build_bias = build_bias
+
+    if args is None or (nest.is_sequence(args) and not args):
+      raise ValueError("`args` must be specified")
+
+    if not nest.is_sequence(args):
+
+      args = [args]
+
+      self._is_sequence = False
+
+    else:
+
+      self._is_sequence = True
+
+    # Calculate the total size of arguments on dimension 1.
+
+    total_arg_size = 0
+
+    shapes = [a.get_shape() for a in args]
+
+    for shape in shapes:
+
+      if shape.ndims != 2:
+        raise ValueError(
+            "linear is expecting 2D arguments: %s" % shapes)
+
+      if shape[1] is None:
+
+        raise ValueError("linear expects shape[1] to be provided for shape %s, "
+
+                         "but saw %s" % (shape, shape[1]))
+
+      else:
+
+        total_arg_size += int(shape[1])
+
+    dtype = [a.dtype for a in args][0]
+
+    scope = vs.get_variable_scope()
+
+    with vs.variable_scope(scope) as outer_scope:
+
+      self._weights = vs.get_variable(
+
+          _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size],
+
+          dtype=dtype,
+
+          initializer=kernel_initializer)
+
+      if build_bias:
+
+        with vs.variable_scope(outer_scope) as inner_scope:
+
+          inner_scope.set_partitioner(None)
+
+          if bias_initializer is None:
+            bias_initializer = init_ops.constant_initializer(
+                0.0, dtype=dtype)
+
+          self._biases = vs.get_variable(
+
+              _BIAS_VARIABLE_NAME, [output_size],
+
+              dtype=dtype,
+
+              initializer=bias_initializer)
+
+  def __call__(self, args):
+
+    if not self._is_sequence:
+      args = [args]
+
+    if len(args) == 1:
+
+      res = math_ops.matmul(args[0], self._weights)
+
+    else:
+
+      res = math_ops.matmul(array_ops.concat(args, 1), self._weights)
+
+    if self._build_bias:
+      res = nn_ops.bias_add(res, self._biases)
+
+    return res
+
+
+try:
+  from tensorflow.python.ops.rnn_cell_impl import _Linear
+except ImportError:
+  _Linear = _Linear_
+
+
+class QAAttGRUCell(RNNCell):
+  """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).
+
+  Args:
+
+    num_units: int, The number of units in the GRU cell.
+
+    activation: Nonlinearity to use. Default: `tanh`.
+
+    reuse: (optional) Python boolean describing whether to reuse variables
+
+      in an existing scope. If not `True`, and the existing scope already has
+
+      the given variables, an error is raised.
+
+    kernel_initializer: (optional) The initializer to use for the weight and
+
+      projection matrices.
+
+    bias_initializer: (optional) The initializer to use for the bias.
+
+  """
+
+  def __init__(self,
+
+               num_units,
+
+               activation=None,
+
+               reuse=None,
+
+               kernel_initializer=None,
+
+               bias_initializer=None):
+
+    super(QAAttGRUCell, self).__init__(_reuse=reuse)
+
+    self._num_units = num_units
+
+    self._activation = activation or math_ops.tanh
+
+    self._kernel_initializer = kernel_initializer
+
+    self._bias_initializer = bias_initializer
+
+    self._gate_linear = None
+
+    self._candidate_linear = None
+
+  @property
+  def state_size(self):
+
+    return self._num_units
+
+  @property
+  def output_size(self):
+
+    return self._num_units
+
+  def __call__(self, inputs, state, att_score):
+
+    return self.call(inputs, state, att_score)
+
+  def call(self, inputs, state, att_score=None):
+    """Gated recurrent unit (GRU) with nunits cells."""
+
+    if self._gate_linear is None:
+
+      bias_ones = self._bias_initializer
+
+      if self._bias_initializer is None:
+        bias_ones = init_ops.constant_initializer(
+            1.0, dtype=inputs.dtype)
+
+      with vs.variable_scope("gates"):  # Reset gate and update gate.
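+
+        # A single linear map over [inputs, state] produces both gates at
+        # once; its sigmoid output is split below into the reset gate r and
+        # the update gate u. In this cell the attention score later replaces
+        # the update gate entirely:
+        #   new_h = (1 - att_score) * state + att_score * c.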
+ + self._gate_linear = _Linear( + + [inputs, state], + + 2 * self._num_units, + + True, + + bias_initializer=bias_ones, + + kernel_initializer=self._kernel_initializer) + + value = math_ops.sigmoid(self._gate_linear([inputs, state])) + + r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) + + r_state = r * state + + if self._candidate_linear is None: + with vs.variable_scope("candidate"): + self._candidate_linear = _Linear( + + [inputs, r_state], + + self._num_units, + + True, + + bias_initializer=self._bias_initializer, + + kernel_initializer=self._kernel_initializer) + + c = self._activation(self._candidate_linear([inputs, r_state])) + + new_h = (1. - att_score) * state + att_score * c + + return new_h, new_h + + +class VecAttGRUCell(RNNCell): + """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078). + + Args: + + num_units: int, The number of units in the GRU cell. + + activation: Nonlinearity to use. Default: `tanh`. + + reuse: (optional) Python boolean describing whether to reuse variables + + in an existing scope. If not `True`, and the existing scope already has + + the given variables, an error is raised. + + kernel_initializer: (optional) The initializer to use for the weight and + + projection matrices. + + bias_initializer: (optional) The initializer to use for the bias. + + """ + + def __init__(self, + + num_units, + + activation=None, + + reuse=None, + + kernel_initializer=None, + + bias_initializer=None): + + super(VecAttGRUCell, self).__init__(_reuse=reuse) + + self._num_units = num_units + + self._activation = activation or math_ops.tanh + + self._kernel_initializer = kernel_initializer + + self._bias_initializer = bias_initializer + + self._gate_linear = None + + self._candidate_linear = None + + @property + def state_size(self): + + return self._num_units + + @property + def output_size(self): + + return self._num_units + + def __call__(self, inputs, state, att_score): + + return self.call(inputs, state, att_score) + + def call(self, inputs, state, att_score=None): + """Gated recurrent unit (GRU) with nunits cells.""" + + if self._gate_linear is None: + + bias_ones = self._bias_initializer + + if self._bias_initializer is None: + bias_ones = init_ops.constant_initializer( + 1.0, dtype=inputs.dtype) + + with vs.variable_scope("gates"): # Reset gate and update gate. 
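+
+        # Same joint gate computation as in QAAttGRUCell; this AUGRU-style
+        # cell keeps the update gate but rescales it with the attention score:
+        #   u = (1 - att_score) * u, then new_h = u * state + (1 - u) * c.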
+ + self._gate_linear = _Linear( + + [inputs, state], + + 2 * self._num_units, + + True, + + bias_initializer=bias_ones, + + kernel_initializer=self._kernel_initializer) + + value = math_ops.sigmoid(self._gate_linear([inputs, state])) + + r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) + + r_state = r * state + + if self._candidate_linear is None: + with vs.variable_scope("candidate"): + self._candidate_linear = _Linear( + + [inputs, r_state], + + self._num_units, + + True, + + bias_initializer=self._bias_initializer, + + kernel_initializer=self._kernel_initializer) + + c = self._activation(self._candidate_linear([inputs, r_state])) + + u = (1.0 - att_score) * u + + new_h = u * state + (1 - u) * c + + return new_h, new_h diff --git a/modelzoo/PNN/script/estimator/__init__.py b/modelzoo/PNN/script/estimator/__init__.py new file mode 100644 index 00000000000..cf4f59d6c09 --- /dev/null +++ b/modelzoo/PNN/script/estimator/__init__.py @@ -0,0 +1 @@ +from .models import * \ No newline at end of file diff --git a/modelzoo/PNN/script/estimator/feature_column.py b/modelzoo/PNN/script/estimator/feature_column.py new file mode 100644 index 00000000000..c8d7a6cd013 --- /dev/null +++ b/modelzoo/PNN/script/estimator/feature_column.py @@ -0,0 +1,52 @@ +import tensorflow as tf +from tensorflow.python.feature_column.feature_column import _EmbeddingColumn + +from .utils import LINEAR_SCOPE_NAME, variable_scope, get_collection, get_GraphKeys, input_layer, get_losses + + +def linear_model(features, linear_feature_columns): + if tf.__version__ >= '2.0.0': + linear_logits = tf.compat.v1.feature_column.linear_model(features, linear_feature_columns) + else: + linear_logits = tf.feature_column.linear_model(features, linear_feature_columns) + return linear_logits + + +def get_linear_logit(features, linear_feature_columns, l2_reg_linear=0): + with variable_scope(LINEAR_SCOPE_NAME): + if not linear_feature_columns: + linear_logits = tf.Variable([[0.0]], name='bias_weights') + else: + + linear_logits = linear_model(features, linear_feature_columns) + + if l2_reg_linear > 0: + for var in get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME)[:-1]: + get_losses().add_loss(l2_reg_linear * tf.nn.l2_loss(var, name=var.name.split(":")[0] + "_l2loss"), + get_GraphKeys().REGULARIZATION_LOSSES) + return linear_logits + + +def input_from_feature_columns(features, feature_columns, l2_reg_embedding=0.0): + dense_value_list = [] + sparse_emb_list = [] + for feat in feature_columns: + if is_embedding(feat): + sparse_emb = tf.expand_dims(input_layer(features, [feat]), axis=1) + sparse_emb_list.append(sparse_emb) + if l2_reg_embedding > 0: + get_losses().add_loss(l2_reg_embedding * tf.nn.l2_loss(sparse_emb, name=feat.name + "_l2loss"), + get_GraphKeys().REGULARIZATION_LOSSES) + + else: + dense_value_list.append(input_layer(features, [feat])) + + return sparse_emb_list, dense_value_list + + +def is_embedding(feature_column): + try: + from tensorflow.python.feature_column.feature_column_v2 import EmbeddingColumn + except ImportError: + EmbeddingColumn = _EmbeddingColumn + return isinstance(feature_column, (_EmbeddingColumn, EmbeddingColumn)) diff --git a/modelzoo/PNN/script/estimator/inputs.py b/modelzoo/PNN/script/estimator/inputs.py new file mode 100644 index 00000000000..2c175a9934e --- /dev/null +++ b/modelzoo/PNN/script/estimator/inputs.py @@ -0,0 +1,52 @@ +import tensorflow as tf + + +def input_fn_pandas(df, features, label=None, batch_size=256, num_epochs=1, shuffle=False, 
queue_capacity_factor=10, + num_threads=1): + if label is not None: + y = df[label] + else: + y = None + if tf.__version__ >= "2.0.0": + return tf.compat.v1.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size, + num_epochs=num_epochs, + shuffle=shuffle, + queue_capacity=batch_size * queue_capacity_factor, + num_threads=num_threads) + + return tf.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size, num_epochs=num_epochs, + shuffle=shuffle, queue_capacity=batch_size * queue_capacity_factor, + num_threads=num_threads) + + +def input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, num_parallel_calls=8, + shuffle_factor=10, prefetch_factor=1, + ): + def _parse_examples(serial_exmp): + try: + features = tf.parse_single_example(serial_exmp, features=feature_description) + except AttributeError: + features = tf.io.parse_single_example(serial_exmp, features=feature_description) + if label is not None: + labels = features.pop(label) + return features, labels + return features + + def input_fn(): + dataset = tf.data.TFRecordDataset(filenames) + dataset = dataset.map(_parse_examples, num_parallel_calls=num_parallel_calls) + if shuffle_factor > 0: + dataset = dataset.shuffle(buffer_size=batch_size * shuffle_factor) + + dataset = dataset.repeat(num_epochs).batch(batch_size) + + if prefetch_factor > 0: + dataset = dataset.prefetch(buffer_size=batch_size * prefetch_factor) + try: + iterator = dataset.make_one_shot_iterator() + except AttributeError: + iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + + return iterator.get_next() + + return input_fn diff --git a/modelzoo/PNN/script/estimator/models/__init__.py b/modelzoo/PNN/script/estimator/models/__init__.py new file mode 100644 index 00000000000..9bc1e120dbc --- /dev/null +++ b/modelzoo/PNN/script/estimator/models/__init__.py @@ -0,0 +1,13 @@ +from .afm import AFMEstimator +from .autoint import AutoIntEstimator +from .ccpm import CCPMEstimator +from .dcn import DCNEstimator +from .deepfm import DeepFMEstimator +from .fwfm import FwFMEstimator +from .fibinet import FiBiNETEstimator +from .fnn import FNNEstimator +from .nfm import NFMEstimator +from .pnn import PNNEstimator +from .wdl import WDLEstimator +from .xdeepfm import xDeepFMEstimator +from .deepfefm import DeepFEFMEstimator diff --git a/modelzoo/PNN/script/estimator/models/pnn.py b/modelzoo/PNN/script/estimator/models/pnn.py new file mode 100644 index 00000000000..7e4c159c276 --- /dev/null +++ b/modelzoo/PNN/script/estimator/models/pnn.py @@ -0,0 +1,93 @@ +# -*- coding:utf-8 -*- +""" +Author: + Weichen Shen, weichenswc@163.com + +Reference: + [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. 
IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf)
+"""
+
+import tensorflow as tf
+
+from ..feature_column import get_linear_logit, input_from_feature_columns
+from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
+from ...layers.core import DNN
+from ...layers.interaction import InnerProductLayer, OutterProductLayer
+from ...layers.utils import concat_func, combined_dnn_input
+
+
+def PNNEstimator(dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_embedding=1e-5, l2_reg_dnn=0,
+                 seed=1024, dnn_dropout=0, dnn_activation='relu', use_inner=True, use_outter=False, kernel_type='mat',
+                 task='binary', model_dir=None, config=None,
+                 linear_optimizer='Ftrl',
+                 dnn_optimizer='Adagrad', training_chief_hooks=None):
+    """Instantiates the Product-based Neural Network architecture.
+
+    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
+    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net.
+    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors.
+    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
+    :param seed: integer to use as random seed.
+    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
+    :param dnn_activation: Activation function to use in the DNN.
+    :param use_inner: bool, whether to use the inner product.
+    :param use_outter: bool, whether to use the outer product.
+    :param kernel_type: str, kernel type used in the outer product, can be ``'mat'``, ``'vec'`` or ``'num'``.
+    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
+    :param model_dir: Directory to save model parameters, graph, etc. This can
+        also be used to load checkpoints from the directory into an estimator
+        to continue training a previously saved model.
+    :param config: tf.RunConfig object to configure the runtime settings.
+    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the linear part of the model. Defaults to the FTRL optimizer.
+    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the deep part of the model. Defaults to the Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
+    :return: A TensorFlow Estimator instance.
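+
+    A minimal usage sketch (the feature columns below are illustrative, not part
+    of this module)::
+
+        import tensorflow as tf
+
+        fc = tf.feature_column
+        dnn_feature_columns = [
+            fc.embedding_column(
+                fc.categorical_column_with_identity('item_id', num_buckets=1000),
+                dimension=8),
+            fc.embedding_column(
+                fc.categorical_column_with_identity('user_id', num_buckets=5000),
+                dimension=8),
+        ]
+        estimator = PNNEstimator(dnn_feature_columns, task='binary')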
+ + """ + + if kernel_type not in ['mat', 'vec', 'num']: + raise ValueError("kernel_type must be mat,vec or num") + + def _model_fn(features, labels, mode, config): + train_flag = (mode == tf.estimator.ModeKeys.TRAIN) + + linear_logits = get_linear_logit(features, [], l2_reg_linear=0) + + with variable_scope(DNN_SCOPE_NAME): + sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, + l2_reg_embedding=l2_reg_embedding) + + inner_product = tf.keras.layers.Flatten()( + InnerProductLayer()(sparse_embedding_list)) + outter_product = OutterProductLayer(kernel_type)(sparse_embedding_list) + + # ipnn deep input + linear_signal = tf.keras.layers.Reshape( + [sum(map(lambda x: int(x.shape[-1]), sparse_embedding_list))])(concat_func(sparse_embedding_list)) + + if use_inner and use_outter: + deep_input = tf.keras.layers.Concatenate()( + [linear_signal, inner_product, outter_product]) + elif use_inner: + deep_input = tf.keras.layers.Concatenate()( + [linear_signal, inner_product]) + elif use_outter: + deep_input = tf.keras.layers.Concatenate()( + [linear_signal, outter_product]) + else: + deep_input = linear_signal + + dnn_input = combined_dnn_input([deep_input], dense_value_list) + dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input, training=train_flag) + dnn_logit = tf.keras.layers.Dense( + 1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_out) + + logits = linear_logits + dnn_logit + + return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer, + training_chief_hooks=training_chief_hooks) + + return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config) diff --git a/modelzoo/PNN/script/estimator/utils.py b/modelzoo/PNN/script/estimator/utils.py new file mode 100644 index 00000000000..5d722515f6b --- /dev/null +++ b/modelzoo/PNN/script/estimator/utils.py @@ -0,0 +1,217 @@ +import tensorflow as tf +from tensorflow.python.estimator.canned.head import _Head +from tensorflow.python.estimator.canned.optimizers import get_optimizer_instance + +LINEAR_SCOPE_NAME = 'linear' +DNN_SCOPE_NAME = 'dnn' + + +def _summary_key(head_name, val): + return '%s/%s' % (val, head_name) if head_name else val + + +class Head(_Head): + + def __init__(self, task, + name=None): + self._task = task + self._name = name + + @property + def name(self): + return self._name + + @property + def logits_dimension(self): + return 1 + + def _eval_metric_ops(self, + labels, + logits, + predictions, + unweighted_loss, + weights=None): + + labels = to_float(labels) + predictions = to_float(predictions) + + # with name_scope(None, 'metrics', (labels, logits, predictions, + # unweighted_loss, weights)): + metrics = get_metrics() + losses = get_losses() + + metric_ops = { + _summary_key(self._name, "prediction/mean"): metrics.mean(predictions, weights=weights), + _summary_key(self._name, "label/mean"): metrics.mean(labels, weights=weights), + } + + summary_scalar("prediction/mean", metric_ops[_summary_key(self._name, "prediction/mean")][1]) + summary_scalar("label/mean", metric_ops[_summary_key(self._name, "label/mean")][1]) + + + mean_loss = losses.compute_weighted_loss( + unweighted_loss, weights=1.0, reduction=losses.Reduction.MEAN) + + if self._task == "binary": + metric_ops[_summary_key(self._name, "LogLoss")] = metrics.mean(mean_loss, weights=weights, ) + summary_scalar("LogLoss", mean_loss) + + metric_ops[_summary_key(self._name, "AUC")] = metrics.auc(labels, 
predictions, weights=weights) + summary_scalar("AUC", metric_ops[_summary_key(self._name, "AUC")][1]) + else: + + metric_ops[_summary_key(self._name, "MSE")] = metrics.mean_squared_error(labels, predictions, + weights=weights) + summary_scalar("MSE", mean_loss) + + metric_ops[_summary_key(self._name, "MAE")] = metrics.mean_absolute_error(labels, predictions, + weights=weights) + summary_scalar("MAE", metric_ops[_summary_key(self._name, "MAE")][1]) + + return metric_ops + + def create_loss(self, features, mode, logits, labels): + del mode, features # Unused for this head. + losses = get_losses() + if self._task == "binary": + loss = losses.sigmoid_cross_entropy(labels, logits, reduction=losses.Reduction.NONE) + else: + loss = losses.mean_squared_error(labels, logits, reduction=losses.Reduction.NONE) + return loss + + def create_estimator_spec( + self, features, mode, logits, labels=None, train_op_fn=None, training_chief_hooks=None): + # with name_scope('head'): + logits = tf.reshape(logits, [-1, 1]) + if self._task == 'binary': + pred = tf.sigmoid(logits) + else: + pred = logits + + predictions = {"pred": pred, "logits": logits} + export_outputs = {"predict": tf.estimator.export.PredictOutput(predictions)} + if mode == tf.estimator.ModeKeys.PREDICT: + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + export_outputs=export_outputs) + + labels = tf.reshape(labels, [-1, 1]) + + unweighted_loss = self.create_loss(features, mode, logits, labels) + + losses = get_losses() + loss = losses.compute_weighted_loss( + unweighted_loss, weights=1.0, reduction=losses.Reduction.SUM) + reg_loss = losses.get_regularization_loss() + + training_loss = loss + reg_loss + + eval_metric_ops = self._eval_metric_ops(labels, logits, pred, unweighted_loss) + + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + loss=training_loss, + train_op=train_op_fn(training_loss), + eval_metric_ops=eval_metric_ops, + training_chief_hooks=training_chief_hooks) + + +def deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer, training_chief_hooks): + linear_optimizer = get_optimizer_instance(linear_optimizer, 0.005) + dnn_optimizer = get_optimizer_instance(dnn_optimizer, 0.01) + train_op_fn = get_train_op_fn(linear_optimizer, dnn_optimizer) + + head = Head(task) + return head.create_estimator_spec(features=features, + mode=mode, + labels=labels, + train_op_fn=train_op_fn, + logits=logits, training_chief_hooks=training_chief_hooks) + + +def get_train_op_fn(linear_optimizer, dnn_optimizer): + def _train_op_fn(loss): + train_ops = [] + try: + global_step = tf.train.get_global_step() + except AttributeError: + global_step = tf.compat.v1.train.get_global_step() + linear_var_list = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME) + dnn_var_list = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, DNN_SCOPE_NAME) + + if len(dnn_var_list) > 0: + train_ops.append( + dnn_optimizer.minimize( + loss, + var_list=dnn_var_list)) + if len(linear_var_list) > 0: + train_ops.append( + linear_optimizer.minimize( + loss, + var_list=linear_var_list)) + + train_op = tf.group(*train_ops) + with tf.control_dependencies([train_op]): + try: + return tf.assign_add(global_step, 1).op + except AttributeError: + return tf.compat.v1.assign_add(global_step, 1).op + + return _train_op_fn + + +def variable_scope(name_or_scope): + try: + return tf.variable_scope(name_or_scope) + except AttributeError: + return tf.compat.v1.variable_scope(name_or_scope) + +def 
get_collection(key, scope=None): + try: + return tf.get_collection(key, scope=scope) + except AttributeError: + return tf.compat.v1.get_collection(key, scope=scope) + + +def get_GraphKeys(): + try: + return tf.GraphKeys + except AttributeError: + return tf.compat.v1.GraphKeys + + +def get_losses(): + try: + return tf.compat.v1.losses + except AttributeError: + return tf.losses + + +def input_layer(features, feature_columns): + try: + return tf.feature_column.input_layer(features, feature_columns) + except AttributeError: + return tf.compat.v1.feature_column.input_layer(features, feature_columns) + + +def get_metrics(): + try: + return tf.compat.v1.metrics + except AttributeError: + return tf.metrics + + +def to_float(x, name="ToFloat"): + try: + return tf.to_float(x, name) + except AttributeError: + return tf.compat.v1.to_float(x, name) + + +def summary_scalar(name, data): + try: + tf.summary.scalar(name, data) + except AttributeError: # tf version 2.5.0+:AttributeError: module 'tensorflow._api.v2.summary' has no attribute 'scalar' + tf.compat.v1.summary.scalar(name, data) \ No newline at end of file diff --git a/modelzoo/PNN/script/feature_column.py b/modelzoo/PNN/script/feature_column.py new file mode 100644 index 00000000000..3b778360b33 --- /dev/null +++ b/modelzoo/PNN/script/feature_column.py @@ -0,0 +1,249 @@ +import tensorflow as tf +from collections import namedtuple, OrderedDict +from copy import copy +from itertools import chain + +from tensorflow.python.keras.initializers import RandomNormal, Zeros +from tensorflow.python.keras.layers import Input, Lambda + +from .inputs import create_embedding_matrix, embedding_lookup, get_dense_input, varlen_embedding_lookup, \ + get_varlen_pooling_list, mergeDict +from .layers import Linear +from .layers.utils import concat_func +#from keras import backend as K +import pandas as pd +import numpy as np + +fi = open('../../deep_ctr_master/data/fm.model.txt','r') + +first = True +feat_weights={} +k=0 +for line in fi: + s = line.strip().split() + if first: + first = False + w_0 = float(s[0]) + feat_num = int(s[1]) + k = int(s[2]) + 1 # w and v + + else: + feat = int(s[0]) + weights = [float(s[1 + i]) for i in range(k)] + feat_weights[feat] = weights + +list1 =[] +for col,val in feat_weights.items(): + list1.append(val) + +# def my_init(shape,dtype=None): +# weight = np.array(list1) +# +# return weight.reshape(shape) + + +DEFAULT_GROUP_NAME = "default_group" + + +class SparseFeat(namedtuple('SparseFeat', + ['name', 'vocabulary_size', 'embedding_dim', 'use_hash', 'vocabulary_path', 'dtype', 'embeddings_initializer', + 'embedding_name', + 'group_name', 'trainable'])): + __slots__ = () + + def __new__(cls, name, vocabulary_size, embedding_dim=4, use_hash=False, vocabulary_path=None, dtype="int32", embeddings_initializer=None, + embedding_name=None, + group_name=DEFAULT_GROUP_NAME, trainable=True): + + if embedding_dim == "auto": + embedding_dim = 6 * int(pow(vocabulary_size, 0.25)) + if embeddings_initializer is None: + embeddings_initializer = RandomNormal(mean=0.0, stddev=0.0001, seed=2020) + # if embeddings_initializer=='fm': + # embeddings_initializer = my_init(shape=(vocabulary_size,embedding_dim)) + + + + if embedding_name is None: + embedding_name = name + + return super(SparseFeat, cls).__new__(cls, name, vocabulary_size, embedding_dim, use_hash, vocabulary_path, dtype, + embeddings_initializer, + embedding_name, group_name, trainable) + + def __hash__(self): + return self.name.__hash__() + + +class 
VarLenSparseFeat(namedtuple('VarLenSparseFeat', + ['sparsefeat', 'maxlen', 'combiner', 'length_name', 'weight_name', 'weight_norm'])): + __slots__ = () + + def __new__(cls, sparsefeat, maxlen, combiner="mean", length_name=None, weight_name=None, weight_norm=True): + return super(VarLenSparseFeat, cls).__new__(cls, sparsefeat, maxlen, combiner, length_name, weight_name, + weight_norm) + + @property + def name(self): + return self.sparsefeat.name + + @property + def vocabulary_size(self): + return self.sparsefeat.vocabulary_size + + @property + def embedding_dim(self): + return self.sparsefeat.embedding_dim + + @property + def use_hash(self): + return self.sparsefeat.use_hash + + @property + def vocabulary_path(self): + return self.sparsefeat.vocabulary_path + + @property + def dtype(self): + return self.sparsefeat.dtype + + @property + def embeddings_initializer(self): + return self.sparsefeat.embeddings_initializer + + @property + def embedding_name(self): + return self.sparsefeat.embedding_name + + @property + def group_name(self): + return self.sparsefeat.group_name + + @property + def trainable(self): + return self.sparsefeat.trainable + + def __hash__(self): + return self.name.__hash__() + + +class DenseFeat(namedtuple('DenseFeat', ['name', 'dimension', 'dtype', 'transform_fn'])): + """ Dense feature + Args: + name: feature name, + dimension: dimension of the feature, default = 1. + dtype: dtype of the feature, default="float32". + transform_fn: If not `None` , a function that can be used to transform + values of the feature. the function takes the input Tensor as its + argument, and returns the output Tensor. + (e.g. lambda x: (x - 3.0) / 4.2). + """ + __slots__ = () + + def __new__(cls, name, dimension=1, dtype="float32", transform_fn=None): + return super(DenseFeat, cls).__new__(cls, name, dimension, dtype, transform_fn) + + def __hash__(self): + return self.name.__hash__() + + # def __eq__(self, other): + # if self.name == other.name: + # return True + # return False + + # def __repr__(self): + # return 'DenseFeat:'+self.name + + +def get_feature_names(feature_columns): + features = build_input_features(feature_columns) + return list(features.keys()) + + +def build_input_features(feature_columns, prefix=''): + input_features = OrderedDict() + for fc in feature_columns: + if isinstance(fc, SparseFeat): + input_features[fc.name] = Input( + shape=(1,), name=prefix + fc.name, dtype=fc.dtype) + elif isinstance(fc, DenseFeat): + input_features[fc.name] = Input( + shape=(fc.dimension,), name=prefix + fc.name, dtype=fc.dtype) + elif isinstance(fc, VarLenSparseFeat): + input_features[fc.name] = Input(shape=(fc.maxlen,), name=prefix + fc.name, + dtype=fc.dtype) + if fc.weight_name is not None: + input_features[fc.weight_name] = Input(shape=(fc.maxlen, 1), name=prefix + fc.weight_name, + dtype="float32") + if fc.length_name is not None: + input_features[fc.length_name] = Input((1,), name=prefix + fc.length_name, dtype='int32') + + else: + raise TypeError("Invalid feature column type,got", type(fc)) + + return input_features + + +def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=1024, prefix='linear', + l2_reg=0, sparse_feat_refine_weight=None): + linear_feature_columns = copy(feature_columns) + for i in range(len(linear_feature_columns)): + if isinstance(linear_feature_columns[i], SparseFeat): + linear_feature_columns[i] = linear_feature_columns[i]._replace(embedding_dim=1, + embeddings_initializer=Zeros()) + if isinstance(linear_feature_columns[i], 
VarLenSparseFeat): + linear_feature_columns[i] = linear_feature_columns[i]._replace( + sparsefeat=linear_feature_columns[i].sparsefeat._replace(embedding_dim=1, + embeddings_initializer=Zeros())) + + linear_emb_list = [input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, + prefix=prefix + str(i))[0] for i in range(units)] + _, dense_input_list = input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, prefix=prefix) + + linear_logit_list = [] + for i in range(units): + + if len(linear_emb_list[i]) > 0 and len(dense_input_list) > 0: + sparse_input = concat_func(linear_emb_list[i]) + dense_input = concat_func(dense_input_list) + if sparse_feat_refine_weight is not None: + sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))( + [sparse_input, sparse_feat_refine_weight]) + linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias, seed=seed)([sparse_input, dense_input]) + elif len(linear_emb_list[i]) > 0: + sparse_input = concat_func(linear_emb_list[i]) + if sparse_feat_refine_weight is not None: + sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))( + [sparse_input, sparse_feat_refine_weight]) + linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(sparse_input) + elif len(dense_input_list) > 0: + dense_input = concat_func(dense_input_list) + linear_logit = Linear(l2_reg, mode=1, use_bias=use_bias, seed=seed)(dense_input) + else: #empty feature_columns + return Lambda(lambda x: tf.constant([[0.0]]))(list(features.values())[0]) + linear_logit_list.append(linear_logit) + + return concat_func(linear_logit_list) + + +def input_from_feature_columns(features, feature_columns, l2_reg, seed, prefix='', seq_mask_zero=True, + support_dense=True, support_group=False): + sparse_feature_columns = list( + filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if feature_columns else [] + varlen_sparse_feature_columns = list( + filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if feature_columns else [] + + embedding_matrix_dict = create_embedding_matrix(feature_columns, l2_reg, seed, prefix=prefix, + seq_mask_zero=seq_mask_zero) + group_sparse_embedding_dict = embedding_lookup(embedding_matrix_dict, features, sparse_feature_columns) + dense_value_list = get_dense_input(features, feature_columns) + if not support_dense and len(dense_value_list) > 0: + raise ValueError("DenseFeat is not supported in dnn_feature_columns") + + sequence_embed_dict = varlen_embedding_lookup(embedding_matrix_dict, features, varlen_sparse_feature_columns) + group_varlen_sparse_embedding_dict = get_varlen_pooling_list(sequence_embed_dict, features, + varlen_sparse_feature_columns) + group_embedding_dict = mergeDict(group_sparse_embedding_dict, group_varlen_sparse_embedding_dict) + if not support_group: + group_embedding_dict = list(chain.from_iterable(group_embedding_dict.values())) + return group_embedding_dict, dense_value_list diff --git a/modelzoo/PNN/script/inputs.py b/modelzoo/PNN/script/inputs.py new file mode 100644 index 00000000000..d567f846265 --- /dev/null +++ b/modelzoo/PNN/script/inputs.py @@ -0,0 +1,155 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +from collections import defaultdict +from itertools import chain + +from tensorflow.python.keras.layers import Embedding, Lambda +from tensorflow.python.keras.regularizers import l2 + +from .layers.sequence import SequencePoolingLayer, WeightedSequenceLayer +from .layers.utils import Hash + + +def get_inputs_list(inputs): + 
return list(chain(*list(map(lambda x: x.values(), filter(lambda x: x is not None, inputs))))) + + +def create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, l2_reg, + prefix='sparse_', seq_mask_zero=True): + sparse_embedding = {} + for feat in sparse_feature_columns: + emb = Embedding(feat.vocabulary_size, feat.embedding_dim, + embeddings_initializer=feat.embeddings_initializer, + embeddings_regularizer=l2(l2_reg), + name=prefix + '_emb_' + feat.embedding_name) + emb.trainable = feat.trainable + sparse_embedding[feat.embedding_name] = emb + + if varlen_sparse_feature_columns and len(varlen_sparse_feature_columns) > 0: + for feat in varlen_sparse_feature_columns: + # if feat.name not in sparse_embedding: + emb = Embedding(feat.vocabulary_size, feat.embedding_dim, + embeddings_initializer=feat.embeddings_initializer, + embeddings_regularizer=l2( + l2_reg), + name=prefix + '_seq_emb_' + feat.name, + mask_zero=seq_mask_zero) + emb.trainable = feat.trainable + sparse_embedding[feat.embedding_name] = emb + return sparse_embedding + + +def get_embedding_vec_list(embedding_dict, input_dict, sparse_feature_columns, return_feat_list=(), mask_feat_list=()): + embedding_vec_list = [] + for fg in sparse_feature_columns: + feat_name = fg.name + if len(return_feat_list) == 0 or feat_name in return_feat_list: + if fg.use_hash: + lookup_idx = Hash(fg.vocabulary_size, mask_zero=(feat_name in mask_feat_list), vocabulary_path=fg.vocabulary_path)(input_dict[feat_name]) + else: + lookup_idx = input_dict[feat_name] + + embedding_vec_list.append(embedding_dict[feat_name](lookup_idx)) + + return embedding_vec_list + + +def create_embedding_matrix(feature_columns, l2_reg, seed, prefix="", seq_mask_zero=True): + from . import feature_column as fc_lib + + sparse_feature_columns = list( + filter(lambda x: isinstance(x, fc_lib.SparseFeat), feature_columns)) if feature_columns else [] + varlen_sparse_feature_columns = list( + filter(lambda x: isinstance(x, fc_lib.VarLenSparseFeat), feature_columns)) if feature_columns else [] + sparse_emb_dict = create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, + l2_reg, prefix=prefix + 'sparse', seq_mask_zero=seq_mask_zero) + return sparse_emb_dict + + +def embedding_lookup(sparse_embedding_dict, sparse_input_dict, sparse_feature_columns, return_feat_list=(), + mask_feat_list=(), to_list=False): + group_embedding_dict = defaultdict(list) + for fc in sparse_feature_columns: + feature_name = fc.name + embedding_name = fc.embedding_name + if (len(return_feat_list) == 0 or feature_name in return_feat_list): + if fc.use_hash: + lookup_idx = Hash(fc.vocabulary_size, mask_zero=(feature_name in mask_feat_list), vocabulary_path=fc.vocabulary_path)( + sparse_input_dict[feature_name]) + else: + lookup_idx = sparse_input_dict[feature_name] + + group_embedding_dict[fc.group_name].append(sparse_embedding_dict[embedding_name](lookup_idx)) + if to_list: + return list(chain.from_iterable(group_embedding_dict.values())) + return group_embedding_dict + + +def varlen_embedding_lookup(embedding_dict, sequence_input_dict, varlen_sparse_feature_columns): + varlen_embedding_vec_dict = {} + for fc in varlen_sparse_feature_columns: + feature_name = fc.name + embedding_name = fc.embedding_name + if fc.use_hash: + lookup_idx = Hash(fc.vocabulary_size, mask_zero=True, vocabulary_path=fc.vocabulary_path)(sequence_input_dict[feature_name]) + else: + lookup_idx = sequence_input_dict[feature_name] + varlen_embedding_vec_dict[feature_name] = 
embedding_dict[embedding_name](lookup_idx) + return varlen_embedding_vec_dict + + +def get_varlen_pooling_list(embedding_dict, features, varlen_sparse_feature_columns, to_list=False): + pooling_vec_list = defaultdict(list) + for fc in varlen_sparse_feature_columns: + feature_name = fc.name + combiner = fc.combiner + feature_length_name = fc.length_name + if feature_length_name is not None: + if fc.weight_name is not None: + seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm)( + [embedding_dict[feature_name], features[feature_length_name], features[fc.weight_name]]) + else: + seq_input = embedding_dict[feature_name] + vec = SequencePoolingLayer(combiner, supports_masking=False)( + [seq_input, features[feature_length_name]]) + else: + if fc.weight_name is not None: + seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm, supports_masking=True)( + [embedding_dict[feature_name], features[fc.weight_name]]) + else: + seq_input = embedding_dict[feature_name] + vec = SequencePoolingLayer(combiner, supports_masking=True)( + seq_input) + pooling_vec_list[fc.group_name].append(vec) + if to_list: + return chain.from_iterable(pooling_vec_list.values()) + return pooling_vec_list + + +def get_dense_input(features, feature_columns): + from . import feature_column as fc_lib + dense_feature_columns = list( + filter(lambda x: isinstance(x, fc_lib.DenseFeat), feature_columns)) if feature_columns else [] + dense_input_list = [] + for fc in dense_feature_columns: + if fc.transform_fn is None: + dense_input_list.append(features[fc.name]) + else: + transform_result = Lambda(fc.transform_fn)(features[fc.name]) + dense_input_list.append(transform_result) + return dense_input_list + + +def mergeDict(a, b): + c = defaultdict(list) + for k, v in a.items(): + c[k].extend(v) + for k, v in b.items(): + c[k].extend(v) + return c diff --git a/modelzoo/PNN/script/layers/__init__.py b/modelzoo/PNN/script/layers/__init__.py new file mode 100644 index 00000000000..1bfd40effe7 --- /dev/null +++ b/modelzoo/PNN/script/layers/__init__.py @@ -0,0 +1,52 @@ +import tensorflow as tf + +from .activation import Dice +from .core import DNN, LocalActivationUnit, PredictionLayer +from .interaction import (CIN, FM, AFMLayer, BiInteractionPooling, CrossNet, CrossNetMix, + InnerProductLayer, InteractingLayer, + OutterProductLayer, FGCNNLayer, SENETLayer, BilinearInteraction, + FieldWiseBiInteraction, FwFMLayer, FEFMLayer) +from .normalization import LayerNormalization +from .sequence import (AttentionSequencePoolingLayer, BiasEncoding, BiLSTM, + KMaxPooling, SequencePoolingLayer, WeightedSequenceLayer, + Transformer, DynamicGRU,PositionEncoding) + +from .utils import NoMask, Hash, Linear, _Add, combined_dnn_input, softmax, reduce_sum + +custom_objects = {'tf': tf, + 'InnerProductLayer': InnerProductLayer, + 'OutterProductLayer': OutterProductLayer, + 'DNN': DNN, + 'PredictionLayer': PredictionLayer, + 'FM': FM, + 'AFMLayer': AFMLayer, + 'CrossNet': CrossNet, + 'CrossNetMix': CrossNetMix, + 'BiInteractionPooling': BiInteractionPooling, + 'LocalActivationUnit': LocalActivationUnit, + 'Dice': Dice, + 'SequencePoolingLayer': SequencePoolingLayer, + 'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer, + 'CIN': CIN, + 'InteractingLayer': InteractingLayer, + 'LayerNormalization': LayerNormalization, + 'BiLSTM': BiLSTM, + 'Transformer': Transformer, + 'NoMask': NoMask, + 'BiasEncoding': BiasEncoding, + 'KMaxPooling': KMaxPooling, + 'FGCNNLayer': FGCNNLayer, + 'Hash': Hash, + 'Linear': Linear, + 
'DynamicGRU': DynamicGRU, + 'SENETLayer': SENETLayer, + 'BilinearInteraction': BilinearInteraction, + 'WeightedSequenceLayer': WeightedSequenceLayer, + '_Add': _Add, + 'FieldWiseBiInteraction': FieldWiseBiInteraction, + 'FwFMLayer': FwFMLayer, + 'softmax': softmax, + 'FEFMLayer': FEFMLayer, + 'reduce_sum': reduce_sum, + 'PositionEncoding':PositionEncoding + } diff --git a/modelzoo/PNN/script/layers/activation.py b/modelzoo/PNN/script/layers/activation.py new file mode 100644 index 00000000000..1b953bff8bc --- /dev/null +++ b/modelzoo/PNN/script/layers/activation.py @@ -0,0 +1,85 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import tensorflow as tf + +try: + from tensorflow.python.ops.init_ops import Zeros +except ImportError: + from tensorflow.python.ops.init_ops_v2 import Zeros +from tensorflow.python.keras.layers import Layer, Activation + +try: + from tensorflow.python.keras.layers import BatchNormalization +except ImportError: + BatchNormalization = tf.keras.layers.BatchNormalization + +try: + unicode +except NameError: + unicode = str + + +class Dice(Layer): + """The Data Adaptive Activation Function in DIN,which can be viewed as a generalization of PReLu and can adaptively adjust the rectified point according to distribution of input data. + + Input shape + - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. + + Output shape + - Same shape as the input. + + Arguments + - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). + + - **epsilon** : Small float added to variance to avoid dividing by zero. + + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """ + + def __init__(self, axis=-1, epsilon=1e-9, **kwargs): + self.axis = axis + self.epsilon = epsilon + super(Dice, self).__init__(**kwargs) + + def build(self, input_shape): + self.bn = BatchNormalization( + axis=self.axis, epsilon=self.epsilon, center=False, scale=False) + self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros( + ), dtype=tf.float32, name='dice_alpha') # name='alpha_'+self.name + super(Dice, self).build(input_shape) # Be sure to call this somewhere! + self.uses_learning_phase = True + + def call(self, inputs, training=None, **kwargs): + inputs_normed = self.bn(inputs, training=training) + # tf.layers.batch_normalization( + # inputs, axis=self.axis, epsilon=self.epsilon, center=False, scale=False) + x_p = tf.sigmoid(inputs_normed) + return self.alphas * (1.0 - x_p) * inputs + x_p * inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self, ): + config = {'axis': self.axis, 'epsilon': self.epsilon} + base_config = super(Dice, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def activation_layer(activation): + if activation in ("dice", "Dice"): + act_layer = Dice() + elif isinstance(activation, (str, unicode)): + act_layer = Activation(activation) + elif issubclass(activation, Layer): + act_layer = activation() + else: + raise ValueError( + "Invalid activation,found %s.You should use a str or a Activation Layer Class." 
% (activation)) + return act_layer diff --git a/modelzoo/PNN/script/layers/core.py b/modelzoo/PNN/script/layers/core.py new file mode 100644 index 00000000000..668348d2eb7 --- /dev/null +++ b/modelzoo/PNN/script/layers/core.py @@ -0,0 +1,267 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import tensorflow as tf +from tensorflow.python.keras import backend as K + +try: + from tensorflow.python.ops.init_ops_v2 import Zeros, glorot_normal +except ImportError: + from tensorflow.python.ops.init_ops import Zeros, glorot_normal_initializer as glorot_normal + +from tensorflow.python.keras.layers import Layer, Dropout + +try: + from tensorflow.python.keras.layers import BatchNormalization +except ImportError: + BatchNormalization = tf.keras.layers.BatchNormalization +from tensorflow.python.keras.regularizers import l2 + +from .activation import activation_layer + + +class LocalActivationUnit(Layer): + """The LocalActivationUnit used in DIN with which the representation of + user interests varies adaptively given different candidate items. + + Input shape + - A list of two 3D tensor with shape: ``(batch_size, 1, embedding_size)`` and ``(batch_size, T, embedding_size)`` + + Output shape + - 3D tensor with shape: ``(batch_size, T, 1)``. + + Arguments + - **hidden_units**:list of positive integer, the attention net layer number and units in each layer. + + - **activation**: Activation function to use in attention net. + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix of attention net. + + - **dropout_rate**: float in [0,1). Fraction of the units to dropout in attention net. + + - **use_bn**: bool. Whether use BatchNormalization before activation or not in attention net. + + - **seed**: A Python integer to use as random seed. + + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """ + + def __init__(self, hidden_units=(64, 32), activation='sigmoid', l2_reg=0, dropout_rate=0, use_bn=False, seed=1024, + **kwargs): + self.hidden_units = hidden_units + self.activation = activation + self.l2_reg = l2_reg + self.dropout_rate = dropout_rate + self.use_bn = use_bn + self.seed = seed + super(LocalActivationUnit, self).__init__(**kwargs) + self.supports_masking = True + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) != 2: + raise ValueError('A `LocalActivationUnit` layer should be called ' + 'on a list of 2 inputs') + + if len(input_shape[0]) != 3 or len(input_shape[1]) != 3: + raise ValueError("Unexpected inputs dimensions %d and %d, expect to be 3 dimensions" % ( + len(input_shape[0]), len(input_shape[1]))) + + if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1: + raise ValueError('A `LocalActivationUnit` layer requires ' + 'inputs of a two inputs with shape (None,1,embedding_size) and (None,T,embedding_size)' + 'Got different shapes: %s,%s' % (input_shape[0], input_shape[1])) + size = 4 * \ + int(input_shape[0][-1] + ) if len(self.hidden_units) == 0 else self.hidden_units[-1] + self.kernel = self.add_weight(shape=(size, 1), + initializer=glorot_normal( + seed=self.seed), + name="kernel") + self.bias = self.add_weight( + shape=(1,), initializer=Zeros(), name="bias") + self.dnn = DNN(self.hidden_units, self.activation, self.l2_reg, self.dropout_rate, self.use_bn, seed=self.seed) + + super(LocalActivationUnit, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, training=None, **kwargs): + + query, keys = inputs + + keys_len = keys.get_shape()[1] + queries = K.repeat_elements(query, keys_len, 1) + + att_input = tf.concat( + [queries, keys, queries - keys, queries * keys], axis=-1) + + att_out = self.dnn(att_input, training=training) + + attention_score = tf.nn.bias_add(tf.tensordot(att_out, self.kernel, axes=(-1, 0)), self.bias) + + return attention_score + + def compute_output_shape(self, input_shape): + return input_shape[1][:2] + (1,) + + def compute_mask(self, inputs, mask): + return mask + + def get_config(self, ): + config = {'activation': self.activation, 'hidden_units': self.hidden_units, + 'l2_reg': self.l2_reg, 'dropout_rate': self.dropout_rate, 'use_bn': self.use_bn, 'seed': self.seed} + base_config = super(LocalActivationUnit, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class DNN(Layer): + """The Multi Layer Percetron + + Input shape + - nD tensor with shape: ``(batch_size, ..., input_dim)``. The most common situation would be a 2D input with shape ``(batch_size, input_dim)``. + + Output shape + - nD tensor with shape: ``(batch_size, ..., hidden_size[-1])``. For instance, for a 2D input with shape ``(batch_size, input_dim)``, the output would have shape ``(batch_size, hidden_size[-1])``. + + Arguments + - **hidden_units**:list of positive integer, the layer number and units in each layer. + + - **activation**: Activation function to use. + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix. + + - **dropout_rate**: float in [0,1). Fraction of the units to dropout. + + - **use_bn**: bool. Whether use BatchNormalization before activation or not. + + - **output_activation**: Activation function to use in the last layer.If ``None``,it will be same as ``activation``. 
+ + - **seed**: A Python integer to use as random seed. + """ + + def __init__(self, hidden_units, activation='relu', l2_reg=0, dropout_rate=0, use_bn=False, output_activation=None, + seed=1024, **kwargs): + self.hidden_units = hidden_units + self.activation = activation + self.l2_reg = l2_reg + self.dropout_rate = dropout_rate + self.use_bn = use_bn + self.output_activation = output_activation + self.seed = seed + + super(DNN, self).__init__(**kwargs) + + def build(self, input_shape): + # if len(self.hidden_units) == 0: + # raise ValueError("hidden_units is empty") + input_size = input_shape[-1] + hidden_units = [int(input_size)] + list(self.hidden_units) + self.kernels = [self.add_weight(name='kernel' + str(i), + shape=( + hidden_units[i], hidden_units[i + 1]), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(len(self.hidden_units))] + self.bias = [self.add_weight(name='bias' + str(i), + shape=(self.hidden_units[i],), + initializer=Zeros(), + trainable=True) for i in range(len(self.hidden_units))] + if self.use_bn: + self.bn_layers = [BatchNormalization() for _ in range(len(self.hidden_units))] + + self.dropout_layers = [Dropout(self.dropout_rate, seed=self.seed + i) for i in + range(len(self.hidden_units))] + + self.activation_layers = [activation_layer(self.activation) for _ in range(len(self.hidden_units))] + + if self.output_activation: + self.activation_layers[-1] = activation_layer(self.output_activation) + + super(DNN, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, training=None, **kwargs): + + deep_input = inputs + + for i in range(len(self.hidden_units)): + fc = tf.nn.bias_add(tf.tensordot( + deep_input, self.kernels[i], axes=(-1, 0)), self.bias[i]) + + if self.use_bn: + fc = self.bn_layers[i](fc, training=training) + try: + fc = self.activation_layers[i](fc, training=training) + except TypeError as e: # TypeError: call() got an unexpected keyword argument 'training' + print("make sure the activation function use training flag properly", e) + fc = self.activation_layers[i](fc) + + fc = self.dropout_layers[i](fc, training=training) + deep_input = fc + + return deep_input + + def compute_output_shape(self, input_shape): + if len(self.hidden_units) > 0: + shape = input_shape[:-1] + (self.hidden_units[-1],) + else: + shape = input_shape + + return tuple(shape) + + def get_config(self, ): + config = {'activation': self.activation, 'hidden_units': self.hidden_units, + 'l2_reg': self.l2_reg, 'use_bn': self.use_bn, 'dropout_rate': self.dropout_rate, + 'output_activation': self.output_activation, 'seed': self.seed} + base_config = super(DNN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class PredictionLayer(Layer): + """ + Arguments + - **task**: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss + + - **use_bias**: bool.Whether add bias term or not. + """ + + def __init__(self, task='binary', use_bias=True, **kwargs): + if task not in ["binary", "multiclass", "regression"]: + raise ValueError("task must be binary,multiclass or regression") + self.task = task + self.use_bias = use_bias + super(PredictionLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if self.use_bias: + self.global_bias = self.add_weight( + shape=(1,), initializer=Zeros(), name="global_bias") + + # Be sure to call this somewhere! 
+        super(PredictionLayer, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        x = inputs
+        if self.use_bias:
+            x = tf.nn.bias_add(x, self.global_bias, data_format='NHWC')
+        if self.task == "binary":
+            x = tf.sigmoid(x)
+
+        output = tf.reshape(x, (-1, 1))
+
+        return output
+
+    def compute_output_shape(self, input_shape):
+        return (None, 1)
+
+    def get_config(self, ):
+        config = {'task': self.task, 'use_bias': self.use_bias}
+        base_config = super(PredictionLayer, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/modelzoo/PNN/script/layers/interaction.py b/modelzoo/PNN/script/layers/interaction.py
new file mode 100644
index 00000000000..e18b159059a
--- /dev/null
+++ b/modelzoo/PNN/script/layers/interaction.py
@@ -0,0 +1,1492 @@
+# -*- coding:utf-8 -*-
+"""
+
+Authors:
+    Weichen Shen, weichenswc@163.com,
+    Harshit Pande
+
+"""
+
+import itertools
+
+import tensorflow as tf
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras.backend import batch_dot
+
+try:
+    from tensorflow.python.ops.init_ops import Zeros, Ones, Constant, TruncatedNormal, \
+        glorot_normal_initializer as glorot_normal, \
+        glorot_uniform_initializer as glorot_uniform
+except ImportError:
+    from tensorflow.python.ops.init_ops_v2 import Zeros, Ones, Constant, TruncatedNormal, glorot_normal, glorot_uniform
+
+from tensorflow.python.keras.layers import Layer, MaxPooling2D, Conv2D, Dropout, Lambda, Dense, Flatten
+from tensorflow.python.keras.regularizers import l2
+from tensorflow.python.layers import utils
+
+from .activation import activation_layer
+from .utils import concat_func, reduce_sum, softmax, reduce_mean
+
+
+class AFMLayer(Layer):
+    """Attentional Factorization Machine models pairwise (order-2) feature
+    interactions without linear term and bias.
+
+    Input shape
+        - A list of 3D tensors with shape: ``(batch_size, 1, embedding_size)``.
+
+    Output shape
+        - 2D tensor with shape: ``(batch_size, 1)``.
+
+    Arguments
+        - **attention_factor** : Positive integer, dimensionality of the
+          attention network output space.
+
+        - **l2_reg_w** : float between 0 and 1. L2 regularizer strength
+          applied to the attention network.
+
+        - **dropout_rate** : float in [0,1). Fraction of the attention net output units to drop out.
+
+        - **seed** : A Python integer to use as random seed.
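+
+    A minimal call sketch (``emb_a``, ``emb_b`` and ``emb_c`` are assumed to be
+    embedding lookups, each of shape ``(batch_size, 1, embedding_size)``)::
+
+        afm_logit = AFMLayer(attention_factor=4)([emb_a, emb_b, emb_c])
+        # afm_logit: (batch_size, 1), the attention-weighted sum of all pairwise
+        # element-wise products, projected down to a single logit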
+ + References + - [Attentional Factorization Machines : Learning the Weight of Feature + Interactions via Attention Networks](https://arxiv.org/pdf/1708.04617.pdf) + """ + + def __init__(self, attention_factor=4, l2_reg_w=0, dropout_rate=0, seed=1024, **kwargs): + self.attention_factor = attention_factor + self.l2_reg_w = l2_reg_w + self.dropout_rate = dropout_rate + self.seed = seed + super(AFMLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + # input_shape = input_shape[0] + # if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `AttentionalFM` layer should be called ' + 'on a list of at least 2 inputs') + + shape_set = set() + reduced_input_shape = [shape.as_list() for shape in input_shape] + for i in range(len(input_shape)): + shape_set.add(tuple(reduced_input_shape[i])) + + if len(shape_set) > 1: + raise ValueError('A `AttentionalFM` layer requires ' + 'inputs with same shapes ' + 'Got different shapes: %s' % (shape_set)) + + if len(input_shape[0]) != 3 or input_shape[0][1] != 1: + raise ValueError('A `AttentionalFM` layer requires ' + 'inputs of a list with same shape tensor like\ + (None, 1, embedding_size)' + 'Got different shapes: %s' % (input_shape[0])) + + embedding_size = int(input_shape[0][-1]) + + self.attention_W = self.add_weight(shape=(embedding_size, + self.attention_factor), initializer=glorot_normal(seed=self.seed), + regularizer=l2(self.l2_reg_w), name="attention_W") + self.attention_b = self.add_weight( + shape=(self.attention_factor,), initializer=Zeros(), name="attention_b") + self.projection_h = self.add_weight(shape=(self.attention_factor, 1), + initializer=glorot_normal(seed=self.seed), name="projection_h") + self.projection_p = self.add_weight(shape=( + embedding_size, 1), initializer=glorot_normal(seed=self.seed), name="projection_p") + self.dropout = Dropout( + self.dropout_rate, seed=self.seed) + + self.tensordot = Lambda( + lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0))) + + # Be sure to call this somewhere! 
+ super(AFMLayer, self).build(input_shape) + + def call(self, inputs, training=None, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embeds_vec_list = inputs + row = [] + col = [] + + for r, c in itertools.combinations(embeds_vec_list, 2): + row.append(r) + col.append(c) + + p = tf.concat(row, axis=1) + q = tf.concat(col, axis=1) + inner_product = p * q + + bi_interaction = inner_product + attention_temp = tf.nn.relu(tf.nn.bias_add(tf.tensordot( + bi_interaction, self.attention_W, axes=(-1, 0)), self.attention_b)) + # Dense(self.attention_factor,'relu',kernel_regularizer=l2(self.l2_reg_w))(bi_interaction) + self.normalized_att_score = softmax(tf.tensordot( + attention_temp, self.projection_h, axes=(-1, 0)), dim=1) + attention_output = reduce_sum( + self.normalized_att_score * bi_interaction, axis=1) + + attention_output = self.dropout(attention_output, training=training) # training + + afm_out = self.tensordot([attention_output, self.projection_p]) + return afm_out + + def compute_output_shape(self, input_shape): + + if not isinstance(input_shape, list): + raise ValueError('A `AFMLayer` layer should be called ' + 'on a list of inputs.') + return (None, 1) + + def get_config(self, ): + config = {'attention_factor': self.attention_factor, + 'l2_reg_w': self.l2_reg_w, 'dropout_rate': self.dropout_rate, 'seed': self.seed} + base_config = super(AFMLayer, self).get_config() + base_config.update(config) + return base_config + + +class BiInteractionPooling(Layer): + """Bi-Interaction Layer used in Neural FM,compress the + pairwise element-wise product of features into one single vector. + + Input shape + - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``. + + Output shape + - 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + References + - [He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364.](http://arxiv.org/abs/1708.05027) + """ + + def __init__(self, **kwargs): + + super(BiInteractionPooling, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + + super(BiInteractionPooling, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + concated_embeds_value = inputs + square_of_sum = tf.square(reduce_sum( + concated_embeds_value, axis=1, keep_dims=True)) + sum_of_square = reduce_sum( + concated_embeds_value * concated_embeds_value, axis=1, keep_dims=True) + cross_term = 0.5 * (square_of_sum - sum_of_square) + + return cross_term + + def compute_output_shape(self, input_shape): + return (None, 1, input_shape[-1]) + + +class CIN(Layer): + """Compressed Interaction Network used in xDeepFM.This implemention is + adapted from code that the author of the paper published on https://github.com/Leavingseason/xDeepFM. + + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size, featuremap_num)`` ``featuremap_num = sum(self.layer_size[:-1]) // 2 + self.layer_size[-1]`` if ``split_half=True``,else ``sum(layer_size)`` . 
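+
+    For example, with the default ``layer_size=(128, 128)`` and ``split_half=True``,
+    ``featuremap_num = 128 // 2 + 128 = 192``, so the output has shape ``(batch_size, 192)``.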
+
+    Arguments
+        - **layer_size** : list of int. Feature maps in each layer.
+
+        - **activation** : activation function used on the feature maps.
+
+        - **split_half** : bool. If set to True, half of the feature maps in each hidden layer
+          connect to the output unit and the other half feed the next layer.
+
+        - **l2_reg** : float. L2 regularizer strength applied to the filters.
+
+        - **seed** : A Python integer to use as random seed.
+
+    References
+        - [Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.](https://arxiv.org/pdf/1803.05170.pdf)
+    """
+
+    def __init__(self, layer_size=(128, 128), activation='relu', split_half=True, l2_reg=1e-5, seed=1024, **kwargs):
+        if len(layer_size) == 0:
+            raise ValueError(
+                "layer_size must be a non-empty list(tuple)")
+        self.layer_size = layer_size
+        self.split_half = split_half
+        self.activation = activation
+        self.l2_reg = l2_reg
+        self.seed = seed
+        super(CIN, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        if len(input_shape) != 3:
+            raise ValueError(
+                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
+
+        self.field_nums = [int(input_shape[1])]
+        self.filters = []
+        self.bias = []
+        for i, size in enumerate(self.layer_size):
+
+            self.filters.append(self.add_weight(name='filter' + str(i),
+                                                shape=[1, self.field_nums[-1] * self.field_nums[0], size],
+                                                dtype=tf.float32,
+                                                initializer=glorot_uniform(seed=self.seed + i),
+                                                regularizer=l2(self.l2_reg)))
+
+            self.bias.append(self.add_weight(name='bias' + str(i), shape=[size], dtype=tf.float32,
+                                             initializer=Zeros()))
+
+            if self.split_half:
+                if i != len(self.layer_size) - 1 and size % 2 > 0:
+                    raise ValueError(
+                        "layer_size must be even number except for the last layer when split_half=True")
+
+                self.field_nums.append(size // 2)
+            else:
+                self.field_nums.append(size)
+
+        self.activation_layers = [activation_layer(
+            self.activation) for _ in self.layer_size]
+
+        super(CIN, self).build(input_shape)  # Be sure to call this somewhere!
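+
+    # Shape sketch for call() below (b = batch size, m = num fields, d = embedding dim,
+    # H_k = self.field_nums[k], with H_0 = m):
+    #   hidden_nn_layers[k] : (b, H_k, d)
+    #   dot_result          : (b, d, H_0 * H_k)  outer products taken per embedding slot
+    #   curr_out            : (b, layer_size[k], d)  after the 1x1 conv1d and transpose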
+
+    def call(self, inputs, **kwargs):
+
+        if K.ndim(inputs) != 3:
+            raise ValueError(
+                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))
+
+        dim = int(inputs.get_shape()[-1])
+        hidden_nn_layers = [inputs]
+        final_result = []
+
+        split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2)
+        for idx, layer_size in enumerate(self.layer_size):
+            split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2)
+
+            dot_result_m = tf.matmul(
+                split_tensor0, split_tensor, transpose_b=True)
+
+            dot_result_o = tf.reshape(
+                dot_result_m, shape=[dim, -1, self.field_nums[0] * self.field_nums[idx]])
+
+            dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])
+
+            curr_out = tf.nn.conv1d(
+                dot_result, filters=self.filters[idx], stride=1, padding='VALID')
+
+            curr_out = tf.nn.bias_add(curr_out, self.bias[idx])
+
+            curr_out = self.activation_layers[idx](curr_out)
+
+            curr_out = tf.transpose(curr_out, perm=[0, 2, 1])
+
+            if self.split_half:
+                if idx != len(self.layer_size) - 1:
+                    next_hidden, direct_connect = tf.split(
+                        curr_out, 2 * [layer_size // 2], 1)
+                else:
+                    direct_connect = curr_out
+                    next_hidden = 0
+            else:
+                direct_connect = curr_out
+                next_hidden = curr_out
+
+            final_result.append(direct_connect)
+            hidden_nn_layers.append(next_hidden)
+
+        result = tf.concat(final_result, axis=1)
+        result = reduce_sum(result, -1, keep_dims=False)
+
+        return result
+
+    def compute_output_shape(self, input_shape):
+        if self.split_half:
+            featuremap_num = sum(
+                self.layer_size[:-1]) // 2 + self.layer_size[-1]
+        else:
+            featuremap_num = sum(self.layer_size)
+        return (None, featuremap_num)
+
+    def get_config(self, ):
+
+        config = {'layer_size': self.layer_size, 'split_half': self.split_half, 'activation': self.activation,
+                  'seed': self.seed}
+        base_config = super(CIN, self).get_config()
+        base_config.update(config)
+        return base_config
+
+
+class CrossNet(Layer):
+    """The Cross Network part of the Deep&Cross Network model,
+    which learns both low- and high-degree cross features.
+
+    Input shape
+        - 2D tensor with shape: ``(batch_size, units)``.
+
+    Output shape
+        - 2D tensor with shape: ``(batch_size, units)``.
+
+    Arguments
+        - **layer_num**: Positive integer, the cross layer number
+
+        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix
+
+        - **parameterization**: string, ``"vector"`` or ``"matrix"``, the way to parameterize the cross network.
+
+        - **seed**: A Python integer to use as random seed.
+
+    References
+        - [Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17.
ACM, 2017: 12.](https://arxiv.org/abs/1708.05123) + """ + + def __init__(self, layer_num=2, parameterization='vector', l2_reg=0, seed=1024, **kwargs): + self.layer_num = layer_num + self.parameterization = parameterization + self.l2_reg = l2_reg + self.seed = seed + print('CrossNet parameterization:', self.parameterization) + super(CrossNet, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),)) + + dim = int(input_shape[-1]) + if self.parameterization == 'vector': + self.kernels = [self.add_weight(name='kernel' + str(i), + shape=(dim, 1), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + elif self.parameterization == 'matrix': + self.kernels = [self.add_weight(name='kernel' + str(i), + shape=(dim, dim), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + else: # error + raise ValueError("parameterization should be 'vector' or 'matrix'") + self.bias = [self.add_weight(name='bias' + str(i), + shape=(dim, 1), + initializer=Zeros(), + trainable=True) for i in range(self.layer_num)] + # Be sure to call this somewhere! + super(CrossNet, self).build(input_shape) + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs))) + + x_0 = tf.expand_dims(inputs, axis=2) + x_l = x_0 + for i in range(self.layer_num): + if self.parameterization == 'vector': + xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0)) + dot_ = tf.matmul(x_0, xl_w) + x_l = dot_ + self.bias[i] + x_l + elif self.parameterization == 'matrix': + xl_w = tf.einsum('ij,bjk->bik', self.kernels[i], x_l) # W * xi (bs, dim, 1) + dot_ = xl_w + self.bias[i] # W * xi + b + x_l = x_0 * dot_ + x_l # x0 · (W * xi + b) +xl Hadamard-product + else: # error + raise ValueError("parameterization should be 'vector' or 'matrix'") + x_l = tf.squeeze(x_l, axis=2) + return x_l + + def get_config(self, ): + + config = {'layer_num': self.layer_num, 'parameterization': self.parameterization, + 'l2_reg': self.l2_reg, 'seed': self.seed} + base_config = super(CrossNet, self).get_config() + base_config.update(config) + return base_config + + def compute_output_shape(self, input_shape): + return input_shape + + +class CrossNetMix(Layer): + """The Cross Network part of DCN-Mix model, which improves DCN-M by: + 1 add MOE to learn feature interactions in different subspaces + 2 add nonlinear transformations in low-dimensional space + + Input shape + - 2D tensor with shape: ``(batch_size, units)``. + + Output shape + - 2D tensor with shape: ``(batch_size, units)``. + + Arguments + - **low_rank** : Positive integer, dimensionality of low-rank sapce. + + - **num_experts** : Positive integer, number of experts. + + - **layer_num**: Positive integer, the cross layer number + + - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix + + - **seed**: A Python integer to use as random seed. + + References + - [Wang R, Shivanna R, Cheng D Z, et al. DCN-M: Improved Deep & Cross Network for Feature Cross Learning in Web-scale Learning to Rank Systems[J]. 
2020.](https://arxiv.org/abs/2008.13535) + """ + + def __init__(self, low_rank=32, num_experts=4, layer_num=2, l2_reg=0, seed=1024, **kwargs): + self.low_rank = low_rank + self.num_experts = num_experts + self.layer_num = layer_num + self.l2_reg = l2_reg + self.seed = seed + super(CrossNetMix, self).__init__(**kwargs) + + def build(self, input_shape): + + if len(input_shape) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),)) + + dim = int(input_shape[-1]) + + # U: (dim, low_rank) + self.U_list = [self.add_weight(name='U_list' + str(i), + shape=(self.num_experts, dim, self.low_rank), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + # V: (dim, low_rank) + self.V_list = [self.add_weight(name='V_list' + str(i), + shape=(self.num_experts, dim, self.low_rank), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + # C: (low_rank, low_rank) + self.C_list = [self.add_weight(name='C_list' + str(i), + shape=(self.num_experts, self.low_rank, self.low_rank), + initializer=glorot_normal( + seed=self.seed), + regularizer=l2(self.l2_reg), + trainable=True) for i in range(self.layer_num)] + + self.gating = [Dense(1, use_bias=False) for i in range(self.num_experts)] + + self.bias = [self.add_weight(name='bias' + str(i), + shape=(dim, 1), + initializer=Zeros(), + trainable=True) for i in range(self.layer_num)] + # Be sure to call this somewhere! + super(CrossNetMix, self).build(input_shape) + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 2: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs))) + + x_0 = tf.expand_dims(inputs, axis=2) + x_l = x_0 + for i in range(self.layer_num): + output_of_experts = [] + gating_score_of_experts = [] + for expert_id in range(self.num_experts): + # (1) G(x_l) + # compute the gating score by x_l + gating_score_of_experts.append(self.gating[expert_id](tf.squeeze(x_l, axis=2))) + + # (2) E(x_l) + # project the input x_l to $\mathbb{R}^{r}$ + v_x = tf.einsum('ij,bjk->bik', tf.transpose(self.V_list[i][expert_id]), x_l) # (bs, low_rank, 1) + + # nonlinear activation in low rank space + v_x = tf.nn.tanh(v_x) + v_x = tf.einsum('ij,bjk->bik', self.C_list[i][expert_id], v_x) # (bs, low_rank, 1) + v_x = tf.nn.tanh(v_x) + + # project back to $\mathbb{R}^{d}$ + uv_x = tf.einsum('ij,bjk->bik', self.U_list[i][expert_id], v_x) # (bs, dim, 1) + + dot_ = uv_x + self.bias[i] + dot_ = x_0 * dot_ # Hadamard-product + + output_of_experts.append(tf.squeeze(dot_, axis=2)) + + # (3) mixture of low-rank experts + output_of_experts = tf.stack(output_of_experts, 2) # (bs, dim, num_experts) + gating_score_of_experts = tf.stack(gating_score_of_experts, 1) # (bs, num_experts, 1) + moe_out = tf.matmul(output_of_experts, tf.nn.softmax(gating_score_of_experts, 1)) + x_l = moe_out + x_l # (bs, dim, 1) + x_l = tf.squeeze(x_l, axis=2) + return x_l + + def get_config(self, ): + + config = {'low_rank': self.low_rank, 'num_experts': self.num_experts, 'layer_num': self.layer_num, + 'l2_reg': self.l2_reg, 'seed': self.seed} + base_config = super(CrossNetMix, self).get_config() + base_config.update(config) + return base_config + + def compute_output_shape(self, input_shape): + return input_shape + + +class FM(Layer): + """Factorization Machine models pairwise (order-2) feature interactions + without linear term and bias. 
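+
+    Writing the stacked field embeddings as ``v_i``, it computes the classic
+    second-order term ``0.5 * sum_f((sum_i v_{i,f})^2 - sum_i v_{i,f}^2)``;
+    the square-of-sum minus sum-of-square trick in ``call`` below implements
+    exactly this in O(N*d) instead of enumerating all pairs.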
+ + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size, 1)``. + + References + - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) + """ + + def __init__(self, **kwargs): + + super(FM, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError("Unexpected inputs dimensions % d,\ + expect to be 3 dimensions" % (len(input_shape))) + + super(FM, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" + % (K.ndim(inputs))) + + concated_embeds_value = inputs + + square_of_sum = tf.square(reduce_sum( + concated_embeds_value, axis=1, keep_dims=True)) + sum_of_square = reduce_sum( + concated_embeds_value * concated_embeds_value, axis=1, keep_dims=True) + cross_term = square_of_sum - sum_of_square + cross_term = 0.5 * reduce_sum(cross_term, axis=2, keep_dims=False) + + return cross_term + + def compute_output_shape(self, input_shape): + return (None, 1) + + + +class InnerProductLayer(Layer): + """InnerProduct Layer used in PNN that compute the element-wise + product or inner product between feature vectors. + + Input shape + - a list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Output shape + - 3D tensor with shape: ``(batch_size, N*(N-1)/2 ,1)`` if use reduce_sum. or 3D tensor with shape: ``(batch_size, N*(N-1)/2, embedding_size )`` if not use reduce_sum. + + Arguments + - **reduce_sum**: bool. Whether return inner product or element-wise product + + References + - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf) + """ + + def __init__(self, reduce_sum=True, **kwargs): + self.reduce_sum = reduce_sum + super(InnerProductLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `InnerProductLayer` layer should be called ' + 'on a list of at least 2 inputs') + + reduced_inputs_shapes = [shape.as_list() for shape in input_shape] + shape_set = set() + + for i in range(len(input_shape)): + shape_set.add(tuple(reduced_inputs_shapes[i])) + + if len(shape_set) > 1: + raise ValueError('A `InnerProductLayer` layer requires ' + 'inputs with same shapes ' + 'Got different shapes: %s' % (shape_set)) + + if len(input_shape[0]) != 3 or input_shape[0][1] != 1: + raise ValueError('A `InnerProductLayer` layer requires ' + 'inputs of a list with same shape tensor like (None,1,embedding_size)' + 'Got different shapes: %s' % (input_shape[0])) + super(InnerProductLayer, self).build( + input_shape) # Be sure to call this somewhere! 
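+    # Usage sketch (illustrative, not part of the layer): given three
+    # (batch, 1, K) field embeddings, `call` enumerates the 3 * (3 - 1) / 2 = 3
+    # field pairs (0, 1), (0, 2), (1, 2) via the row/col index lists and returns
+    # their inner products, i.e. a (batch, 3, 1) tensor when reduce_sum=True.
+    # The tensor names below are placeholders, not part of this file.
+    #
+    #     e1, e2, e3 = [tf.random.normal((32, 1, 8)) for _ in range(3)]
+    #     pairwise = InnerProductLayer(reduce_sum=True)([e1, e2, e3])
+    #     # pairwise.shape == (32, 3, 1)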
+ + def call(self, inputs, **kwargs): + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embed_list = inputs + row = [] + col = [] + num_inputs = len(embed_list) + + for i in range(num_inputs - 1): + for j in range(i + 1, num_inputs): + row.append(i) + col.append(j) + p = tf.concat([embed_list[idx] + for idx in row], axis=1) # batch num_pairs k + q = tf.concat([embed_list[idx] + for idx in col], axis=1) + + inner_product = p * q + if self.reduce_sum: + inner_product = reduce_sum( + inner_product, axis=2, keep_dims=True) + return inner_product + + def compute_output_shape(self, input_shape): + num_inputs = len(input_shape) + num_pairs = int(num_inputs * (num_inputs - 1) / 2) + input_shape = input_shape[0] + embed_size = input_shape[-1] + if self.reduce_sum: + return (input_shape[0], num_pairs, 1) + else: + return (input_shape[0], num_pairs, embed_size) + + def get_config(self, ): + config = {'reduce_sum': self.reduce_sum, } + base_config = super(InnerProductLayer, self).get_config() + base_config.update(config) + return base_config + + +class InteractingLayer(Layer): + """A Layer used in AutoInt that model the correlations between different feature fields by multi-head self-attention mechanism. + + Input shape + - A 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 3D tensor with shape:``(batch_size,field_size,att_embedding_size * head_num)``. + + + Arguments + - **att_embedding_size**: int.The embedding size in multi-head self-attention network. + - **head_num**: int.The head number in multi-head self-attention network. + - **use_res**: bool.Whether or not use standard residual connections before output. + - **seed**: A Python integer to use as random seed. + + References + - [Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.](https://arxiv.org/abs/1810.11921) + """ + + def __init__(self, att_embedding_size=8, head_num=2, use_res=True, scaling=False, seed=1024, **kwargs): + if head_num <= 0: + raise ValueError('head_num must be a int > 0') + self.att_embedding_size = att_embedding_size + self.head_num = head_num + self.use_res = use_res + self.seed = seed + self.scaling = scaling + super(InteractingLayer, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + embedding_size = int(input_shape[-1]) + self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed)) + self.W_key = self.add_weight(name='key', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 1)) + self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 2)) + if self.use_res: + self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed)) + + # Be sure to call this somewhere! 
+ super(InteractingLayer, self).build(input_shape) + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + querys = tf.tensordot(inputs, self.W_Query, + axes=(-1, 0)) # None F D*head_num + keys = tf.tensordot(inputs, self.W_key, axes=(-1, 0)) + values = tf.tensordot(inputs, self.W_Value, axes=(-1, 0)) + + # head_num None F D + querys = tf.stack(tf.split(querys, self.head_num, axis=2)) + keys = tf.stack(tf.split(keys, self.head_num, axis=2)) + values = tf.stack(tf.split(values, self.head_num, axis=2)) + + inner_product = tf.matmul( + querys, keys, transpose_b=True) # head_num None F F + if self.scaling: + inner_product /= self.att_embedding_size ** 0.5 + self.normalized_att_scores = softmax(inner_product) + + result = tf.matmul(self.normalized_att_scores, + values) # head_num None F D + result = tf.concat(tf.split(result, self.head_num, ), axis=-1) + result = tf.squeeze(result, axis=0) # None F D*head_num + + if self.use_res: + result += tf.tensordot(inputs, self.W_Res, axes=(-1, 0)) + result = tf.nn.relu(result) + + return result + + def compute_output_shape(self, input_shape): + + return (None, input_shape[1], self.att_embedding_size * self.head_num) + + def get_config(self, ): + config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num, 'use_res': self.use_res, + 'seed': self.seed} + base_config = super(InteractingLayer, self).get_config() + base_config.update(config) + return base_config + + +class OutterProductLayer(Layer): + """OutterProduct Layer used in PNN.This implemention is + adapted from code that the author of the paper published on https://github.com/Atomu2014/product-nets. + + Input shape + - A list of N 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Output shape + - 2D tensor with shape:``(batch_size,N*(N-1)/2 )``. + + Arguments + - **kernel_type**: str. The kernel weight matrix type to use,can be mat,vec or num + + - **seed**: A Python integer to use as random seed. + + References + - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. 
IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf)
+    """
+
+    def __init__(self, kernel_type='mat', seed=1024, **kwargs):
+        if kernel_type not in ['mat', 'vec', 'num']:
+            raise ValueError("kernel_type must be 'mat', 'vec' or 'num'")
+        self.kernel_type = kernel_type
+        self.seed = seed
+        super(OutterProductLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+
+        if not isinstance(input_shape, list) or len(input_shape) < 2:
+            raise ValueError('A `OutterProductLayer` layer should be called '
+                             'on a list of at least 2 inputs')
+
+        reduced_inputs_shapes = [shape.as_list() for shape in input_shape]
+        shape_set = set()
+
+        for i in range(len(input_shape)):
+            shape_set.add(tuple(reduced_inputs_shapes[i]))
+
+        if len(shape_set) > 1:
+            raise ValueError('A `OutterProductLayer` layer requires '
+                             'inputs with same shapes. '
+                             'Got different shapes: %s' % (shape_set))
+
+        if len(input_shape[0]) != 3 or input_shape[0][1] != 1:
+            raise ValueError('A `OutterProductLayer` layer requires '
+                             'inputs of a list with same shape tensor like (None, 1, embedding_size). '
+                             'Got different shapes: %s' % (input_shape[0]))
+        num_inputs = len(input_shape)
+        num_pairs = int(num_inputs * (num_inputs - 1) / 2)
+        input_shape = input_shape[0]
+        embed_size = int(input_shape[-1])
+        if self.kernel_type == 'mat':
+            self.kernel = self.add_weight(shape=(embed_size, num_pairs, embed_size),
+                                          initializer=glorot_uniform(seed=self.seed),
+                                          name='kernel')
+        elif self.kernel_type == 'vec':
+            self.kernel = self.add_weight(shape=(num_pairs, embed_size,),
+                                          initializer=glorot_uniform(self.seed),
+                                          name='kernel')
+        elif self.kernel_type == 'num':
+            self.kernel = self.add_weight(shape=(num_pairs, 1),
+                                          initializer=glorot_uniform(self.seed),
+                                          name='kernel')
+
+        super(OutterProductLayer, self).build(
+            input_shape)  # Be sure to call this somewhere!
+
+    def call(self, inputs, **kwargs):
+
+        if K.ndim(inputs[0]) != 3:
+            raise ValueError(
+                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))
+
+        embed_list = inputs
+        row = []
+        col = []
+        num_inputs = len(embed_list)
+        for i in range(num_inputs - 1):
+            for j in range(i + 1, num_inputs):
+                row.append(i)
+                col.append(j)
+        p = tf.concat([embed_list[idx]
+                       for idx in row], axis=1)  # batch * num_pairs * k
+        q = tf.concat([embed_list[idx]
+                       for idx in col], axis=1)  # batch * num_pairs * k
+
+        if self.kernel_type == 'mat':
+            # kernel: k * num_pairs * k
+            p = tf.expand_dims(p, 1)  # batch * 1 * num_pairs * k
+            kp = reduce_sum(
+                tf.multiply(
+                    tf.transpose(
+                        reduce_sum(
+                            tf.multiply(p, self.kernel),  # batch * k * num_pairs * k
+                            -1),  # batch * k * num_pairs
+                        [0, 2, 1]),  # batch * num_pairs * k
+                    q),  # batch * num_pairs * k
+                -1)  # batch * num_pairs
+        else:
+            # kernel: 1 * num_pairs * (k or 1)
+            k = tf.expand_dims(self.kernel, 0)
+            kp = reduce_sum(p * q * k, -1)  # batch * num_pairs
+
+        return kp
+
+    def compute_output_shape(self, input_shape):
+        num_inputs = len(input_shape)
+        num_pairs = int(num_inputs * (num_inputs - 1) / 2)
+        return (None, num_pairs)
+
+    def get_config(self, ):
+        config = {'kernel_type': self.kernel_type, 'seed': self.seed}
+        base_config = super(OutterProductLayer, self).get_config()
+        base_config.update(config)
+        return base_config
+
+
+class FGCNNLayer(Layer):
+    """Feature Generation Layer used in FGCNN, including Convolution, MaxPooling and Recombination.
+
+    Input shape
+        - A 3D tensor with shape: ``(batch_size, field_size, embedding_size)``.
+
+    Output shape
+        - 3D tensor with shape: ``(batch_size, new_feature_num, embedding_size)``.
+
+    References
+        - [Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019.](https://arxiv.org/pdf/1904.04447)
+
+    """
+
+    def __init__(self, filters=(14, 16,), kernel_width=(7, 7,), new_maps=(3, 3,), pooling_width=(2, 2),
+                 **kwargs):
+        if not (len(filters) == len(kernel_width) == len(new_maps) == len(pooling_width)):
+            raise ValueError("filters, kernel_width, new_maps and pooling_width must have equal length")
+        self.filters = filters
+        self.kernel_width = kernel_width
+        self.new_maps = new_maps
+        self.pooling_width = pooling_width
+
+        super(FGCNNLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+
+        if len(input_shape) != 3:
+            raise ValueError(
+                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
+        self.conv_layers = []
+        self.pooling_layers = []
+        self.dense_layers = []
+        pooling_shape = input_shape.as_list() + [1, ]
+        embedding_size = int(input_shape[-1])
+        for i in range(1, len(self.filters) + 1):
+            filters = self.filters[i - 1]
+            width = self.kernel_width[i - 1]
+            new_filters = self.new_maps[i - 1]
+            pooling_width = self.pooling_width[i - 1]
+            conv_output_shape = self._conv_output_shape(
+                pooling_shape, (width, 1))
+            pooling_shape = self._pooling_output_shape(
+                conv_output_shape, (pooling_width, 1))
+            self.conv_layers.append(Conv2D(filters=filters, kernel_size=(width, 1), strides=(1, 1),
+                                           padding='same',
+                                           activation='tanh', use_bias=True, ))
+            self.pooling_layers.append(
+                MaxPooling2D(pool_size=(pooling_width, 1)))
+            self.dense_layers.append(Dense(pooling_shape[1] * embedding_size * new_filters,
+                                           activation='tanh', use_bias=True))
+
+        self.flatten = Flatten()
+
+        super(FGCNNLayer, self).build(
+            input_shape)  # Be sure to call this somewhere!
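+    # Worked example (illustrative): with the defaults filters=(14, 16),
+    # kernel_width=(7, 7), new_maps=(3, 3), pooling_width=(2, 2) and an input of
+    # shape (batch, 10, 8), the first conv/pool block reduces the 10 fields to
+    # 10 // 2 = 5 and recombines 3 * 5 = 15 new features; the second reduces 5 to
+    # 5 // 2 = 2 and adds 3 * 2 = 6 more, so the layer outputs (batch, 21, 8).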
+ + def call(self, inputs, **kwargs): + + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + embedding_size = int(inputs.shape[-1]) + pooling_result = tf.expand_dims(inputs, axis=3) + + new_feature_list = [] + + for i in range(1, len(self.filters) + 1): + new_filters = self.new_maps[i - 1] + + conv_result = self.conv_layers[i - 1](pooling_result) + + pooling_result = self.pooling_layers[i - 1](conv_result) + + flatten_result = self.flatten(pooling_result) + + new_result = self.dense_layers[i - 1](flatten_result) + + new_feature_list.append( + tf.reshape(new_result, (-1, int(pooling_result.shape[1]) * new_filters, embedding_size))) + + new_features = concat_func(new_feature_list, axis=1) + return new_features + + def compute_output_shape(self, input_shape): + + new_features_num = 0 + features_num = input_shape[1] + + for i in range(0, len(self.kernel_width)): + pooled_features_num = features_num // self.pooling_width[i] + new_features_num += self.new_maps[i] * pooled_features_num + features_num = pooled_features_num + + return (None, new_features_num, input_shape[-1]) + + def get_config(self, ): + config = {'kernel_width': self.kernel_width, 'filters': self.filters, 'new_maps': self.new_maps, + 'pooling_width': self.pooling_width} + base_config = super(FGCNNLayer, self).get_config() + base_config.update(config) + return base_config + + def _conv_output_shape(self, input_shape, kernel_size): + # channels_last + space = input_shape[1:-1] + new_space = [] + for i in range(len(space)): + new_dim = utils.conv_output_length( + space[i], + kernel_size[i], + padding='same', + stride=1, + dilation=1) + new_space.append(new_dim) + return ([input_shape[0]] + new_space + [self.filters]) + + def _pooling_output_shape(self, input_shape, pool_size): + # channels_last + + rows = input_shape[1] + cols = input_shape[2] + rows = utils.conv_output_length(rows, pool_size[0], 'valid', + pool_size[0]) + cols = utils.conv_output_length(cols, pool_size[1], 'valid', + pool_size[1]) + return [input_shape[0], rows, cols, input_shape[3]] + + +class SENETLayer(Layer): + """SENETLayer used in FiBiNET. + + Input shape + - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Output shape + - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. + + Arguments + - **reduction_ratio** : Positive integer, dimensionality of the + attention network output space. + + - **seed** : A Python integer to use as random seed. 
+ + References + - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) + """ + + def __init__(self, reduction_ratio=3, seed=1024, **kwargs): + self.reduction_ratio = reduction_ratio + + self.seed = seed + super(SENETLayer, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `AttentionalFM` layer should be called ' + 'on a list of at least 2 inputs') + + self.filed_size = len(input_shape) + self.embedding_size = input_shape[0][-1] + reduction_size = max(1, self.filed_size // self.reduction_ratio) + + self.W_1 = self.add_weight(shape=( + self.filed_size, reduction_size), initializer=glorot_normal(seed=self.seed), name="W_1") + self.W_2 = self.add_weight(shape=( + reduction_size, self.filed_size), initializer=glorot_normal(seed=self.seed), name="W_2") + + self.tensordot = Lambda( + lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0))) + + # Be sure to call this somewhere! + super(SENETLayer, self).build(input_shape) + + def call(self, inputs, training=None, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + inputs = concat_func(inputs, axis=1) + Z = reduce_mean(inputs, axis=-1, ) + + A_1 = tf.nn.relu(self.tensordot([Z, self.W_1])) + A_2 = tf.nn.relu(self.tensordot([A_1, self.W_2])) + V = tf.multiply(inputs, tf.expand_dims(A_2, axis=2)) + + return tf.split(V, self.filed_size, axis=1) + + def compute_output_shape(self, input_shape): + + return input_shape + + def compute_mask(self, inputs, mask=None): + return [None] * self.filed_size + + def get_config(self, ): + config = {'reduction_ratio': self.reduction_ratio, 'seed': self.seed} + base_config = super(SENETLayer, self).get_config() + base_config.update(config) + return base_config + + +class BilinearInteraction(Layer): + """BilinearInteraction Layer used in FiBiNET. + + Input shape + - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. Its length is ``filed_size``. + + Output shape + - 3D tensor with shape: ``(batch_size,filed_size*(filed_size-1)/2,embedding_size)``. + + Arguments + - **bilinear_type** : String, types of bilinear functions used in this layer. + + - **seed** : A Python integer to use as random seed. 
+ + References + - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) + + """ + + def __init__(self, bilinear_type="interaction", seed=1024, **kwargs): + self.bilinear_type = bilinear_type + self.seed = seed + + super(BilinearInteraction, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError('A `AttentionalFM` layer should be called ' + 'on a list of at least 2 inputs') + embedding_size = int(input_shape[0][-1]) + + if self.bilinear_type == "all": + self.W = self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal( + seed=self.seed), name="bilinear_weight") + elif self.bilinear_type == "each": + self.W_list = [self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal( + seed=self.seed), name="bilinear_weight" + str(i)) for i in range(len(input_shape) - 1)] + elif self.bilinear_type == "interaction": + self.W_list = [self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal( + seed=self.seed), name="bilinear_weight" + str(i) + '_' + str(j)) for i, j in + itertools.combinations(range(len(input_shape)), 2)] + else: + raise NotImplementedError + + super(BilinearInteraction, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) + + n = len(inputs) + if self.bilinear_type == "all": + vidots = [tf.tensordot(inputs[i], self.W, axes=(-1, 0)) for i in range(n)] + p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)] + elif self.bilinear_type == "each": + vidots = [tf.tensordot(inputs[i], self.W_list[i], axes=(-1, 0)) for i in range(n - 1)] + p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)] + elif self.bilinear_type == "interaction": + p = [tf.multiply(tf.tensordot(v[0], w, axes=(-1, 0)), v[1]) + for v, w in zip(itertools.combinations(inputs, 2), self.W_list)] + else: + raise NotImplementedError + output = concat_func(p, axis=1) + return output + + def compute_output_shape(self, input_shape): + filed_size = len(input_shape) + embedding_size = input_shape[0][-1] + + return (None, filed_size * (filed_size - 1) // 2, embedding_size) + + def get_config(self, ): + config = {'bilinear_type': self.bilinear_type, 'seed': self.seed} + base_config = super(BilinearInteraction, self).get_config() + base_config.update(config) + return base_config + + +class FieldWiseBiInteraction(Layer): + """Field-Wise Bi-Interaction Layer used in FLEN,compress the + pairwise element-wise product of features into one single vector. + + Input shape + - A list of 3D tensor with shape:``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size,embedding_size)``. + + Arguments + - **use_bias** : Boolean, if use bias. + - **seed** : A Python integer to use as random seed. 
+ + References + - [FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690) + + """ + + def __init__(self, use_bias=True, seed=1024, **kwargs): + self.use_bias = use_bias + self.seed = seed + + super(FieldWiseBiInteraction, self).__init__(**kwargs) + + def build(self, input_shape): + + if not isinstance(input_shape, list) or len(input_shape) < 2: + raise ValueError( + 'A `Field-Wise Bi-Interaction` layer should be called ' + 'on a list of at least 2 inputs') + + self.num_fields = len(input_shape) + embedding_size = input_shape[0][-1] + + self.kernel_mf = self.add_weight( + name='kernel_mf', + shape=(int(self.num_fields * (self.num_fields - 1) / 2), 1), + initializer=Ones(), + regularizer=None, + trainable=True) + + self.kernel_fm = self.add_weight( + name='kernel_fm', + shape=(self.num_fields, 1), + initializer=Constant(value=0.5), + regularizer=None, + trainable=True) + if self.use_bias: + self.bias_mf = self.add_weight(name='bias_mf', + shape=(embedding_size), + initializer=Zeros()) + self.bias_fm = self.add_weight(name='bias_fm', + shape=(embedding_size), + initializer=Zeros()) + + super(FieldWiseBiInteraction, + self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + + if K.ndim(inputs[0]) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % + (K.ndim(inputs))) + + field_wise_embeds_list = inputs + + # MF module + field_wise_vectors = tf.concat([ + reduce_sum(field_i_vectors, axis=1, keep_dims=True) + for field_i_vectors in field_wise_embeds_list + ], 1) + + left = [] + right = [] + + for i, j in itertools.combinations(list(range(self.num_fields)), 2): + left.append(i) + right.append(j) + + embeddings_left = tf.gather(params=field_wise_vectors, + indices=left, + axis=1) + embeddings_right = tf.gather(params=field_wise_vectors, + indices=right, + axis=1) + + embeddings_prod = embeddings_left * embeddings_right + field_weighted_embedding = embeddings_prod * self.kernel_mf + h_mf = reduce_sum(field_weighted_embedding, axis=1) + if self.use_bias: + h_mf = tf.nn.bias_add(h_mf, self.bias_mf) + + # FM module + square_of_sum_list = [ + tf.square(reduce_sum(field_i_vectors, axis=1, keep_dims=True)) + for field_i_vectors in field_wise_embeds_list + ] + sum_of_square_list = [ + reduce_sum(field_i_vectors * field_i_vectors, + axis=1, + keep_dims=True) + for field_i_vectors in field_wise_embeds_list + ] + + field_fm = tf.concat([ + square_of_sum - sum_of_square for square_of_sum, sum_of_square in + zip(square_of_sum_list, sum_of_square_list) + ], 1) + + h_fm = reduce_sum(field_fm * self.kernel_fm, axis=1) + if self.use_bias: + h_fm = tf.nn.bias_add(h_fm, self.bias_fm) + + return h_mf + h_fm + + def compute_output_shape(self, input_shape): + return (None, input_shape[0][-1]) + + def get_config(self, ): + config = {'use_bias': self.use_bias, 'seed': self.seed} + base_config = super(FieldWiseBiInteraction, self).get_config() + base_config.update(config) + return base_config + + +class FwFMLayer(Layer): + """Field-weighted Factorization Machines + + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. + + Output shape + - 2D tensor with shape: ``(batch_size, 1)``. 
+ + Arguments + - **num_fields** : integer for number of fields + - **regularizer** : L2 regularizer weight for the field strength parameters of FwFM + + References + - [Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising] + https://arxiv.org/pdf/1806.03514.pdf + """ + + def __init__(self, num_fields=4, regularizer=0.000001, **kwargs): + self.num_fields = num_fields + self.regularizer = regularizer + super(FwFMLayer, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError("Unexpected inputs dimensions % d,\ + expect to be 3 dimensions" % (len(input_shape))) + + if input_shape[1] != self.num_fields: + raise ValueError("Mismatch in number of fields {} and \ + concatenated embeddings dims {}".format(self.num_fields, input_shape[1])) + + self.field_strengths = self.add_weight(name='field_pair_strengths', + shape=(self.num_fields, self.num_fields), + initializer=TruncatedNormal(), + regularizer=l2(self.regularizer), + trainable=True) + + super(FwFMLayer, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" + % (K.ndim(inputs))) + + if inputs.shape[1] != self.num_fields: + raise ValueError("Mismatch in number of fields {} and \ + concatenated embeddings dims {}".format(self.num_fields, inputs.shape[1])) + + pairwise_inner_prods = [] + for fi, fj in itertools.combinations(range(self.num_fields), 2): + # get field strength for pair fi and fj + r_ij = self.field_strengths[fi, fj] + + # get embeddings for the features of both the fields + feat_embed_i = tf.squeeze(inputs[0:, fi:fi + 1, 0:], axis=1) + feat_embed_j = tf.squeeze(inputs[0:, fj:fj + 1, 0:], axis=1) + + f = tf.scalar_mul(r_ij, batch_dot(feat_embed_i, feat_embed_j, axes=1)) + pairwise_inner_prods.append(f) + + sum_ = tf.add_n(pairwise_inner_prods) + return sum_ + + def compute_output_shape(self, input_shape): + return (None, 1) + + def get_config(self): + config = super(FwFMLayer, self).get_config().copy() + config.update({ + 'num_fields': self.num_fields, + 'regularizer': self.regularizer + }) + return config + + +class FEFMLayer(Layer): + """Field-Embedded Factorization Machines + + Input shape + - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. 
+ + Output shape + - 2D tensor with shape: + ``(batch_size, (num_fields * (num_fields-1))/2)`` # concatenated FEFM interaction embeddings + + Arguments + - **regularizer** : L2 regularizer weight for the field pair matrix embeddings parameters of FEFM + + References + - [Field-Embedded Factorization Machines for Click-through Rate Prediction] + https://arxiv.org/pdf/2009.09931.pdf + """ + + def __init__(self, regularizer, **kwargs): + self.regularizer = regularizer + super(FEFMLayer, self).__init__(**kwargs) + + def build(self, input_shape): + if len(input_shape) != 3: + raise ValueError("Unexpected inputs dimensions % d,\ + expect to be 3 dimensions" % (len(input_shape))) + + self.num_fields = int(input_shape[1]) + embedding_size = int(input_shape[2]) + + self.field_embeddings = {} + for fi, fj in itertools.combinations(range(self.num_fields), 2): + field_pair_id = str(fi) + "-" + str(fj) + self.field_embeddings[field_pair_id] = self.add_weight(name='field_embeddings' + field_pair_id, + shape=(embedding_size, embedding_size), + initializer=TruncatedNormal(), + regularizer=l2(self.regularizer), + trainable=True) + + super(FEFMLayer, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, inputs, **kwargs): + if K.ndim(inputs) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" + % (K.ndim(inputs))) + + pairwise_inner_prods = [] + for fi, fj in itertools.combinations(range(self.num_fields), 2): + field_pair_id = str(fi) + "-" + str(fj) + feat_embed_i = tf.squeeze(inputs[0:, fi:fi + 1, 0:], axis=1) + feat_embed_j = tf.squeeze(inputs[0:, fj:fj + 1, 0:], axis=1) + field_pair_embed_ij = self.field_embeddings[field_pair_id] + + feat_embed_i_tr = tf.matmul(feat_embed_i, field_pair_embed_ij + tf.transpose(field_pair_embed_ij)) + + f = batch_dot(feat_embed_i_tr, feat_embed_j, axes=1) + pairwise_inner_prods.append(f) + + concat_vec = tf.concat(pairwise_inner_prods, axis=1) + return concat_vec + + def compute_output_shape(self, input_shape): + num_fields = int(input_shape[1]) + return (None, (num_fields * (num_fields - 1)) / 2) + + def get_config(self): + config = super(FEFMLayer, self).get_config().copy() + config.update({ + 'regularizer': self.regularizer, + }) + return config diff --git a/modelzoo/PNN/script/layers/normalization.py b/modelzoo/PNN/script/layers/normalization.py new file mode 100644 index 00000000000..3fceb1257d8 --- /dev/null +++ b/modelzoo/PNN/script/layers/normalization.py @@ -0,0 +1,51 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +from tensorflow.python.keras import backend as K +from tensorflow.python.keras.layers import Layer + +try: + from tensorflow.python.ops.init_ops import Zeros, Ones +except ImportError: + from tensorflow.python.ops.init_ops_v2 import Zeros, Ones + + +class LayerNormalization(Layer): + def __init__(self, axis=-1, eps=1e-9, center=True, + scale=True, **kwargs): + self.axis = axis + self.eps = eps + self.center = center + self.scale = scale + super(LayerNormalization, self).__init__(**kwargs) + + def build(self, input_shape): + self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:], + initializer=Ones(), trainable=True) + self.beta = self.add_weight(name='beta', shape=input_shape[-1:], + initializer=Zeros(), trainable=True) + super(LayerNormalization, self).build(input_shape) + + def call(self, inputs): + mean = K.mean(inputs, axis=self.axis, keepdims=True) + variance = K.mean(K.square(inputs - mean), axis=-1, keepdims=True) + std = 
K.sqrt(variance + self.eps) + outputs = (inputs - mean) / std + if self.scale: + outputs *= self.gamma + if self.center: + outputs += self.beta + return outputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self, ): + config = {'axis': self.axis, 'eps': self.eps, 'center': self.center, 'scale': self.scale} + base_config = super(LayerNormalization, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/modelzoo/PNN/script/layers/sequence.py b/modelzoo/PNN/script/layers/sequence.py new file mode 100644 index 00000000000..45a65915c22 --- /dev/null +++ b/modelzoo/PNN/script/layers/sequence.py @@ -0,0 +1,901 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import numpy as np +import tensorflow as tf +from tensorflow.python.keras import backend as K + +try: + from tensorflow.python.ops.init_ops import TruncatedNormal, glorot_uniform_initializer as glorot_uniform, \ + identity_initializer as identity +except ImportError: + from tensorflow.python.ops.init_ops_v2 import TruncatedNormal, glorot_uniform, identity + +from tensorflow.python.keras.layers import LSTM, Lambda, Layer, Dropout + +from .core import LocalActivationUnit +from .normalization import LayerNormalization + +if tf.__version__ >= '2.0.0': + from ..contrib.rnn_v2 import dynamic_rnn +else: + from ..contrib.rnn import dynamic_rnn +from ..contrib.utils import QAAttGRUCell, VecAttGRUCell +from .utils import reduce_sum, reduce_max, div, softmax, reduce_mean + + +class SequencePoolingLayer(Layer): + """The SequencePoolingLayer is used to apply pooling operation(sum,mean,max) on variable-length sequence feature/multi-value feature. + + Input shape + - A list of two tensor [seq_value,seq_len] + + - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` + + - seq_len is a 2D tensor with shape : ``(batch_size, 1)``,indicate valid length of each sequence. + + Output shape + - 3D tensor with shape: ``(batch_size, 1, embedding_size)``. + + Arguments + - **mode**:str.Pooling operation to be used,can be sum,mean or max. + + - **supports_masking**:If True,the input need to support masking. + """ + + def __init__(self, mode='mean', supports_masking=False, **kwargs): + + if mode not in ['sum', 'mean', 'max']: + raise ValueError("mode must be sum or mean") + self.mode = mode + self.eps = tf.constant(1e-8, tf.float32) + super(SequencePoolingLayer, self).__init__(**kwargs) + + self.supports_masking = supports_masking + + def build(self, input_shape): + if not self.supports_masking: + self.seq_len_max = int(input_shape[0][1]) + super(SequencePoolingLayer, self).build( + input_shape) # Be sure to call this somewhere! 
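+    # Usage sketch (illustrative, not part of the layer): mean-pool a padded
+    # behavior sequence of max length 5 in which only the first 3 steps of each
+    # row are valid. The tensor names are placeholders.
+    #
+    #     seq = tf.random.normal((32, 5, 8))                 # (batch, T, K)
+    #     seq_len = tf.constant([[3]] * 32, dtype=tf.int32)  # (batch, 1)
+    #     pooled = SequencePoolingLayer(mode='mean')([seq, seq_len])
+    #     # pooled.shape == (32, 1, 8)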
+ + def call(self, seq_value_len_list, mask=None, **kwargs): + if self.supports_masking: + if mask is None: + raise ValueError( + "When supports_masking=True,input must support masking") + uiseq_embed_list = seq_value_len_list + mask = tf.cast(mask, tf.float32) # tf.to_float(mask) + user_behavior_length = reduce_sum(mask, axis=-1, keep_dims=True) + mask = tf.expand_dims(mask, axis=2) + else: + uiseq_embed_list, user_behavior_length = seq_value_len_list + + mask = tf.sequence_mask(user_behavior_length, + self.seq_len_max, dtype=tf.float32) + mask = tf.transpose(mask, (0, 2, 1)) + + embedding_size = uiseq_embed_list.shape[-1] + + mask = tf.tile(mask, [1, 1, embedding_size]) + + if self.mode == "max": + hist = uiseq_embed_list - (1 - mask) * 1e9 + return reduce_max(hist, 1, keep_dims=True) + + hist = reduce_sum(uiseq_embed_list * mask, 1, keep_dims=False) + + if self.mode == "mean": + hist = div(hist, tf.cast(user_behavior_length, tf.float32) + self.eps) + + hist = tf.expand_dims(hist, axis=1) + return hist + + def compute_output_shape(self, input_shape): + if self.supports_masking: + return (None, 1, input_shape[-1]) + else: + return (None, 1, input_shape[0][-1]) + + def compute_mask(self, inputs, mask): + return None + + def get_config(self, ): + config = {'mode': self.mode, 'supports_masking': self.supports_masking} + base_config = super(SequencePoolingLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class WeightedSequenceLayer(Layer): + """The WeightedSequenceLayer is used to apply weight score on variable-length sequence feature/multi-value feature. + + Input shape + - A list of two tensor [seq_value,seq_len,seq_weight] + + - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` + + - seq_len is a 2D tensor with shape : ``(batch_size, 1)``,indicate valid length of each sequence. + + - seq_weight is a 3D tensor with shape: ``(batch_size, T, 1)`` + + Output shape + - 3D tensor with shape: ``(batch_size, T, embedding_size)``. + + Arguments + - **weight_normalization**: bool.Whether normalize the weight score before applying to sequence. + + - **supports_masking**:If True,the input need to support masking. + """ + + def __init__(self, weight_normalization=True, supports_masking=False, **kwargs): + super(WeightedSequenceLayer, self).__init__(**kwargs) + self.weight_normalization = weight_normalization + self.supports_masking = supports_masking + + def build(self, input_shape): + if not self.supports_masking: + self.seq_len_max = int(input_shape[0][1]) + super(WeightedSequenceLayer, self).build( + input_shape) # Be sure to call this somewhere! 
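+    # Usage sketch (illustrative): re-weight a padded (batch, T, K) sequence by a
+    # per-step score; with weight_normalization=True the scores are softmax-
+    # normalized over the valid positions before the element-wise multiply.
+    #
+    #     keys = tf.random.normal((32, 5, 8))                # (batch, T, K)
+    #     key_len = tf.constant([[3]] * 32, dtype=tf.int32)  # (batch, 1)
+    #     scores = tf.random.normal((32, 5, 1))              # (batch, T, 1)
+    #     weighted = WeightedSequenceLayer()([keys, key_len, scores])
+    #     # weighted.shape == (32, 5, 8)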
+ + def call(self, input_list, mask=None, **kwargs): + if self.supports_masking: + if mask is None: + raise ValueError( + "When supports_masking=True,input must support masking") + key_input, value_input = input_list + mask = tf.expand_dims(mask[0], axis=2) + else: + key_input, key_length_input, value_input = input_list + mask = tf.sequence_mask(key_length_input, + self.seq_len_max, dtype=tf.bool) + mask = tf.transpose(mask, (0, 2, 1)) + + embedding_size = key_input.shape[-1] + + if self.weight_normalization: + paddings = tf.ones_like(value_input) * (-2 ** 32 + 1) + else: + paddings = tf.zeros_like(value_input) + value_input = tf.where(mask, value_input, paddings) + + if self.weight_normalization: + value_input = softmax(value_input, dim=1) + + if len(value_input.shape) == 2: + value_input = tf.expand_dims(value_input, axis=2) + value_input = tf.tile(value_input, [1, 1, embedding_size]) + + return tf.multiply(key_input, value_input) + + def compute_output_shape(self, input_shape): + return input_shape[0] + + def compute_mask(self, inputs, mask): + if self.supports_masking: + return mask[0] + else: + return None + + def get_config(self, ): + config = {'weight_normalization': self.weight_normalization, 'supports_masking': self.supports_masking} + base_config = super(WeightedSequenceLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class AttentionSequencePoolingLayer(Layer): + """The Attentional sequence pooling operation used in DIN. + + Input shape + - A list of three tensor: [query,keys,keys_length] + + - query is a 3D tensor with shape: ``(batch_size, 1, embedding_size)`` + + - keys is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` + + - keys_length is a 2D tensor with shape: ``(batch_size, 1)`` + + Output shape + - 3D tensor with shape: ``(batch_size, 1, embedding_size)``. + + Arguments + - **att_hidden_units**:list of positive integer, the attention net layer number and units in each layer. + + - **att_activation**: Activation function to use in attention net. + + - **weight_normalization**: bool.Whether normalize the attention score of local activation unit. + + - **supports_masking**:If True,the input need to support masking. + + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """ + + def __init__(self, att_hidden_units=(80, 40), att_activation='sigmoid', weight_normalization=False, + return_score=False, + supports_masking=False, **kwargs): + + self.att_hidden_units = att_hidden_units + self.att_activation = att_activation + self.weight_normalization = weight_normalization + self.return_score = return_score + super(AttentionSequencePoolingLayer, self).__init__(**kwargs) + self.supports_masking = supports_masking + + def build(self, input_shape): + if not self.supports_masking: + if not isinstance(input_shape, list) or len(input_shape) != 3: + raise ValueError('A `AttentionSequencePoolingLayer` layer should be called ' + 'on a list of 3 inputs') + + if len(input_shape[0]) != 3 or len(input_shape[1]) != 3 or len(input_shape[2]) != 2: + raise ValueError( + "Unexpected inputs dimensions,the 3 tensor dimensions are %d,%d and %d , expect to be 3,3 and 2" % ( + len(input_shape[0]), len(input_shape[1]), len(input_shape[2]))) + + if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1 or input_shape[2][1] != 1: + raise ValueError('A `AttentionSequencePoolingLayer` layer requires ' + 'inputs of a 3 tensor with shape (None,1,embedding_size),(None,T,embedding_size) and (None,1)' + 'Got different shapes: %s' % (input_shape)) + else: + pass + self.local_att = LocalActivationUnit( + self.att_hidden_units, self.att_activation, l2_reg=0, dropout_rate=0, use_bn=False, seed=1024, ) + super(AttentionSequencePoolingLayer, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, mask=None, training=None, **kwargs): + + if self.supports_masking: + if mask is None: + raise ValueError( + "When supports_masking=True,input must support masking") + queries, keys = inputs + key_masks = tf.expand_dims(mask[-1], axis=1) + + else: + + queries, keys, keys_length = inputs + hist_len = keys.get_shape()[1] + key_masks = tf.sequence_mask(keys_length, hist_len) + + attention_score = self.local_att([queries, keys], training=training) + + outputs = tf.transpose(attention_score, (0, 2, 1)) + + if self.weight_normalization: + paddings = tf.ones_like(outputs) * (-2 ** 32 + 1) + else: + paddings = tf.zeros_like(outputs) + + outputs = tf.where(key_masks, outputs, paddings) + + if self.weight_normalization: + outputs = softmax(outputs) + + if not self.return_score: + outputs = tf.matmul(outputs, keys) + + if tf.__version__ < '1.13.0': + outputs._uses_learning_phase = attention_score._uses_learning_phase + else: + outputs._uses_learning_phase = training is not None + + return outputs + + def compute_output_shape(self, input_shape): + if self.return_score: + return (None, 1, input_shape[1][1]) + else: + return (None, 1, input_shape[0][-1]) + + def compute_mask(self, inputs, mask): + return None + + def get_config(self, ): + + config = {'att_hidden_units': self.att_hidden_units, 'att_activation': self.att_activation, + 'weight_normalization': self.weight_normalization, 'return_score': self.return_score, + 'supports_masking': self.supports_masking} + base_config = super(AttentionSequencePoolingLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class BiLSTM(Layer): + """A multiple layer Bidirectional Residual LSTM Layer. + + Input shape + - 3D tensor with shape ``(batch_size, timesteps, input_dim)``. + + Output shape + - 3D tensor with shape: ``(batch_size, timesteps, units)``. + + Arguments + - **units**: Positive integer, dimensionality of the output space. 
+ + - **layers**:Positive integer, number of LSTM layers to stacked. + + - **res_layers**: Positive integer, number of residual connection to used in last ``res_layers``. + + - **dropout_rate**: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. + + - **merge_mode**: merge_mode: Mode by which outputs of the forward and backward RNNs will be combined. One of { ``'fw'`` , ``'bw'`` , ``'sum'`` , ``'mul'`` , ``'concat'`` , ``'ave'`` , ``None`` }. If None, the outputs will not be combined, they will be returned as a list. + + + """ + + def __init__(self, units, layers=2, res_layers=0, dropout_rate=0.2, merge_mode='ave', **kwargs): + + if merge_mode not in ['fw', 'bw', 'sum', 'mul', 'ave', 'concat', None]: + raise ValueError('Invalid merge mode. ' + 'Merge mode should be one of ' + '{"fw","bw","sum", "mul", "ave", "concat", None}') + + self.units = units + self.layers = layers + self.res_layers = res_layers + self.dropout_rate = dropout_rate + self.merge_mode = merge_mode + + super(BiLSTM, self).__init__(**kwargs) + self.supports_masking = True + + def build(self, input_shape): + + if len(input_shape) != 3: + raise ValueError( + "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) + self.fw_lstm = [] + self.bw_lstm = [] + for _ in range(self.layers): + self.fw_lstm.append( + LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True, + unroll=True)) + self.bw_lstm.append( + LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True, + go_backwards=True, unroll=True)) + + super(BiLSTM, self).build( + input_shape) # Be sure to call this somewhere! + + def call(self, inputs, mask=None, **kwargs): + + input_fw = inputs + input_bw = inputs + for i in range(self.layers): + output_fw = self.fw_lstm[i](input_fw) + output_bw = self.bw_lstm[i](input_bw) + output_bw = Lambda(lambda x: K.reverse( + x, 1), mask=lambda inputs, mask: mask)(output_bw) + + if i >= self.layers - self.res_layers: + output_fw += input_fw + output_bw += input_bw + input_fw = output_fw + input_bw = output_bw + + output_fw = input_fw + output_bw = input_bw + + if self.merge_mode == "fw": + output = output_fw + elif self.merge_mode == "bw": + output = output_bw + elif self.merge_mode == 'concat': + output = K.concatenate([output_fw, output_bw]) + elif self.merge_mode == 'sum': + output = output_fw + output_bw + elif self.merge_mode == 'ave': + output = (output_fw + output_bw) / 2 + elif self.merge_mode == 'mul': + output = output_fw * output_bw + elif self.merge_mode is None: + output = [output_fw, output_bw] + + return output + + def compute_output_shape(self, input_shape): + print(self.merge_mode) + if self.merge_mode is None: + return [input_shape, input_shape] + elif self.merge_mode == 'concat': + return input_shape[:-1] + (input_shape[-1] * 2,) + else: + return input_shape + + def compute_mask(self, inputs, mask): + return mask + + def get_config(self, ): + + config = {'units': self.units, 'layers': self.layers, + 'res_layers': self.res_layers, 'dropout_rate': self.dropout_rate, 'merge_mode': self.merge_mode} + base_config = super(BiLSTM, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class Transformer(Layer): + """ Simplified version of Transformer proposed in 《Attention is all you need》 + + Input shape + - a list of two 3D tensor with shape ``(batch_size, timesteps, input_dim)`` if ``supports_masking=True`` . 
+ - a list of two 4 tensors, first two tensors with shape ``(batch_size, timesteps, input_dim)``,last two tensors with shape ``(batch_size, 1)`` if ``supports_masking=False`` . + + + Output shape + - 3D tensor with shape: ``(batch_size, 1, input_dim)`` if ``output_type='mean'`` or ``output_type='sum'`` , else ``(batch_size, timesteps, input_dim)`` . + + + Arguments + - **att_embedding_size**: int.The embedding size in multi-head self-attention network. + - **head_num**: int.The head number in multi-head self-attention network. + - **dropout_rate**: float between 0 and 1. Fraction of the units to drop. + - **use_positional_encoding**: bool. Whether or not use positional_encoding + - **use_res**: bool. Whether or not use standard residual connections before output. + - **use_feed_forward**: bool. Whether or not use pointwise feed foward network. + - **use_layer_norm**: bool. Whether or not use Layer Normalization. + - **blinding**: bool. Whether or not use blinding. + - **seed**: A Python integer to use as random seed. + - **supports_masking**:bool. Whether or not support masking. + - **attention_type**: str, Type of attention, the value must be one of { ``'scaled_dot_product'`` , ``'additive'`` }. + - **output_type**: ``'mean'`` , ``'sum'`` or `None`. Whether or not use average/sum pooling for output. + + References + - [Vaswani, Ashish, et al. "Attention is all you need." Advances in Neural Information Processing Systems. 2017.](https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf) + """ + + def __init__(self, att_embedding_size=1, head_num=8, dropout_rate=0.0, use_positional_encoding=True, use_res=True, + use_feed_forward=True, use_layer_norm=False, blinding=True, seed=1024, supports_masking=False, + attention_type="scaled_dot_product", output_type="mean", **kwargs): + if head_num <= 0: + raise ValueError('head_num must be a int > 0') + self.att_embedding_size = att_embedding_size + self.head_num = head_num + self.num_units = att_embedding_size * head_num + self.use_res = use_res + self.use_feed_forward = use_feed_forward + self.seed = seed + self.use_positional_encoding = use_positional_encoding + self.dropout_rate = dropout_rate + self.use_layer_norm = use_layer_norm + self.blinding = blinding + self.attention_type = attention_type + self.output_type = output_type + super(Transformer, self).__init__(**kwargs) + self.supports_masking = supports_masking + + def build(self, input_shape): + embedding_size = int(input_shape[0][-1]) + if self.num_units != embedding_size: + raise ValueError( + "att_embedding_size * head_num must equal the last dimension size of inputs,got %d * %d != %d" % ( + self.att_embedding_size, self.head_num, embedding_size)) + self.seq_len_max = int(input_shape[0][-2]) + self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed)) + self.W_key = self.add_weight(name='key', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 1)) + self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num], + dtype=tf.float32, + initializer=TruncatedNormal(seed=self.seed + 2)) + if self.attention_type == "additive": + self.b = self.add_weight('b', shape=[self.att_embedding_size], dtype=tf.float32, + initializer=glorot_uniform(seed=self.seed)) + self.v = self.add_weight('v', shape=[self.att_embedding_size], dtype=tf.float32, + 
initializer=glorot_uniform(seed=self.seed)) + # if self.use_res: + # self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num], dtype=tf.float32, + # initializer=TruncatedNormal(seed=self.seed)) + if self.use_feed_forward: + self.fw1 = self.add_weight('fw1', shape=[self.num_units, 4 * self.num_units], dtype=tf.float32, + initializer=glorot_uniform(seed=self.seed)) + self.fw2 = self.add_weight('fw2', shape=[4 * self.num_units, self.num_units], dtype=tf.float32, + initializer=glorot_uniform(seed=self.seed)) + + self.dropout = Dropout( + self.dropout_rate, seed=self.seed) + self.ln = LayerNormalization() + if self.use_positional_encoding: + self.query_pe = PositionEncoding() + self.key_pe = PositionEncoding() + # Be sure to call this somewhere! + super(Transformer, self).build(input_shape) + + def call(self, inputs, mask=None, training=None, **kwargs): + + if self.supports_masking: + queries, keys = inputs + query_masks, key_masks = mask + query_masks = tf.cast(query_masks, tf.float32) + key_masks = tf.cast(key_masks, tf.float32) + else: + queries, keys, query_masks, key_masks = inputs + + query_masks = tf.sequence_mask( + query_masks, self.seq_len_max, dtype=tf.float32) + key_masks = tf.sequence_mask( + key_masks, self.seq_len_max, dtype=tf.float32) + query_masks = tf.squeeze(query_masks, axis=1) + key_masks = tf.squeeze(key_masks, axis=1) + + if self.use_positional_encoding: + queries = self.query_pe(queries) + keys = self.key_pe(queries) + + querys = tf.tensordot(queries, self.W_Query, + axes=(-1, 0)) # None T_q D*head_num + keys = tf.tensordot(keys, self.W_key, axes=(-1, 0)) + values = tf.tensordot(keys, self.W_Value, axes=(-1, 0)) + + # head_num*None T_q D + querys = tf.concat(tf.split(querys, self.head_num, axis=2), axis=0) + keys = tf.concat(tf.split(keys, self.head_num, axis=2), axis=0) + values = tf.concat(tf.split(values, self.head_num, axis=2), axis=0) + + if self.attention_type == "scaled_dot_product": + # head_num*None T_q T_k + outputs = tf.matmul(querys, keys, transpose_b=True) + + outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5) + elif self.attention_type == "additive": + querys_reshaped = tf.expand_dims(querys, axis=-2) + keys_reshaped = tf.expand_dims(keys, axis=-3) + outputs = tf.tanh(tf.nn.bias_add(querys_reshaped + keys_reshaped, self.b)) + outputs = tf.squeeze(tf.tensordot(outputs, tf.expand_dims(self.v, axis=-1), axes=[-1, 0]), axis=-1) + else: + raise ValueError("attention_type must be scaled_dot_product or additive") + + key_masks = tf.tile(key_masks, [self.head_num, 1]) + + # (h*N, T_q, T_k) + key_masks = tf.tile(tf.expand_dims(key_masks, 1), + [1, tf.shape(queries)[1], 1]) + + paddings = tf.ones_like(outputs) * (-2 ** 32 + 1) + + # (h*N, T_q, T_k) + + outputs = tf.where(tf.equal(key_masks, 1), outputs, paddings, ) + if self.blinding: + try: + outputs = tf.matrix_set_diag(outputs, tf.ones_like(outputs)[ + :, :, 0] * (-2 ** 32 + 1)) + except AttributeError: + outputs = tf.compat.v1.matrix_set_diag(outputs, tf.ones_like(outputs)[ + :, :, 0] * (-2 ** 32 + 1)) + + outputs -= reduce_max(outputs, axis=-1, keep_dims=True) + outputs = softmax(outputs) + query_masks = tf.tile(query_masks, [self.head_num, 1]) # (h*N, T_q) + # (h*N, T_q, T_k) + query_masks = tf.tile(tf.expand_dims( + query_masks, -1), [1, 1, tf.shape(keys)[1]]) + + outputs *= query_masks + + outputs = self.dropout(outputs, training=training) + # Weighted sum + # ( h*N, T_q, C/h) + result = tf.matmul(outputs, values) + result = tf.concat(tf.split(result, 
self.head_num, axis=0), axis=2) + + if self.use_res: + # tf.tensordot(queries, self.W_Res, axes=(-1, 0)) + result += queries + if self.use_layer_norm: + result = self.ln(result) + + if self.use_feed_forward: + fw1 = tf.nn.relu(tf.tensordot(result, self.fw1, axes=[-1, 0])) + fw1 = self.dropout(fw1, training=training) + fw2 = tf.tensordot(fw1, self.fw2, axes=[-1, 0]) + if self.use_res: + result += fw2 + if self.use_layer_norm: + result = self.ln(result) + + if self.output_type == "mean": + return reduce_mean(result, axis=1, keep_dims=True) + elif self.output_type == "sum": + return reduce_sum(result, axis=1, keep_dims=True) + else: + return result + + def compute_output_shape(self, input_shape): + + return (None, 1, self.att_embedding_size * self.head_num) + + def compute_mask(self, inputs, mask=None): + return None + + def get_config(self, ): + config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num, + 'dropout_rate': self.dropout_rate, 'use_res': self.use_res, + 'use_positional_encoding': self.use_positional_encoding, 'use_feed_forward': self.use_feed_forward, + 'use_layer_norm': self.use_layer_norm, 'seed': self.seed, 'supports_masking': self.supports_masking, + 'blinding': self.blinding, 'attention_type': self.attention_type, 'output_type': self.output_type} + base_config = super(Transformer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class PositionEncoding(Layer): + def __init__(self, pos_embedding_trainable=True, + zero_pad=False, + scale=True, **kwargs): + self.pos_embedding_trainable = pos_embedding_trainable + self.zero_pad = zero_pad + self.scale = scale + super(PositionEncoding, self).__init__(**kwargs) + + def build(self, input_shape): + # Create a trainable weight variable for this layer. + _, T, num_units = input_shape.as_list() # inputs.get_shape().as_list() + # First part of the PE function: sin and cos argument + position_enc = np.array([ + [pos / np.power(10000, 2. * (i // 2) / num_units) for i in range(num_units)] + for pos in range(T)]) + + # Second part, apply the cosine to even columns and sin to odds. + position_enc[:, 0::2] = np.sin(position_enc[:, 0::2]) # dim 2i + position_enc[:, 1::2] = np.cos(position_enc[:, 1::2]) # dim 2i+1 + if self.zero_pad: + position_enc[0, :] = np.zeros(num_units) + self.lookup_table = self.add_weight("lookup_table", (T, num_units), + initializer=identity(position_enc), + trainable=self.pos_embedding_trainable) + + # Be sure to call this somewhere! + super(PositionEncoding, self).build(input_shape) + + def call(self, inputs, mask=None): + _, T, num_units = inputs.get_shape().as_list() + position_ind = tf.expand_dims(tf.range(T), 0) + outputs = tf.nn.embedding_lookup(self.lookup_table, position_ind) + if self.scale: + outputs = outputs * num_units ** 0.5 + return outputs + inputs + + def compute_output_shape(self, input_shape): + + return input_shape + + def compute_mask(self, inputs, mask=None): + return mask + + def get_config(self, ): + + config = {'pos_embedding_trainable': self.pos_embedding_trainable, 'zero_pad': self.zero_pad, + 'scale': self.scale} + base_config = super(PositionEncoding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class BiasEncoding(Layer): + def __init__(self, sess_max_count, seed=1024, **kwargs): + self.sess_max_count = sess_max_count + self.seed = seed + super(BiasEncoding, self).__init__(**kwargs) + + def build(self, input_shape): + # Create a trainable weight variable for this layer. 
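+        # The layer learns three bias terms that are broadcast-added below:
+        # a per-session bias of shape (sess_max_count, 1, 1), a per-position
+        # bias of shape (1, seq_len_max, 1) and a per-dimension bias of shape
+        # (1, 1, embed_size), so every element of every session input receives
+        # the sum of its session, position and dimension offsets.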
+ + if self.sess_max_count == 1: + embed_size = input_shape[2].value + seq_len_max = input_shape[1].value + else: + try: + embed_size = input_shape[0][2].value + seq_len_max = input_shape[0][1].value + except AttributeError: + embed_size = input_shape[0][2] + seq_len_max = input_shape[0][1] + + self.sess_bias_embedding = self.add_weight('sess_bias_embedding', shape=(self.sess_max_count, 1, 1), + initializer=TruncatedNormal( + mean=0.0, stddev=0.0001, seed=self.seed)) + self.seq_bias_embedding = self.add_weight('seq_bias_embedding', shape=(1, seq_len_max, 1), + initializer=TruncatedNormal( + mean=0.0, stddev=0.0001, seed=self.seed)) + self.item_bias_embedding = self.add_weight('item_bias_embedding', shape=(1, 1, embed_size), + initializer=TruncatedNormal( + mean=0.0, stddev=0.0001, seed=self.seed)) + + # Be sure to call this somewhere! + super(BiasEncoding, self).build(input_shape) + + def call(self, inputs, mask=None): + """ + :param concated_embeds_value: None * field_size * embedding_size + :return: None*1 + """ + transformer_out = [] + for i in range(self.sess_max_count): + transformer_out.append( + inputs[i] + self.item_bias_embedding + self.seq_bias_embedding + self.sess_bias_embedding[i]) + return transformer_out + + def compute_output_shape(self, input_shape): + + return input_shape + + def compute_mask(self, inputs, mask=None): + return mask + + def get_config(self, ): + + config = {'sess_max_count': self.sess_max_count, 'seed': self.seed, } + base_config = super(BiasEncoding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class DynamicGRU(Layer): + def __init__(self, num_units=None, gru_type='GRU', return_sequence=True, **kwargs): + + self.num_units = num_units + self.return_sequence = return_sequence + self.gru_type = gru_type + super(DynamicGRU, self).__init__(**kwargs) + + def build(self, input_shape): + # Create a trainable weight variable for this layer. + input_seq_shape = input_shape[0] + if self.num_units is None: + self.num_units = input_seq_shape.as_list()[-1] + if self.gru_type == "AGRU": + self.gru_cell = QAAttGRUCell(self.num_units) + elif self.gru_type == "AUGRU": + self.gru_cell = VecAttGRUCell(self.num_units) + else: + try: + self.gru_cell = tf.nn.rnn_cell.GRUCell(self.num_units) # GRUCell + except AttributeError: + self.gru_cell = tf.compat.v1.nn.rnn_cell.GRUCell(self.num_units) + + # Be sure to call this somewhere! 
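+        # Usage sketch (illustrative, assuming DIEN-style inputs): a plain GRU
+        # pass extracts interest states and an AUGRU pass evolves them with
+        # attention scores; `seq`, `seq_len` and `att_scores` are placeholders
+        # of shapes (batch, T, K), (batch, 1) and (batch, T, 1).
+        #
+        #     rnn_out = DynamicGRU(8, gru_type="GRU")([seq, seq_len])
+        #     final_h = DynamicGRU(8, gru_type="AUGRU", return_sequence=False)(
+        #         [rnn_out, seq_len, att_scores])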
+
+        super(DynamicGRU, self).build(input_shape)
+
+    def call(self, input_list):
+        """
+        :param input_list: [rnn_input, sequence_length] for GRU/AIGRU, or
+            [rnn_input, sequence_length, att_score] for AGRU/AUGRU
+        :return: the output sequence if `return_sequence` is True, otherwise the
+            final hidden state expanded to shape (batch_size, 1, num_units)
+        """
+        if self.gru_type == "GRU" or self.gru_type == "AIGRU":
+            rnn_input, sequence_length = input_list
+            att_score = None
+        else:
+            rnn_input, sequence_length, att_score = input_list
+
+        rnn_output, hidden_state = dynamic_rnn(self.gru_cell, inputs=rnn_input, att_scores=att_score,
+                                               sequence_length=tf.squeeze(sequence_length),
+                                               dtype=tf.float32, scope=self.name)
+        if self.return_sequence:
+            return rnn_output
+        else:
+            return tf.expand_dims(hidden_state, axis=1)
+
+    def compute_output_shape(self, input_shape):
+        rnn_input_shape = input_shape[0]
+        if self.return_sequence:
+            return rnn_input_shape
+        else:
+            return (None, 1, rnn_input_shape[2])
+
+    def get_config(self, ):
+        config = {'num_units': self.num_units, 'gru_type': self.gru_type, 'return_sequence': self.return_sequence}
+        base_config = super(DynamicGRU, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+
+class KMaxPooling(Layer):
+    """K Max pooling that selects the k largest values along the specified axis.
+
+      Input shape
+        - nD tensor with shape: ``(batch_size, ..., input_dim)``.
+
+      Output shape
+        - nD tensor with shape: ``(batch_size, ..., output_dim)``.
+
+      Arguments
+        - **k**: positive integer, number of top elements to look for along the ``axis`` dimension.
+
+        - **axis**: positive integer, the dimension to look for elements.
+
+     """
+
+    def __init__(self, k=1, axis=-1, **kwargs):
+
+        self.k = k
+        self.axis = axis
+        super(KMaxPooling, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+
+        if self.axis < 1 or self.axis > len(input_shape):
+            raise ValueError("axis must be in [1, %d], now is %d" %
+                             (len(input_shape), self.axis))
+
+        if self.k < 1 or self.k > input_shape[self.axis]:
+            raise ValueError("k must be in [1, %d], now k is %d" %
+                             (input_shape[self.axis], self.k))
+        self.dims = len(input_shape)
+        # Be sure to call this somewhere!
+        super(KMaxPooling, self).build(input_shape)
+
+    def call(self, inputs):
+
+        # swap the last and the axis dimensions since top_k will be applied along the last dimension
+        perm = list(range(self.dims))
+        perm[-1], perm[self.axis] = perm[self.axis], perm[-1]
+        shifted_input = tf.transpose(inputs, perm)
+
+        # extract top_k, returns two tensors [values, indices]
+        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
+        output = tf.transpose(top_k, perm)
+
+        return output
+
+    def compute_output_shape(self, input_shape):
+        output_shape = list(input_shape)
+        output_shape[self.axis] = self.k
+        return tuple(output_shape)
+
+    def get_config(self, ):
+        config = {'k': self.k, 'axis': self.axis}
+        base_config = super(KMaxPooling, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+# def positional_encoding(inputs,
+#                         pos_embedding_trainable=True,
+#                         zero_pad=False,
+#                         scale=True,
+#                         ):
+#     '''Sinusoidal Positional_Encoding.
+#
+#     Args:
+#
+#       - inputs: A 2d Tensor with shape of (N, T).
+#       - num_units: Output dimensionality
+#       - zero_pad: Boolean. If True, all the values of the first row (id = 0) should be constant zero
+#       - scale: Boolean. If True, the output will be multiplied by sqrt num_units(check details from paper)
+#       - scope: Optional scope for `variable_scope`.
+#       - reuse: Boolean, whether to reuse the weights of a previous layer by the same name.
+#
+#     Returns:
+#
+#       - A 'Tensor' with one more rank than the input's, whose last dimension is 'num_units'
+#     '''
+#
+#     _, T, num_units = inputs.get_shape().as_list()
+#     # with tf.variable_scope(scope, reuse=reuse):
+#     position_ind = tf.expand_dims(tf.range(T), 0)
+#     # First part of the PE function: sin and cos argument
+#     position_enc = np.array([
+#         [pos / np.power(10000, 2. * i / num_units)
+#          for i in range(num_units)]
+#         for pos in range(T)])
+#
+#     # Second part, apply sin to the even columns and cos to the odd ones.
+#     position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])  # dim 2i
+#     position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])  # dim 2i+1
+#
+#     # Convert to a tensor
+#
+#     if pos_embedding_trainable:
+#         lookup_table = K.variable(position_enc, dtype=tf.float32)
+#
+#     if zero_pad:
+#         lookup_table = tf.concat((tf.zeros(shape=[1, num_units]),
+#                                   lookup_table[1:, :]), 0)
+#
+#     outputs = tf.nn.embedding_lookup(lookup_table, position_ind)
+#
+#     if scale:
+#         outputs = outputs * num_units ** 0.5
+#     return outputs + inputs
diff --git a/modelzoo/PNN/script/layers/utils.py b/modelzoo/PNN/script/layers/utils.py
new file mode 100644
index 00000000000..2be8f3fe5ef
--- /dev/null
+++ b/modelzoo/PNN/script/layers/utils.py
@@ -0,0 +1,302 @@
+# -*- coding:utf-8 -*-
+"""
+
+Author:
+    Weichen Shen,weichenswc@163.com
+
+"""
+import tensorflow as tf
+from tensorflow.python.keras.layers import Flatten, Concatenate, Layer, Add
+from tensorflow.python.ops.lookup_ops import TextFileInitializer
+
+try:
+    from tensorflow.python.ops.init_ops import Zeros, glorot_normal_initializer as glorot_normal
+except ImportError:
+    from tensorflow.python.ops.init_ops_v2 import Zeros, glorot_normal
+
+from tensorflow.python.keras.regularizers import l2
+
+try:
+    from tensorflow.python.ops.lookup_ops import StaticHashTable
+except ImportError:
+    from tensorflow.python.ops.lookup_ops import HashTable as StaticHashTable
+
+
+class NoMask(Layer):
+    def __init__(self, **kwargs):
+        super(NoMask, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        # Be sure to call this somewhere!
+        super(NoMask, self).build(input_shape)
+
+    def call(self, x, mask=None, **kwargs):
+        return x
+
+    def compute_mask(self, inputs, mask):
+        return None
+
+
+class Hash(Layer):
+    """Looks up keys in a table when `vocabulary_path` is set, outputting the corresponding values.
+    If `vocabulary_path` is not set, `Hash` hashes the input to [0,num_buckets). When `mask_zero` = True,
+    input value `0` or `0.0` will be set to `0`, and other values will be set in range [1,num_buckets).
+
+    The following snippet initializes a `Hash` with a `vocabulary_path` file that has the first column as
+    values and the second column as keys:
+
+    * `1,emerson`
+    * `2,lake`
+    * `3,palmer`
+
+    >>> hash = Hash(
+    ...   num_buckets=3+1,
+    ...   vocabulary_path=filename,
+    ...   default_value=0)
+    >>> hash(tf.constant('lake')).numpy()
+    2
+    >>> hash(tf.constant('lakeemerson')).numpy()
+    0
+
+    Args:
+        num_buckets: An `int` that is >= 1. The number of buckets, or the vocabulary size + 1
+            when `vocabulary_path` is set.
+        mask_zero: default is False. The `Hash` value will hash input `0` or `0.0` to value `0` when
+            `mask_zero` is `True`. `mask_zero` is not used when `vocabulary_path` is set.
+        vocabulary_path: default `None`. The `CSV` text file path of the vocabulary hash, which contains
+            two columns separated by the delimiter `comma`, where the first column is the value and the second is
+            the key. The key data type is `string`, the value data type is `int`.
The path must + be accessible from wherever `Hash` is initialized. + default_value: default '0'. The default value if a key is missing in the table. + **kwargs: Additional keyword arguments. + """ + + def __init__(self, num_buckets, mask_zero=False, vocabulary_path=None, default_value=0, **kwargs): + self.num_buckets = num_buckets + self.mask_zero = mask_zero + self.vocabulary_path = vocabulary_path + self.default_value = default_value + if self.vocabulary_path: + initializer = TextFileInitializer(vocabulary_path, 'string', 1, 'int64', 0, delimiter=',') + self.hash_table = StaticHashTable(initializer, default_value=self.default_value) + super(Hash, self).__init__(**kwargs) + + def build(self, input_shape): + # Be sure to call this somewhere! + super(Hash, self).build(input_shape) + + def call(self, x, mask=None, **kwargs): + + if x.dtype != tf.string: + zero = tf.as_string(tf.zeros([1], dtype=x.dtype)) + x = tf.as_string(x, ) + else: + zero = tf.as_string(tf.zeros([1], dtype='int32')) + + if self.vocabulary_path: + hash_x = self.hash_table.lookup(x) + return hash_x + + num_buckets = self.num_buckets if not self.mask_zero else self.num_buckets - 1 + try: + hash_x = tf.string_to_hash_bucket_fast(x, num_buckets, + name=None) # weak hash + except AttributeError: + hash_x = tf.strings.to_hash_bucket_fast(x, num_buckets, + name=None) # weak hash + if self.mask_zero: + mask = tf.cast(tf.not_equal(x, zero), dtype='int64') + hash_x = (hash_x + 1) * mask + + return hash_x + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self, ): + config = {'num_buckets': self.num_buckets, 'mask_zero': self.mask_zero, 'vocabulary_path': self.vocabulary_path, + 'default_value': self.default_value} + base_config = super(Hash, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class Linear(Layer): + + def __init__(self, l2_reg=0.0, mode=0, use_bias=False, seed=1024, **kwargs): + + self.l2_reg = l2_reg + # self.l2_reg = tf.contrib.layers.l2_regularizer(float(l2_reg_linear)) + if mode not in [0, 1, 2]: + raise ValueError("mode must be 0,1 or 2") + self.mode = mode + self.use_bias = use_bias + self.seed = seed + super(Linear, self).__init__(**kwargs) + + def build(self, input_shape): + if self.use_bias: + self.bias = self.add_weight(name='linear_bias', + shape=(1,), + initializer=Zeros(), + trainable=True) + if self.mode == 1: + self.kernel = self.add_weight( + 'linear_kernel', + shape=[int(input_shape[-1]), 1], + initializer=glorot_normal(self.seed), + regularizer=l2(self.l2_reg), + trainable=True) + elif self.mode == 2: + self.kernel = self.add_weight( + 'linear_kernel', + shape=[int(input_shape[1][-1]), 1], + initializer=glorot_normal(self.seed), + regularizer=l2(self.l2_reg), + trainable=True) + + super(Linear, self).build(input_shape) # Be sure to call this somewhere! 
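+        # Mode recap: mode 0 takes sparse embeddings only (logit = sum over the
+        # last axis); mode 1 takes dense values only (logit = dense . kernel);
+        # mode 2 takes a [sparse, dense] pair and adds the two partial logits.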
+ + def call(self, inputs, **kwargs): + if self.mode == 0: + sparse_input = inputs + linear_logit = reduce_sum(sparse_input, axis=-1, keep_dims=True) + elif self.mode == 1: + dense_input = inputs + fc = tf.tensordot(dense_input, self.kernel, axes=(-1, 0)) + linear_logit = fc + else: + sparse_input, dense_input = inputs + fc = tf.tensordot(dense_input, self.kernel, axes=(-1, 0)) + linear_logit = reduce_sum(sparse_input, axis=-1, keep_dims=False) + fc + if self.use_bias: + linear_logit += self.bias + + return linear_logit + + def compute_output_shape(self, input_shape): + return (None, 1) + + def compute_mask(self, inputs, mask): + return None + + def get_config(self, ): + config = {'mode': self.mode, 'l2_reg': self.l2_reg, 'use_bias': self.use_bias, 'seed': self.seed} + base_config = super(Linear, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def concat_func(inputs, axis=-1, mask=False): + if not mask: + inputs = list(map(NoMask(), inputs)) + if len(inputs) == 1: + return inputs[0] + else: + return Concatenate(axis=axis)(inputs) + + +def reduce_mean(input_tensor, + axis=None, + keep_dims=False, + name=None, + reduction_indices=None): + try: + return tf.reduce_mean(input_tensor, + axis=axis, + keep_dims=keep_dims, + name=name, + reduction_indices=reduction_indices) + except TypeError: + return tf.reduce_mean(input_tensor, + axis=axis, + keepdims=keep_dims, + name=name) + + +def reduce_sum(input_tensor, + axis=None, + keep_dims=False, + name=None, + reduction_indices=None): + try: + return tf.reduce_sum(input_tensor, + axis=axis, + keep_dims=keep_dims, + name=name, + reduction_indices=reduction_indices) + except TypeError: + return tf.reduce_sum(input_tensor, + axis=axis, + keepdims=keep_dims, + name=name) + + +def reduce_max(input_tensor, + axis=None, + keep_dims=False, + name=None, + reduction_indices=None): + try: + return tf.reduce_max(input_tensor, + axis=axis, + keep_dims=keep_dims, + name=name, + reduction_indices=reduction_indices) + except TypeError: + return tf.reduce_max(input_tensor, + axis=axis, + keepdims=keep_dims, + name=name) + + +def div(x, y, name=None): + try: + return tf.div(x, y, name=name) + except AttributeError: + return tf.divide(x, y, name=name) + + +def softmax(logits, dim=-1, name=None): + try: + return tf.nn.softmax(logits, dim=dim, name=name) + except TypeError: + return tf.nn.softmax(logits, axis=dim, name=name) + + +class _Add(Layer): + def __init__(self, **kwargs): + super(_Add, self).__init__(**kwargs) + + def build(self, input_shape): + # Be sure to call this somewhere! 
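+        # _Add wraps the Keras Add layer so that add_func below can sum a list
+        # of logit tensors as a layer and still return a constant zero logit
+        # when the list is empty.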
+
+        super(_Add, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        # if not isinstance(inputs, list):
+        #     return inputs
+        # if len(inputs) == 1:
+        #     return inputs[0]
+        if len(inputs) == 0:
+            return tf.constant([[0.0]])
+
+        return Add()(inputs)
+
+
+def add_func(inputs):
+    if not isinstance(inputs, list):
+        return inputs
+    if len(inputs) == 1:
+        return inputs[0]
+    return _Add()(inputs)
+
+
+def combined_dnn_input(sparse_embedding_list, dense_value_list):
+    if len(sparse_embedding_list) > 0 and len(dense_value_list) > 0:
+        sparse_dnn_input = Flatten()(concat_func(sparse_embedding_list))
+        dense_dnn_input = Flatten()(concat_func(dense_value_list))
+        return concat_func([sparse_dnn_input, dense_dnn_input])
+    elif len(sparse_embedding_list) > 0:
+        return Flatten()(concat_func(sparse_embedding_list))
+    elif len(dense_value_list) > 0:
+        return Flatten()(concat_func(dense_value_list))
+    else:
+        raise NotImplementedError("dnn_feature_columns cannot be an empty list")
diff --git a/modelzoo/PNN/script/models/__init__.py b/modelzoo/PNN/script/models/__init__.py
new file mode 100644
index 00000000000..6c2b9cd07f5
--- /dev/null
+++ b/modelzoo/PNN/script/models/__init__.py
@@ -0,0 +1,3 @@
+from .pnn import PNN
+
+__all__ = ["PNN"]
diff --git a/modelzoo/PNN/script/models/pnn.py b/modelzoo/PNN/script/models/pnn.py
new file mode 100644
index 00000000000..6a75271ca81
--- /dev/null
+++ b/modelzoo/PNN/script/models/pnn.py
@@ -0,0 +1,72 @@
+# -*- coding:utf-8 -*-
+"""
+Author:
+    Weichen Shen, weichenswc@163.com
+
+Reference:
+    [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf)
+"""
+
+from tensorflow.python.keras.models import Model
+from tensorflow.python.keras.layers import Dense, Reshape, Flatten
+
+from ..feature_column import build_input_features, input_from_feature_columns
+from ..layers.core import PredictionLayer, DNN
+from ..layers.interaction import InnerProductLayer, OutterProductLayer
+from ..layers.utils import concat_func, combined_dnn_input
+
+
+def PNN(dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_embedding=0.00001, l2_reg_dnn=0,
+        seed=1024, dnn_dropout=0, dnn_activation='relu', use_inner=True, use_outter=False, kernel_type='mat',
+        task='binary'):
+    """Instantiates the Product-based Neural Network architecture.
+
+    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
+    :param dnn_hidden_units: list, list of positive integers or empty list, the layer number and units in each layer of the deep net
+    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
+    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
+    :param seed: integer, to use as random seed.
+    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
+    :param dnn_activation: Activation function to use in DNN
+    :param use_inner: bool, whether to use the inner product or not.
+    :param use_outter: bool, whether to use the outer product or not.
+    :param kernel_type: str, kernel type used in the outer product, can be ``'mat'`` , ``'vec'`` or ``'num'``
+    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
+    :return: A Keras model instance.
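+
+    A minimal usage sketch (the feature names and sizes here are purely illustrative):
+
+    >>> columns = [SparseFeat('uid', vocabulary_size=100, embedding_dim=8),
+    ...            SparseFeat('item', vocabulary_size=200, embedding_dim=8)]
+    >>> model = PNN(columns, dnn_hidden_units=(64, 32))
+    >>> model.compile('adam', 'binary_crossentropy')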
+ """ + + if kernel_type not in ['mat', 'vec', 'num']: + raise ValueError("kernel_type must be mat,vec or num") + + features = build_input_features(dnn_feature_columns) + + inputs_list = list(features.values()) + + sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, + l2_reg_embedding, seed) + inner_product = Flatten()( + InnerProductLayer()(sparse_embedding_list)) + outter_product = OutterProductLayer(kernel_type)(sparse_embedding_list) + + # ipnn deep input + linear_signal = Reshape( + [sum(map(lambda x: int(x.shape[-1]), sparse_embedding_list))])(concat_func(sparse_embedding_list)) + + if use_inner and use_outter: + deep_input = concat_func([linear_signal, inner_product, outter_product]) + elif use_inner: + deep_input = concat_func([linear_signal, inner_product]) + elif use_outter: + deep_input = concat_func([linear_signal, outter_product]) + else: + deep_input = linear_signal + + dnn_input = combined_dnn_input([deep_input], dense_value_list) + dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input) + dnn_logit = Dense(1, use_bias=False)(dnn_out) + + output = PredictionLayer(task)(dnn_logit) + + model = Model(inputs=inputs_list, + outputs=output) + return model diff --git a/modelzoo/PNN/script/utils.py b/modelzoo/PNN/script/utils.py new file mode 100644 index 00000000000..7fe3b25a518 --- /dev/null +++ b/modelzoo/PNN/script/utils.py @@ -0,0 +1,46 @@ +# -*- coding:utf-8 -*- +""" + +Author: + Weichen Shen,weichenswc@163.com + +""" + +import json +import logging +from threading import Thread + +import requests + +try: + from packaging.version import parse +except ImportError: + from pip._vendor.packaging.version import parse + + +def check_version(version): + """Return version of package on pypi.python.org using json.""" + + def check(version): + try: + url_pattern = 'https://pypi.python.org/pypi/deepctr/json' + req = requests.get(url_pattern) + latest_version = parse('0') + version = parse(version) + if req.status_code == requests.codes.ok: + j = json.loads(req.text.encode('utf-8')) + releases = j.get('releases', []) + for release in releases: + ver = parse(release) + if ver.is_prerelease or ver.is_postrelease: + continue + latest_version = max(latest_version, ver) + if latest_version > version: + logging.warning( + '\nDeepCTR version {0} detected. 
Your version is {1}.\nUse `pip install -U deepctr` to upgrade. Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format(
+                            latest_version, version))
+            except Exception:
+                print("Please check the latest version manually on https://pypi.org/project/deepctr/#history")
+                return
+
+    Thread(target=check, args=(version,)).start()
diff --git a/modelzoo/PNN/train.py b/modelzoo/PNN/train.py
new file mode 100644
index 00000000000..55eef980f30
--- /dev/null
+++ b/modelzoo/PNN/train.py
@@ -0,0 +1,259 @@
+import os
+import sys
+import argparse
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+from sklearn.metrics import log_loss, roc_auc_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder, MinMaxScaler, MultiLabelBinarizer
+from tensorflow.keras.optimizers import Adam, SGD
+from tensorflow.keras.losses import binary_crossentropy
+from scipy.sparse import coo_matrix
+from script.models.pnn import PNN
+from script.feature_column import SparseFeat, DenseFeat, get_feature_names, VarLenSparseFeat
+
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+UNSEQ_COLUMNS = ['UID', 'ITEM', 'CATEGORY']
+LABEL_COLUMN = ['CLICKED']
+TRAIN_DATA_COLUMNS = LABEL_COLUMN + UNSEQ_COLUMNS
+
+EMBEDDING_DIM = 8
+
+def split(x):
+    # key2index is a module-level vocabulary dict populated on the fly
+    key_ans = x.split(',')
+    for key in key_ans:
+        if key not in key2index:
+            key2index[key] = len(key2index) + 1
+    return list(map(lambda k: key2index[k], key_ans))
+
+
+# Bin continuous features into equal-frequency (quantile) buckets
+def BinMap(data, acc):
+    if acc >= 1 or acc <= 0:
+        raise ValueError('acc must be greater than 0 and less than 1')
+    max = data.max()
+    min = data.min()
+    rangelist = [i + 1 for i in range(int(1 / acc))]
+    length = len(data) - 1
+    data1 = data.sort_index()
+    bin_res = np.array([0] * data.shape[-1], dtype=int)
+    for r in rangelist:
+        if r == 1:
+            lower = min
+        else:
+            lower = data1[int(length * ((r - 1) * acc))]
+        rank = r * acc
+        i = int(length * rank)
+        if r == rangelist[-1]:
+            # the last bucket is closed on the right at the maximum value
+            mask = data.loc[(data >= lower) & (data <= max)].index
+        else:
+            mask = data.loc[(data >= lower) & (data