diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 88dae87c..64ef108c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,7 @@ jobs: - name: Install dependencies run: | pip3 install -q tensorflow==${{ matrix.tf-version }} + pip install -q numpy==1.19.5 pip install -q requests pip install -e . - name: Test with pytest diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ebc80d58..c7258d57 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,14 +7,14 @@ If you - have spare time to learn and develop - familiar with git -please send a brief introduction of your background and experience to wcshen1994@163.com, welcome to join us! +please send a brief introduction of your background and experience to weichenswc@163.com, welcome to join us! # Creating a pull request -1. **Become a collaborator**: Send an email with introduction and your github account name to wcshen1994@163.com and waiting for invitation to become a collaborator. -2. **Fork&Dev**: Fork your own branch(`dev_yourname`) in `DeepCTR` from the `master` branch for development.If the `master` is updated during the development process, remember to merge and update to `dev_yourname` regularly. -3. **Testing**: Test logical correctness and effect when finishing the code development of the `dev_yourname` branch. -4. **Pre-release** : After testing contact wcshen1994@163.com for pre-release integration, usually your branch `dev_yourname` will be merged into `release` branch by squash merge. -5. **Release a new version**: After confirming that the change is no longer needed, `release` branch will be merged into `master` and a new python package will be released on pypi. +1. **Become a collaborator**: Send an email with introduction and your github account name to weichenswc@163.com and wait for an invitation to become a collaborator. +2. 
**Fork&Dev**: Fork your own branch(`dev_yourname`) in `DeepCTR` from the `master` branch for development.If the `master` is updated during the development process, remember to merge and update to `dev_yourname` regularly. +3. **Testing**: Test logical correctness and effect when finishing the code development of the `dev_yourname` branch. +4. **Pre-release** : After testing contact weichenswc@163.com for pre-release integration, usually your branch `dev_yourname` will be merged into `release` branch by squash merge. +5. **Release a new version**: After confirming that the change is no longer needed, `release` branch will be merged into `master` and a new python package will be released on pypi. # Discussions diff --git a/README.md b/README.md index 39a24f41..48e8e81f 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ ![CI status](https://github.com/shenweichen/deepctr/workflows/CI/badge.svg) [![Coverage Status](https://coveralls.io/repos/github/shenweichen/DeepCTR/badge.svg?branch=master)](https://coveralls.io/github/shenweichen/DeepCTR?branch=master) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/d4099734dc0e4bab91d332ead8c0bdd0)](https://www.codacy.com/app/wcshen1994/DeepCTR?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/DeepCTR&utm_campaign=Badge_Grade) -[![Disscussion](https://img.shields.io/badge/chat-wechat-brightgreen?style=flat)](./README.md#disscussiongroup) +[![Disscussion](https://img.shields.io/badge/chat-wechat-brightgreen?style=flat)](./README.md#DisscussionGroup) [![License](https://img.shields.io/github/license/shenweichen/deepctr.svg)](https://github.com/shenweichen/deepctr/blob/master/LICENSE) @@ -24,7 +24,7 @@ DeepCTR is a **Easy-to-use**,**Modular** and **Extendible** package of deep-lear - Provide `tensorflow estimator` interface for **large scale data** and **distributed training**. 
[example](https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html#getting-started-4-steps-to-deepctr-estimator-with-tfrecord) - It is compatible with both `tf 1.x` and `tf 2.x`. -Some related project: +Some related projects: - DeepMatch: https://github.com/shenweichen/DeepMatch - DeepCTR-Torch: https://github.com/shenweichen/DeepCTR-Torch @@ -74,7 +74,7 @@ If you find this code useful in your research, please cite it using the followin } ``` -## DisscussionGroup 交流群 +## DisscussionGroup - [Discussions](https://github.com/shenweichen/DeepCTR/discussions) - 公众号:**浅梦的学习笔记** @@ -82,11 +82,3 @@ If you find this code useful in your research, please cite it using the followin ![wechat](./docs/pics/code.png) -## Cooperative promotion 合作推广 -For more information about the recommendation system, such as **feature engineering, user profile, matching, ranking and multi-objective optimization, online learning and real-time computing, and more cutting-edge technologies and practical projects**: - -更多关于推荐系统的内容,如**特征工程,用户画像,召回,排序和多目标优化,在线学习与实时计算以及更多前沿技术和实战项目**等可参考: - -- [推荐系统实战](https://www.julyedu.com/course/getDetail/181?ccode=5ee751d37278c) -- [互联网计算广告实战](https://www.julyedu.com/course/getDetail/158?ccode=5ee751d37278c) - diff --git a/deepctr/estimator/models/afm.py b/deepctr/estimator/models/afm.py index 47fd2204..b56282dc 100644 --- a/deepctr/estimator/models/afm.py +++ b/deepctr/estimator/models/afm.py @@ -53,7 +53,7 @@ def _model_fn(features, labels, mode, config): linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear) with variable_scope(DNN_SCOPE_NAME): - sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, + sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding) if use_attention: diff --git a/deepctr/estimator/models/ccpm.py b/deepctr/estimator/models/ccpm.py index 8d0f6dfd..cc788a38 100644 --- 
a/deepctr/estimator/models/ccpm.py +++ b/deepctr/estimator/models/ccpm.py @@ -59,7 +59,7 @@ def _model_fn(features, labels, mode, config): linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear) with variable_scope(DNN_SCOPE_NAME): - sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, + sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding) n = len(sparse_embedding_list) l = len(conv_filters) diff --git a/deepctr/feature_column.py b/deepctr/feature_column.py index 8e198f38..69f55b09 100644 --- a/deepctr/feature_column.py +++ b/deepctr/feature_column.py @@ -93,9 +93,9 @@ class DenseFeat(namedtuple('DenseFeat', ['name', 'dimension', 'dtype', 'transfor name: feature name, dimension: dimension of the feature, default = 1. dtype: dtype of the feature, default="float32". - transform_fn: If not None, a function that can be used to transfrom + transform_fn: If not `None` , a function that can be used to transform values of the feature. the function takes the input Tensor as its - argument, and returns the output Tensor. + argument, and returns the output Tensor. (e.g. lambda x: (x - 3.0) / 4.2). 
""" __slots__ = () diff --git a/deepctr/layers/interaction.py b/deepctr/layers/interaction.py index 92107a11..87e159d1 100644 --- a/deepctr/layers/interaction.py +++ b/deepctr/layers/interaction.py @@ -399,14 +399,13 @@ def call(self, inputs, **kwargs): if self.parameterization == 'vector': xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0)) dot_ = tf.matmul(x_0, xl_w) - x_l = dot_ + self.bias[i] + x_l = dot_ + self.bias[i] + x_l elif self.parameterization == 'matrix': - dot_ = tf.einsum('ij,bjk->bik', self.kernels[i], x_l) # W * xi (bs, dim, 1) - dot_ = dot_ + self.bias[i] # W * xi + b - dot_ = x_0 * dot_ # x0 · (W * xi + b) Hadamard-product + xl_w = tf.einsum('ij,bjk->bik', self.kernels[i], x_l) # W * xi (bs, dim, 1) + dot_ = xl_w + self.bias[i] # W * xi + b + x_l = x_0 * dot_ + x_l # x0 · (W * xi + b) +xl Hadamard-product else: # error - print("parameterization should be 'vector' or 'matrix'") - x_l = dot_ + x_l + raise ValueError("parameterization should be 'vector' or 'matrix'") x_l = tf.squeeze(x_l, axis=2) return x_l diff --git a/deepctr/models/__init__.py b/deepctr/models/__init__.py index a8ec03f4..9e77e40d 100644 --- a/deepctr/models/__init__.py +++ b/deepctr/models/__init__.py @@ -9,7 +9,6 @@ from .fnn import FNN from .mlr import MLR from .onn import ONN -from .onn import ONN as NFFM from .nfm import NFM from .pnn import PNN from .wdl import WDL diff --git a/docs/source/Examples.md b/docs/source/Examples.md index b855c302..34d3733f 100644 --- a/docs/source/Examples.md +++ b/docs/source/Examples.md @@ -367,7 +367,8 @@ if __name__ == "__main__": batch_size=2 ** 14, num_epochs=1, shuffle_factor=0) # 3.Define Model,train,predict and evaluate - model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary') + model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', + config=tf.estimator.RunConfig(tf_random_seed=2021)) model.train(train_model_input) eval_result = model.evaluate(test_model_input) @@ -422,7 
+423,7 @@ if __name__ == "__main__": # 3.generate input data for model - train, test = train_test_split(data, test_size=0.2, random_state=2020) + train, test = train_test_split(data, test_size=0.2, random_state=2021) # Not setting default value for continuous feature. filled with mean. @@ -430,7 +431,8 @@ if __name__ == "__main__": test_model_input = input_fn_pandas(test, sparse_features + dense_features, None, shuffle=False) # 4.Define Model,train,predict and evaluate - model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary') + model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', + config=tf.estimator.RunConfig(tf_random_seed=2021)) model.train(train_model_input) pred_ans_iter = model.predict(test_model_input) diff --git a/docs/source/FAQ.md b/docs/source/FAQ.md index 9317333a..41cbc3b6 100644 --- a/docs/source/FAQ.md +++ b/docs/source/FAQ.md @@ -37,6 +37,18 @@ es = EarlyStopping(monitor='val_binary_crossentropy') history = model.fit(model_input, data[target].values,batch_size=256, epochs=10, verbose=2, validation_split=0.2,callbacks=[es] ) ``` +If you are using Estimator models, you can set learning rate like: + +```python +from deepctr.estimator import DeepFMEstimator +import tensorflow as tf + +model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', + linear_optimizer=tf.train.FtrlOptimizer(0.05), dnn_optimizer=tf.train.AdagradOptimizer(0.1) + ) + +``` + ## 3. Get the attentional weights of feature interactions in AFM -------------------------------------------------------------------------- diff --git a/docs/source/Features.md b/docs/source/Features.md index bb74f280..d64acd05 100644 --- a/docs/source/Features.md +++ b/docs/source/Features.md @@ -41,7 +41,7 @@ DNN based CTR prediction models usually have following 4 modules: - name : feature name - dimension : dimension of dense feature vector. - dtype : default `float32`.dtype of input tensor. 
-- transform_fn : If not None, a function that can be used to transfrom +- transform_fn : If not `None`, a function that can be used to transform values of the feature. the function takes the input Tensor as its argument, and returns the output Tensor. (e.g. `lambda x: (x - 3.0) / 4.2)`. diff --git a/docs/source/History.md b/docs/source/History.md index 5653a935..b9aa12a1 100644 --- a/docs/source/History.md +++ b/docs/source/History.md @@ -1,4 +1,5 @@ # History +- 02/12/2021 : [v0.8.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.4) released.Fix bug in DCN-Mix. - 01/06/2021 : [v0.8.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.3) released.Add [DCN-Mix](./Features.html#dcn-mix-improved-deep-cross-network-with-mix-of-experts-and-matrix-kernel) model.Support `transform_fn` in `DenseFeat`. - 10/11/2020 : [v0.8.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.2) released.Refactor `DNN` Layer. - 09/12/2020 : [v0.8.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.1) released.Improve the reproducibility & fix some bugs. diff --git a/docs/source/index.rst b/docs/source/index.rst index 6b6130b0..34bd41d0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -42,31 +42,19 @@ You can read the latest code and related projects News ----- +02/12/2021 : Fix bug in DCN-Mix. `Changelog `_ + 01/06/2021 : Add `DCN-Mix <./Features.html#dcn-mix-improved-deep-cross-network-with-mix-of-experts-and-matrix-kernel>`_ (`中文介绍 `_) and support ``transform_fn`` in ``DenseFeat``. `Changelog `_ 10/11/2020 : Refactor ``DNN`` Layer. `Changelog `_ -09/12/2020 : Improve the reproducibility & fix some bugs. `Changelog `_ - -DisscussionGroup 交流群 +DisscussionGroup ----------------------- `Discussions `_ 公众号:**浅梦的学习笔记** wechat ID: **deepctrbot** .. 
image:: ../pics/code.png -Cooperative promotion 合作推广 ------------------------------- - -For more information about the recommendation system, such as **feature engineering, user profile, matching, ranking and multi-objective optimization, online learning and real-time computing, and more cutting-edge technologies and practical projects** : - -更多关于推荐系统的内容,如 **特征工程,用户画像,召回,排序和多目标优化,在线学习与实时计算以及更多前沿技术和实战项目** 等可参考: - -- `推荐系统实战 `_ -- `互联网计算广告实战 `_ - - - .. toctree:: :maxdepth: 2 :caption: Home: diff --git a/examples/run_estimator_pandas_classification.py b/examples/run_estimator_pandas_classification.py index 9817d20a..84fe9f9f 100644 --- a/examples/run_estimator_pandas_classification.py +++ b/examples/run_estimator_pandas_classification.py @@ -39,7 +39,7 @@ # 3.generate input data for model - train, test = train_test_split(data, test_size=0.2, random_state=2020) + train, test = train_test_split(data, test_size=0.2, random_state=2021) # Not setting default value for continuous feature. filled with mean. 
diff --git a/setup.py b/setup.py index 29bbe8fc..17287a9e 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ name="deepctr", version="0.8.3", author="Weichen Shen", - author_email="wcshen1994@163.com", + author_email="weichenswc@163.com", description="Easy-to-use,Modular and Extendible package of deep learning based CTR(Click Through Rate) prediction models with tensorflow 1.x and 2.x .", long_description=long_description, long_description_content_type="text/markdown", diff --git a/tests/models/AutoInt_test.py b/tests/models/AutoInt_test.py index 65a00128..fb3e9c64 100644 --- a/tests/models/AutoInt_test.py +++ b/tests/models/AutoInt_test.py @@ -32,7 +32,6 @@ def test_AutoInt(att_layer_num, dnn_hidden_units, sparse_feature_num): def test_AutoIntEstimator(att_layer_num, dnn_hidden_units, sparse_feature_num): if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse('2.2.0'): return - model_name = "AutoInt" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=sparse_feature_num, diff --git a/tests/models/CCPM_test.py b/tests/models/CCPM_test.py index cc2a3d16..919a36a6 100644 --- a/tests/models/CCPM_test.py +++ b/tests/models/CCPM_test.py @@ -53,7 +53,6 @@ def test_CCPM_without_seq(sparse_feature_num, dense_feature_num): def test_CCPMEstimator_without_seq(sparse_feature_num, dense_feature_num): if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": return - model_name = "CCPM" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, diff --git a/tests/models/DCNMix_test.py b/tests/models/DCNMix_test.py index bf465831..340977a2 100644 --- a/tests/models/DCNMix_test.py +++ b/tests/models/DCNMix_test.py @@ -1,9 +1,7 @@ import pytest -import tensorflow as tf from deepctr.models import DCNMix -from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \ - 
Estimator_TEST_TF1 +from ..utils import check_model, get_test_data, SAMPLE_SIZE @pytest.mark.parametrize( diff --git a/tests/models/DCN_test.py b/tests/models/DCN_test.py index 53fbd095..772fccc9 100644 --- a/tests/models/DCN_test.py +++ b/tests/models/DCN_test.py @@ -33,7 +33,6 @@ def test_DCN(cross_num, hidden_size, sparse_feature_num, cross_parameterization) def test_DCNEstimator(cross_num, hidden_size, sparse_feature_num): if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": return - model_name = "DCN" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=sparse_feature_num, diff --git a/tests/models/DeepFM_test.py b/tests/models/DeepFM_test.py index 6ef3fed2..1c219b8f 100644 --- a/tests/models/DeepFM_test.py +++ b/tests/models/DeepFM_test.py @@ -33,7 +33,6 @@ def test_DeepFM(hidden_size, sparse_feature_num): def test_DeepFMEstimator(hidden_size, sparse_feature_num): if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": return - model_name = "DeepFM" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=sparse_feature_num, diff --git a/tests/models/FNN_test.py b/tests/models/FNN_test.py index 2032bcb0..882c29da 100644 --- a/tests/models/FNN_test.py +++ b/tests/models/FNN_test.py @@ -47,7 +47,6 @@ def test_FNN(sparse_feature_num, dense_feature_num): def test_FNNEstimator(sparse_feature_num, dense_feature_num): if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": return - model_name = "FNN" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=sparse_feature_num, diff --git a/tests/models/FiBiNET_test.py b/tests/models/FiBiNET_test.py index 51583985..f1fce6dc 100644 --- a/tests/models/FiBiNET_test.py +++ b/tests/models/FiBiNET_test.py @@ -29,7 +29,6 @@ def test_FiBiNET(bilinear_type): def 
test_FiBiNETEstimator(bilinear_type): if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": return - model_name = "FiBiNET" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=2, dense_feature_num=2) diff --git a/tests/models/FwFM_test.py b/tests/models/FwFM_test.py index bf54d707..a13dcbfb 100644 --- a/tests/models/FwFM_test.py +++ b/tests/models/FwFM_test.py @@ -31,7 +31,6 @@ def test_FwFM(hidden_size, sparse_feature_num): def test_FwFMEstimator(hidden_size, sparse_feature_num): if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": return - model_name = "FwFM" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=sparse_feature_num, diff --git a/tests/models/NFM_test.py b/tests/models/NFM_test.py index 037a87ac..8e1b50c5 100644 --- a/tests/models/NFM_test.py +++ b/tests/models/NFM_test.py @@ -29,7 +29,6 @@ def test_NFM(hidden_size, sparse_feature_num): def test_FNNEstimator(hidden_size, sparse_feature_num): if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": return - model_name = "NFM" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=sparse_feature_num, diff --git a/tests/models/PNN_test.py b/tests/models/PNN_test.py index fd2ba791..2d5571f6 100644 --- a/tests/models/PNN_test.py +++ b/tests/models/PNN_test.py @@ -29,7 +29,6 @@ def test_PNN(use_inner, use_outter, sparse_feature_num): def test_PNNEstimator(use_inner, use_outter, sparse_feature_num): if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": return - model_name = "PNN" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=sparse_feature_num, diff --git a/tests/models/WDL_test.py b/tests/models/WDL_test.py index 397f42cc..10188f59 100644 --- 
a/tests/models/WDL_test.py +++ b/tests/models/WDL_test.py @@ -19,7 +19,7 @@ def test_WDL(sparse_feature_num, dense_feature_num): model_name = "WDL" sample_size = SAMPLE_SIZE x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num, - dense_feature_num=dense_feature_num) + dense_feature_num=dense_feature_num, hash_flag=True) model = WDL(feature_columns, feature_columns, dnn_hidden_units=[4, 4], dnn_dropout=0.5) @@ -34,7 +34,6 @@ def test_WDL(sparse_feature_num, dense_feature_num): def test_WDLEstimator(sparse_feature_num, dense_feature_num): if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse('2.2.0'): return - model_name = "WDL" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num, diff --git a/tests/models/xDeepFM_test.py b/tests/models/xDeepFM_test.py index 0ad48092..37228de9 100644 --- a/tests/models/xDeepFM_test.py +++ b/tests/models/xDeepFM_test.py @@ -48,7 +48,6 @@ def test_xDeepFMEstimator(dnn_hidden_units, cin_layer_size, cin_split_half, cin_ dense_feature_dim): if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": return - model_name = "xDeepFM" sample_size = SAMPLE_SIZE linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=sparse_feature_num,