import pymysql
import numpy as np
import sys
import os
#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# Open the analytics database connection.
# Every setting can be overridden through an environment variable; the literal
# values are the original defaults, kept so existing invocations still work.
# NOTE(review): the password is committed in source -- rotate it, supply it via
# ADV_DB_PASSWORD only, and then drop the literal fallback.
conn = pymysql.connect(
    host=os.environ.get('ADV_DB_HOST', 'am-bp1h54m85py08r99190650o.ads.aliyuncs.com'),
    port=int(os.environ.get('ADV_DB_PORT', '3306')),
    user=os.environ.get('ADV_DB_USER', 'analysis_user'),
    password=os.environ.get('ADV_DB_PASSWORD', 'Fk9gFKSg5aS'),
    db=os.environ.get('ADV_DB_NAME', 'adv_analysis'),
    charset='utf8',
)
# Obtain a cursor for the queries below.
cursor = conn.cursor()

# Queries: user_type=0 rows, user_type=1 rows, and both together (gid 447).
sql = 'select * from user_app_class where user_type=0 and gid=447'
sql2 = 'select * from user_app_class where user_type=1 and gid=447'
all_sql = 'select * from user_app_class where user_type <= 1 and gid=447'
# Upper bound on the number of user_type=0 rows that are kept.
sql_num = 20000
# Number of feature columns; they are read from columns 3 .. nd+2 of each row,
# and column 1 is read as the user_type label.
nd = 848

try:
    # --- user_type = 0 ---
    rows = cursor.execute(sql)
    if rows == 0:
        print("no data")
        sys.exit()
    print("get sql result num: ", rows)
    res = cursor.fetchall()
    data = np.array(res)
    user_app = data[0:sql_num, 3:nd + 3]
    user_type = data[0:sql_num, 1:2]

    # --- user_type = 1 ---
    rows2 = cursor.execute(sql2)
    if rows2 == 0:
        print("no 2 data")
        sys.exit()
    res2 = cursor.fetchall()
    data2 = np.array(res2)
    user_app2 = data2[:, 3:nd + 3]
    user_type2 = data2[:, 1:2]
    print("user_type2:", user_type2[0:10])
    print("data2 shape:", data2.shape)
    print("user_app2 shape:", user_app2.shape)

    # --- user_type <= 1 (both classes) ---
    rows3 = cursor.execute(all_sql)
    if rows3 == 0:
        print("no 3 data")
        sys.exit()
    res3 = cursor.fetchall()
    data3 = np.array(res3)
    all_user_app = data3[:, 3:nd + 3]
    all_user_type = data3[:, 1:2]
    print("user_type3:", all_user_type[0:10])
    print("data3 shape:", data3.shape)
    print("user_app3 shape:", all_user_app.shape)
except pymysql.Error as e:
    print("Error: unable to fetch data")
    # Print whatever arguments the driver supplied; indexing args[1] directly
    # would fail for errors that carry fewer than two arguments.
    print(*e.args)
    # Without the data nothing below can run, so stop here instead of failing
    # later with a NameError on the missing arrays.
    sys.exit(1)
finally:
    # Runs on success, on database errors and on the sys.exit() paths alike.
    cursor.close()
    conn.close()
def _one_hot(labels):
    """Return [NOT label, label] side by side for a single-column label array."""
    return np.hstack((~labels.astype(bool), labels))


# Preview the first rows of the user_type=0 query result.
print("data:", data[0:10])
print("user_app:", user_app[0:10])
print("user_type:", user_type[0:10])

# Split points: the first `train_num` rows of each class are used for training,
# everything from `test_start_num` onwards for testing. `sed` shifts the
# boundary for the user_type=0 class only.
train_num = 15000
test_start_num = 15000
sed = 0
train_end = train_num + sed
test_start2 = test_start_num + sed

# Training features: user_type=0 rows first, then user_type=1 rows.
x_train = np.vstack((user_app[:train_end], user_app2[:train_num]))
print("x_train:", x_train.shape, x_train[0:10])

# Training labels, stacked in the same order and expanded to two columns.
train_labels = np.vstack((user_type[:train_end], user_type2[:train_num]))
y_train = _one_hot(train_labels)
print("y_train:", y_train.shape, y_train[0:10])

# Full data set (both classes) in the same feature/label layout.
all_x_train = all_user_app
all_y_train = _one_hot(all_user_type)

# Test samples: paying users (user_type=1 query).
x_test = np.vstack(user_app2[test_start_num:])
print("x_test:", x_test.shape, x_test[0:10])
y_test = _one_hot(user_type2[test_start_num:])
print("y_test:", y_test.shape, y_test[0:10])

# Test samples: non-paying users (user_type=0 query).
x_test2 = np.vstack(user_app[test_start2:])
print("x_test2:", x_test2.shape, x_test2[0:10])
y_test2 = _one_hot(user_type[test_start2:])
print("y_test2:", y_test2.shape, y_test2[0:10])
# ---- MLP classifier graph ----
# The input columns are split into six consecutive groups; each group gets its
# own ReLU hidden layer.

# Exclusive end column of each feature group inside x.
c1 = 8
c2 = 227
c3 = 574
c4 = 750
c5 = 803
c6 = 848

# Input width of each group, derived from the boundaries above so the two sets
# of numbers cannot drift apart (values: 8, 219, 347, 176, 53, 45).
in_units_c1 = c1
in_units_c2 = c2 - c1
in_units_c3 = c3 - c2
in_units_c4 = c4 - c3
in_units_c5 = c5 - c4
in_units_c6 = c6 - c5

# Hidden-layer width of each group.
h_num = 100
h1_units = h_num
h2_units = h_num
h3_units = h_num
h4_units = h_num
h5_units = h_num
h6_units = h_num

out_units = 2  # number of output classes

# Per-group hidden-layer parameters: weights drawn from a truncated normal
# (mean 0, stddev 0.1), biases initialised to zero and sized by the group's own
# hidden width.
W1 = tf.Variable(tf.truncated_normal([in_units_c1, h1_units], stddev=0.1))
b1 = tf.Variable(tf.zeros([h1_units]))
W2 = tf.Variable(tf.truncated_normal([in_units_c2, h2_units], stddev=0.1))
b2 = tf.Variable(tf.zeros([h2_units]))
W3 = tf.Variable(tf.truncated_normal([in_units_c3, h3_units], stddev=0.1))
b3 = tf.Variable(tf.zeros([h3_units]))
W4 = tf.Variable(tf.truncated_normal([in_units_c4, h4_units], stddev=0.1))
b4 = tf.Variable(tf.zeros([h4_units]))
W5 = tf.Variable(tf.truncated_normal([in_units_c5, h5_units], stddev=0.1))
b5 = tf.Variable(tf.zeros([h5_units]))
W6 = tf.Variable(tf.truncated_normal([in_units_c6, h6_units], stddev=0.1))
b6 = tf.Variable(tf.zeros([h6_units]))
# Output-layer bias.
b7 = tf.Variable(tf.zeros([out_units]))

# Inputs: feature matrix and the dropout keep probability (the fraction of
# activations that is kept, not the fraction that is dropped).
x = tf.placeholder(tf.float32, [None, nd])
keep_prob = tf.placeholder(tf.float32)

hidden1 = tf.nn.relu(tf.matmul(x[:, 0:c1], W1) + b1)
hidden2 = tf.nn.relu(tf.matmul(x[:, c1:c2], W2) + b2)
hidden3 = tf.nn.relu(tf.matmul(x[:, c2:c3], W3) + b3)
hidden4 = tf.nn.relu(tf.matmul(x[:, c3:c4], W4) + b4)
hidden5 = tf.nn.relu(tf.matmul(x[:, c4:c5], W5) + b5)
hidden6 = tf.nn.relu(tf.matmul(x[:, c5:c6], W6) + b6)

# NOTE: only the first two groups feed the output layer. hidden3..hidden6 are
# built but not connected to y; wiring all six in would require concatenating
# them here and sizing W7 as [6 * h_num, out_units].
hidden = tf.concat((hidden1, hidden2), 1)
hidden_drop = tf.nn.dropout(hidden, keep_prob)
W7 = tf.Variable(tf.zeros([2 * h_num, out_units]))
y = tf.nn.softmax(tf.matmul(hidden_drop, W7) + b7)

# ---- Training ops ----
y_ = tf.placeholder(tf.float32, [None, out_units])
# NOTE: despite its name this is the mean squared error between the softmax
# output and the target; the name is kept because later code refers to it.
cross_entropy = tf.reduce_mean((y_ - y) ** 2)
train_step = tf.train.AdagradOptimizer(0.01).minimize(cross_entropy)

# Create an InteractiveSession (installs itself as the default session) and
# initialise all variables.
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# Fraction of rows whose predicted class matches the target class.
# tf.argmax replaces the deprecated tf.arg_max alias.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# ---- Training loop ----
num_steps = 10000
eval_every = 100

# Feed dicts never change between iterations, so build them once.
train_feed = {x: x_train, y_: y_train, keep_prob: 0.75}
paying_eval_feed = {x: x_test, y_: y_test, keep_prob: 1.0}
non_paying_eval_feed = {x: x_test2, y_: y_test2, keep_prob: 1.0}
train_eval_feed = {x: x_train, y_: y_train, keep_prob: 1.0}

for i in range(num_steps):
    # One full-batch optimisation step with dropout enabled.
    train_step.run(train_feed)
    if i % eval_every == 0:
        # Every `eval_every` steps report: accuracy on the paying-user test
        # set, accuracy on the non-paying-user test set, and the loss on the
        # training set (all with dropout disabled).
        result = accuracy.eval(paying_eval_feed)
        result2 = accuracy.eval(non_paying_eval_feed)
        result3 = cross_entropy.eval(train_eval_feed)
        print(i, 'training_arruracy:', result, result2, result3)
        # The former early-exit check (`result == 1 and i > 10000`) could never
        # fire because i stays below num_steps, so it has been removed.

print('final_accuracy:', accuracy.eval(paying_eval_feed), accuracy.eval(non_paying_eval_feed))
print('w1:', sess.run(W1))
print('w2:', sess.run(W2))