# NOTE: this file is a Jupyter/IPython notebook export -- "%matplotlib inline"
# on the line below is an IPython magic and will not run under plain CPython.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# import seaborn as sns
# sns.set_style("whitegrid")
import gc # memory cleaning
import os
import psutil
# Handle on the current process so resident memory (RSS) can be reported
# at the start and end of the experiment.
process = psutil.Process(os.getpid())
print("Memory usage:", process.memory_info().rss/1024/1024,"MB")
# Synthetic dataset: N_total rows of 3 i.i.d. Uniform(0,1) features.
# Only the first N_train rows are used for fitting; the rest are held out.
N_total = 150000
N_train = 13000
X = np.random.uniform(0,1,(N_total,3))
def obj_func(x):
    """
    True conditional expectation E[Y | X = x].

    Defined as the sum of the second and third coordinates
    of the feature vector *x*.
    """
    second, third = x[1], x[2]
    return second + third
def obj_func2(x):
    """
    True conditional variance Var[Y | X = x].

    Defined as the sum of the first and second coordinates
    of the feature vector *x*.
    """
    return sum(x[:2])
# Simulate the responses: Y | X ~ Normal(mean=obj_func(X), var=obj_func2(X)).
#
# Fix + speedup over the original per-row loop:
#  - the loop passed size=1 to np.random.normal, producing a length-1 array
#    that was assigned into the scalar slot Y[i]; that ndim>0 -> scalar
#    conversion is deprecated and raises an error on NumPy >= 1.25;
#  - a single vectorized draw (loc/scale broadcast row-wise) replaces the
#    150k-iteration Python loop.
cond_mean = np.apply_along_axis(obj_func, 1, X)
cond_std = np.sqrt(np.apply_along_axis(obj_func2, 1, X))
Y = np.random.normal(cond_mean, cond_std, N_total)
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Gradient-boosted trees regressing Y on X: the fitted model estimates the
# conditional mean E[Y | X]. (RandomForestRegressor is imported as an easy
# drop-in alternative; AdaBoost was also tried during development.)
reg = XGBRegressor(n_estimators=100, objective="reg:squarederror")
reg.fit(X[:N_train], Y[:N_train])
from sklearn.metrics import mean_squared_error, r2_score

# RMSE of the fitted mean against the TRUE conditional expectation on the
# held-out rows (the target here is noise-free, so this measures estimation
# error only). Bare expression: displayed by the notebook, discarded as a script.
eval_X = X[N_train:]
ref_mean = np.apply_along_axis(obj_func, 1, eval_X)
np.sqrt(mean_squared_error(ref_mean, reg.predict(eval_X)))

# Overlay truth vs. prediction on the first 100 held-out points.
plt.figure(figsize=(15, 5))
plt.plot(ref_mean[:100], color="darkred", label="ref")
plt.plot(reg.predict(eval_X[:100]), color="grey", label="pred")
plt.title("Estimation of conditional expectation")
plt.grid()
plt.legend()
# Second regression: fit E[Y^2 | X] on the squared responses. The conditional
# variance is then recovered via the plug-in identity
#   Var[Y|X] = E[Y^2|X] - (E[Y|X])^2.
reg2 = XGBRegressor(n_estimators=100, objective="reg:squarederror")
reg2.fit(X[:N_train], Y[:N_train] ** 2)

holdout = X[N_train:]
ref_var = np.apply_along_axis(obj_func2, 1, holdout)
est_var = reg2.predict(holdout) - reg.predict(holdout) ** 2

# RMSE and R^2 of the variance estimate against the true conditional variance
# (bare expressions -- notebook display values).
np.sqrt(mean_squared_error(ref_var, est_var))
r2_score(ref_var, est_var)

# Overlay truth vs. estimate on the first 100 held-out points.
plt.figure(figsize=(15, 5))
plt.plot(ref_var[:100], color="darkred", label="ref")
plt.plot(est_var[:100], color="grey", label="pred")
plt.title("Estimation of conditional variance")
plt.grid()
plt.legend()

print("Memory usage:", process.memory_info().rss/1024/1024, "MB")