Added stub for k-fold cross validation
parent c2a489ce71
commit d568242cd0

6 changed files with 115 additions and 8 deletions
@@ -1,4 +1,5 @@
 from pywatts import db
 from pywatts import fetchdata
 from pywatts import neural
 from pywatts import main
+from pywatts import kcross
pywatts/kcross.py (new file, 61 lines)
@@ -0,0 +1,61 @@
+import random
+import itertools
+from pywatts import db
+
+
+def split(data, k):
+    """Returns (X_train, y_train, X_eval, y_eval)"""
+
+    # Training features as list of dictionaries (each dict is for ONE test run)
+    X_train = []
+    # Training labels as list of dictionaries (each dict is for ONE test run)
+    y_train = []
+    # Evaluation features as list of dictionaries (each i-th dict includes all features except X_train[i])
+    X_eval = []
+    # Evaluation labels as list of dictionaries (each i-th dict includes all labels except X_train[i])
+    y_eval = []
+
+    data_list = data['dc'].tolist()
+
+    # Each sample has 337 elements
+    samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 337)]
+    # Randomly shuffle samples
+    random.shuffle(samples)
+
+    for i in range(0, len(samples), k):
+        # Create new dictionaries in the eval lists
+        X_eval.append({'dc': [x for x in itertools.chain(samples[i:i+k])]})
+        y_eval.append({'dc': []})
+
+
+    for i in range(len(X_eval)):
+        X_train.append({'dc': []})
+        y_train.append({'dc': []})
+        for c, d in enumerate(X_eval):
+            if c != i:
+                X_train[i]['dc'].extend(d['dc'])
+                y_train[i]['dc'].append(y_eval[c]['dc'])
+
+    print(X_train)
+    print(y_train)
+    exit(0)
+
+    return X_train, y_train, X_eval, y_eval
+
+
+def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
+    """Trains the Network nn using k-cross-validation"""
+    evaluation = []
+    for count, train_data in enumerate(X_train):
+        for i in range(steps):
+            nn.train(train_data, y_train[count], batch_size=int(len(train_data['dc'])/336), steps=1)
+            print(X_eval[count])
+            print(len(X_eval[count]['dc']))
+            print(y_eval[count])
+            evaluation.append(nn.evaluate(X_eval[count], y_eval[count], batch_size=int(len(X_eval[count]['dc'])/336)))
+            print("Training %s: %s/%s" % (count, (i+1), steps))
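Note: for reference, a minimal sketch of the k-fold grouping this stub aims at, mirroring its scheme of k consecutive 337-value samples per evaluation fold. The committed stub calls itertools.chain on the slice without unpacking, so the flattening below (chain.from_iterable) is an interpretation of the intent; the function and variable names are illustrative only, not part of the commit.

    import itertools
    import random

    def kfold_split_sketch(values, sample_len=337, k=4):
        # Cut the flat value list into fixed-length samples and shuffle them.
        samples = [values[i:i + sample_len]
                   for i in range(0, len(values) - sample_len, sample_len)]
        random.shuffle(samples)
        # Group k consecutive samples into one evaluation fold each, flattened.
        folds = [list(itertools.chain.from_iterable(samples[i:i + k]))
                 for i in range(0, len(samples), k)]
        # For fold i, the training values are all other folds concatenated.
        train = [list(itertools.chain.from_iterable(f for j, f in enumerate(folds) if j != i))
                 for i in range(len(folds))]
        return train, folds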
@@ -1,11 +1,13 @@
 import pandas
+import numpy as np
 import tensorflow as tf


 def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
     # Create dictionary for features in hour 0 ... 335
     features = {str(idx): [] for idx in range(336)}
-    dc_values = X['dc'].tolist()
+    #dc_values = X['dc'].tolist()
+    dc_values = X['dc']

     # Iterate the empty dictionary always adding the idx-th element from the dc_values list
     for idx, value_list in features.items():
@@ -13,7 +15,8 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):

     labels = None
     if y is not None:
-        labels = y['dc'].values
+        #labels = y['dc'].values
+        labels = y['dc']

     if labels is None:
         dataset = tf.data.Dataset.from_tensor_slices(dict(features))
@@ -38,8 +41,8 @@ class Net:
     def train(self, training_data, training_results, batch_size, steps):
         self.__regressor.train(input_fn=lambda: pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=batch_size), steps=steps)

-    def evaluate(self, eval_data, eval_results):
-        return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False), steps=1)
+    def evaluate(self, eval_data, eval_results, batch_size=1):
+        return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False, batch_size=batch_size), steps=1)

     def predict1h(self, predict_data):
         return self.__regressor.predict(input_fn=lambda: pywatts_input_fn(predict_data, num_epochs=1, shuffle=False))
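Note: with the added batch_size parameter, an evaluation call on one fold would look roughly as below, mirroring how kcross.train derives the batch size from the 336 feature values per sample. The names n, X_eval and y_eval are taken from the test scripts in this commit; the metric keys are the usual tf.estimator evaluate() output, not something this commit defines.

    # Sketch only: evaluate fold 0 with one batch per 336-value sample.
    result = n.evaluate(X_eval[0], y_eval[0],
                        batch_size=int(len(X_eval[0]['dc']) / 336))
    print(result)  # e.g. {'loss': ..., 'average_loss': ..., 'global_step': ...}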
pywatts/test_kcross_train.py (new file, 41 lines)
@@ -0,0 +1,41 @@
+import peewee
+import tensorflow as tf
+import pywatts.db
+from pywatts import kcross
+
+NUM_STATIONS_FROM_DB = 75
+K = 4
+NUM_EVAL_STATIONS = 40
+TRAIN = True
+PLOT = True
+TRAIN_STEPS = 4
+
+
+df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
+X = df
+y = df['dc']
+
+
+# Define feature columns and initialize Regressor
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+
+# Training data
+(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
+
+
+train_eval = {}
+
+if TRAIN:
+    # Train the model with the steps given
+    train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)
+
+
+if PLOT:
+    # Plot training success rate (with 'average loss')
+    pywatts.main.plot_training(train_eval)
+
+
+exit()
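Note: the stub's kcross.train collects per-fold evaluate() results in a local list but does not return them yet, while this script passes its return value to pywatts.main.plot_training. One possible way to aggregate the per-fold dicts is sketched below; the helper is hypothetical and not part of this commit, and plot_training's expected input format is an assumption.

    def collect_eval(evaluation):
        # Hypothetical helper: merge per-fold evaluate() dicts into
        # lists keyed by metric name, e.g. {'average_loss': [...], ...}.
        merged = {}
        for ev in evaluation:
            for key, value in ev.items():
                merged.setdefault(key, []).append(value)
        return merged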
@@ -19,5 +19,6 @@ n = pywatts.neural.Net(feature_cols=feature_col)
 prediction = predict(n, pred_query)

 print(prediction)
+print(pred_result)

 pywatts.main.eval_prediction(prediction, pred_result)
@@ -4,11 +4,11 @@ import pywatts.db
 from pywatts.main import *

 NUM_STATIONS_FROM_DB = 75
-NUM_TRAIN_STATIONS = 60
-NUM_EVAL_STATIONS = 15
+NUM_TRAIN_STATIONS = 400
+NUM_EVAL_STATIONS = 40
 TRAIN = True
 PLOT = True
-TRAIN_STEPS = 10
+TRAIN_STEPS = 50


 df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))