Added stub for k-fold cross validation

This commit is contained in:
reedts 2018-08-06 13:28:27 +02:00
parent c2a489ce71
commit d568242cd0
6 changed files with 115 additions and 8 deletions

View file

@ -2,3 +2,4 @@ from pywatts import db
from pywatts import fetchdata from pywatts import fetchdata
from pywatts import neural from pywatts import neural
from pywatts import main from pywatts import main
from pywatts import kcross

61
pywatts/kcross.py Normal file
View file

@ -0,0 +1,61 @@
import random
import itertools
from pywatts import db
def split(data, k, sample_len=337):
    """Split the 'dc' column of *data* into cross-validation folds.

    The flat ``data['dc']`` sequence is cut into consecutive samples of
    ``sample_len`` values.  The samples are shuffled and every run of ``k``
    shuffled samples becomes one evaluation fold.  The training set that
    belongs to fold ``i`` is the concatenation of all other folds.

    NOTE(review): every sample is assumed to hold ``sample_len - 1`` feature
    values followed by one label value (the callers divide by 336 to get the
    number of samples) -- confirm against the actual data layout.
    NOTE(review): ``k`` is the number of samples per fold, as in the original
    stub, not the number of folds -- confirm this is the intended meaning.

    Args:
        data: Object whose ``data['dc']`` offers ``tolist()`` (for example a
            pandas DataFrame).
        k: Number of samples per fold; must be at least 1.
        sample_len: Number of consecutive values that form one sample
            (features plus trailing label); must be at least 2.

    Returns:
        Tuple ``(X_train, y_train, X_eval, y_eval)``.  Each element is a list
        with one ``{'dc': [...]}`` dict per fold.  The ``X_*`` dicts hold the
        flattened feature values, the ``y_*`` dicts hold one label per sample.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``sample_len`` is smaller
            than 2.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    if sample_len < 2:
        raise ValueError("sample_len must be at least 2")

    data_list = data['dc'].tolist()

    # "+ 1" keeps the last sample when the data length is an exact multiple
    # of sample_len; incomplete trailing samples are still dropped.
    samples = [
        data_list[start:start + sample_len]
        for start in range(0, len(data_list) - sample_len + 1, sample_len)
    ]
    # Randomly shuffle samples
    random.shuffle(samples)

    # Evaluation features/labels: one dict per fold
    X_eval = []
    y_eval = []
    for start in range(0, len(samples), k):
        fold = samples[start:start + k]
        # Flatten the feature part of every sample into one list
        X_eval.append(
            {'dc': list(itertools.chain.from_iterable(s[:-1] for s in fold))}
        )
        y_eval.append({'dc': [s[-1] for s in fold]})

    # Training features/labels: the i-th dict holds everything except fold i
    X_train = []
    y_train = []
    for i in range(len(X_eval)):
        features = []
        labels = []
        for c, (fold_features, fold_labels) in enumerate(zip(X_eval, y_eval)):
            if c != i:
                features.extend(fold_features['dc'])
                labels.extend(fold_labels['dc'])
        X_train.append({'dc': features})
        y_train.append({'dc': labels})

    return X_train, y_train, X_eval, y_eval
def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
    """Train the network *nn* fold by fold and collect the evaluations.

    For every fold the network is trained ``steps`` times with a single
    training step each, and evaluated on the fold's evaluation data after
    every one of those steps.

    Args:
        nn: Object offering ``train(X, y, batch_size=..., steps=...)`` and
            ``evaluate(X, y, batch_size=...)``.
        X_train: List of ``{'dc': [...]}`` training feature dicts, one per fold.
        y_train: List of training label dicts, index-aligned with ``X_train``.
        X_eval: List of ``{'dc': [...]}`` evaluation feature dicts, one per fold.
        y_eval: List of evaluation label dicts, index-aligned with ``X_eval``.
        steps: Number of train/evaluate rounds per fold.

    Returns:
        List with every value returned by ``nn.evaluate``, in call order
        (``len(X_train) * steps`` entries).
    """
    evaluation = []
    for count, train_data in enumerate(X_train):
        eval_data = X_eval[count]
        # Batch size is the number of 336-value rows in the fold; it does not
        # change between steps, so compute it once per fold.
        train_batch = len(train_data['dc']) // 336
        eval_batch = len(eval_data['dc']) // 336
        for i in range(steps):
            nn.train(train_data, y_train[count], batch_size=train_batch, steps=1)
            evaluation.append(
                nn.evaluate(eval_data, y_eval[count], batch_size=eval_batch)
            )
            print("Training %s: %s/%s" % (count, (i + 1), steps))
    return evaluation

View file

@ -1,11 +1,13 @@
import pandas import pandas
import numpy as np
import tensorflow as tf import tensorflow as tf
def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1): def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
# Create dictionary for features in hour 0 ... 335 # Create dictionary for features in hour 0 ... 335
features = {str(idx): [] for idx in range(336)} features = {str(idx): [] for idx in range(336)}
dc_values = X['dc'].tolist() #dc_values = X['dc'].tolist()
dc_values = X['dc']
# Iterate the empty dictionary always adding the idx-th element from the dc_values list # Iterate the empty dictionary always adding the idx-th element from the dc_values list
for idx, value_list in features.items(): for idx, value_list in features.items():
@ -13,7 +15,8 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
labels = None labels = None
if y is not None: if y is not None:
labels = y['dc'].values #labels = y['dc'].values
labels = y['dc']
if labels is None: if labels is None:
dataset = tf.data.Dataset.from_tensor_slices(dict(features)) dataset = tf.data.Dataset.from_tensor_slices(dict(features))
@ -38,8 +41,8 @@ class Net:
def train(self, training_data, training_results, batch_size, steps): def train(self, training_data, training_results, batch_size, steps):
self.__regressor.train(input_fn=lambda: pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=batch_size), steps=steps) self.__regressor.train(input_fn=lambda: pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=batch_size), steps=steps)
def evaluate(self, eval_data, eval_results): def evaluate(self, eval_data, eval_results, batch_size=1):
return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False), steps=1) return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False, batch_size=batch_size), steps=1)
def predict1h(self, predict_data): def predict1h(self, predict_data):
return self.__regressor.predict(input_fn=lambda: pywatts_input_fn(predict_data, num_epochs=1, shuffle=False)) return self.__regressor.predict(input_fn=lambda: pywatts_input_fn(predict_data, num_epochs=1, shuffle=False))

View file

@ -0,0 +1,41 @@
import peewee
import tensorflow as tf
import pywatts.db
from pywatts import kcross
# Run configuration
NUM_STATIONS_FROM_DB = 75
K = 4
NUM_EVAL_STATIONS = 40
TRAIN = True
PLOT = True
TRAIN_STEPS = 4

# Pass the ids 1 .. NUM_STATIONS_FROM_DB - 1 to rows_to_df
df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))

# Define feature columns and initialize Regressor
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

# Training data
X_train, y_train, X_eval, y_eval = kcross.split(df, K)

# kcross.train accumulates its evaluation results in a list, so use the same
# type for the "training skipped" case.
train_eval = []

if TRAIN:
    # Train the model with the steps given
    train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)

# NOTE(review): assumed to be independent of TRAIN -- confirm the nesting.
if PLOT:
    # Plot training success rate (with 'average loss')
    pywatts.main.plot_training(train_eval)

View file

@ -19,5 +19,6 @@ n = pywatts.neural.Net(feature_cols=feature_col)
prediction = predict(n, pred_query) prediction = predict(n, pred_query)
print(prediction) print(prediction)
print(pred_result)
pywatts.main.eval_prediction(prediction, pred_result) pywatts.main.eval_prediction(prediction, pred_result)

View file

@ -4,11 +4,11 @@ import pywatts.db
from pywatts.main import * from pywatts.main import *
NUM_STATIONS_FROM_DB = 75 NUM_STATIONS_FROM_DB = 75
NUM_TRAIN_STATIONS = 60 NUM_TRAIN_STATIONS = 400
NUM_EVAL_STATIONS = 15 NUM_EVAL_STATIONS = 40
TRAIN = True TRAIN = True
PLOT = True PLOT = True
TRAIN_STEPS = 10 TRAIN_STEPS = 50
df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB))) df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))