Added stub for k-fold cross validation

reedts 2018-08-06 13:28:27 +02:00
parent c2a489ce71
commit d568242cd0
6 changed files with 115 additions and 8 deletions


@@ -1,4 +1,5 @@
 from pywatts import db
 from pywatts import fetchdata
 from pywatts import neural
 from pywatts import main
+from pywatts import kcross

pywatts/kcross.py (new file)

@@ -0,0 +1,61 @@
import random
import itertools

from pywatts import db


def split(data, k):
    """Returns (X_train, y_train, X_eval, y_eval)"""
    # Training features as list of dictionaries (each dict is for ONE test run)
    X_train = []
    # Training labels as list of dictionaries (each dict is for ONE test run)
    y_train = []
    # Evaluation features as list of dictionaries (the i-th dict holds the fold held out of X_train[i])
    X_eval = []
    # Evaluation labels as list of dictionaries (the i-th dict holds the labels held out of y_train[i])
    y_eval = []

    data_list = data['dc'].tolist()

    # Each sample has 337 elements
    samples = [data_list[i:i + 337] for i in range(0, len(data_list) - 337, 337)]

    # Randomly shuffle samples
    random.shuffle(samples)

    # Group k consecutive samples into one evaluation fold, flattened into a single list
    for i in range(0, len(samples), k):
        X_eval.append({'dc': list(itertools.chain.from_iterable(samples[i:i + k]))})
        y_eval.append({'dc': []})    # labels are still a stub

    # The i-th training set concatenates every fold except the i-th one
    for i in range(len(X_eval)):
        X_train.append({'dc': []})
        y_train.append({'dc': []})
        for c, d in enumerate(X_eval):
            if c != i:
                X_train[i]['dc'].extend(d['dc'])
                y_train[i]['dc'].extend(y_eval[c]['dc'])

    return X_train, y_train, X_eval, y_eval
def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
    """Trains the network nn using k-cross-validation"""
    evaluation = []

    for count, train_data in enumerate(X_train):
        for i in range(steps):
            nn.train(train_data, y_train[count],
                     batch_size=int(len(train_data['dc']) / 336), steps=1)
            # Evaluate on the held-out fold after every training step
            evaluation.append(nn.evaluate(X_eval[count], y_eval[count],
                                          batch_size=int(len(X_eval[count]['dc']) / 336)))
            print("Training %s: %s/%s" % (count, (i + 1), steps))

    return evaluation


@@ -1,11 +1,13 @@
 import pandas
+import numpy as np
 import tensorflow as tf

 def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
     # Create dictionary for features in hour 0 ... 335
     features = {str(idx): [] for idx in range(336)}
-    dc_values = X['dc'].tolist()
+    #dc_values = X['dc'].tolist()
+    dc_values = X['dc']

     # Iterate the empty dictionary always adding the idx-th element from the dc_values list
     for idx, value_list in features.items():
@@ -13,7 +15,8 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
     labels = None
     if y is not None:
-        labels = y['dc'].values
+        #labels = y['dc'].values
+        labels = y['dc']

     if labels is None:
         dataset = tf.data.Dataset.from_tensor_slices(dict(features))
@@ -38,8 +41,8 @@ class Net:
     def train(self, training_data, training_results, batch_size, steps):
         self.__regressor.train(input_fn=lambda: pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=batch_size), steps=steps)

-    def evaluate(self, eval_data, eval_results):
-        return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False), steps=1)
+    def evaluate(self, eval_data, eval_results, batch_size=1):
+        return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False, batch_size=batch_size), steps=1)

     def predict1h(self, predict_data):
         return self.__regressor.predict(input_fn=lambda: pywatts_input_fn(predict_data, num_epochs=1, shuffle=False))
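With .tolist() and .values dropped, pywatts_input_fn now expects X['dc'] and y['dc'] to already be plain Python lists, which is exactly the format kcross.split produces, and Net.evaluate now forwards an explicit batch_size to the input function. A minimal usage sketch under those assumptions (the values and the one-label-per-window shape are illustrative, not taken from the commit):

import tensorflow as tf
import pywatts.neural

# Illustrative only: feature windows are flat lists whose length is a multiple of 336.
X_fold = {'dc': [0.0] * (336 * 2)}   # two 336-hour windows, flattened
y_fold = {'dc': [0.0, 0.0]}          # assumed: one label per window

feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

n.train(X_fold, y_fold, batch_size=2, steps=1)
result = n.evaluate(X_fold, y_fold, batch_size=2)   # batch_size is now forwarded to the input_fn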


@@ -0,0 +1,41 @@
import peewee
import tensorflow as tf

import pywatts.db
from pywatts import kcross

NUM_STATIONS_FROM_DB = 75
K = 4
NUM_EVAL_STATIONS = 40
TRAIN = True
PLOT = True
TRAIN_STEPS = 4

df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
X = df
y = df['dc']

# Define feature columns and initialize Regressor
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]

n = pywatts.neural.Net(feature_cols=feature_col)

# Training data
(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)

train_eval = {}

if TRAIN:
    # Train the model with the steps given
    train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)

if PLOT:
    # Plot training success rate (with 'average loss')
    pywatts.main.plot_training(train_eval)

exit()
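For reference, a hedged reading rather than part of the diff: kcross.train (with its return value) collects one dict per call to the estimator's evaluate(), and for a TensorFlow regressor those dicts typically carry 'average_loss', 'loss' and 'global_step', which is presumably what pywatts.main.plot_training charts. A minimal stand-in plot under that assumption:

# Hedged sketch: plot the per-step 'average_loss' collected by kcross.train.
# Assumes each entry is the dict returned by a tf.estimator regressor's evaluate().
import matplotlib.pyplot as plt

def plot_average_loss(train_eval):
    """Plot evaluation 'average_loss' over the collected training iterations."""
    losses = [result['average_loss'] for result in train_eval]
    plt.plot(range(1, len(losses) + 1), losses)
    plt.xlabel('evaluation #')
    plt.ylabel('average loss')
    plt.show()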


@@ -19,5 +19,6 @@ n = pywatts.neural.Net(feature_cols=feature_col)
 prediction = predict(n, pred_query)
 print(prediction)
+print(pred_result)
 pywatts.main.eval_prediction(prediction, pred_result)


@@ -4,11 +4,11 @@ import pywatts.db
 from pywatts.main import *

 NUM_STATIONS_FROM_DB = 75
-NUM_TRAIN_STATIONS = 60
-NUM_EVAL_STATIONS = 15
+NUM_TRAIN_STATIONS = 400
+NUM_EVAL_STATIONS = 40
 TRAIN = True
 PLOT = True
-TRAIN_STEPS = 10
+TRAIN_STEPS = 50

 df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))