Added stub for k-fold cross validation

2018-08-06 13:28:27 +02:00 · 2018-08-06 13:28:27 +02:00 · d568242cd0
commit d568242cd0
parent c2a489ce71
6 changed files with 115 additions and 8 deletions
--- a/pywatts/init.py
+++ b/pywatts/init.py
@ -1,4 +1,5 @@
 from pywatts import db
 from pywatts import fetchdata
 from pywatts import neural
-from pywatts import main
+from pywatts import main
+from pywatts import kcross
--- a/pywatts/kcross.py
+++ b/pywatts/kcross.py
@ -0,0 +1,61 @@
+import random
+import itertools
+from pywatts import db
+
+
+def split(data, k):
+    """Split the 'dc' series of *data* into cross-validation folds.
+
+    The series is cut into consecutive, non-overlapping windows of 337
+    values; every complete window is one sample. The samples are shuffled
+    and grouped into folds of ``k`` samples each (the last fold may be
+    smaller).
+
+    NOTE(review): the first 336 values of a sample are used as features and
+    the 337th as its label. This matches the 336 feature columns and the
+    ``len(...)/336`` batch sizes used by the callers -- confirm.
+    NOTE(review): ``k`` is the number of samples per fold (as in the
+    original stub), not the number of folds -- confirm this is intended.
+
+    Args:
+        data: Mapping-like object whose ``data['dc']`` provides ``tolist()``.
+        k: Number of samples per fold; must be at least 1.
+
+    Returns:
+        Tuple ``(X_train, y_train, X_eval, y_eval)``. Each element is a list
+        with one ``{'dc': [...]}`` dict per fold. ``X_eval[i]``/``y_eval[i]``
+        hold the flattened features/labels of fold ``i``;
+        ``X_train[i]``/``y_train[i]`` hold those of all other folds.
+
+    Raises:
+        ValueError: If ``k`` is smaller than 1.
+    """
+    if k < 1:
+        raise ValueError("k must be a positive integer")
+
+    sample_len = 337
+    data_list = data['dc'].tolist()
+
+    # "+ 1" keeps the final window when the length is an exact multiple of
+    # sample_len; an incomplete trailing window is still dropped.
+    samples = [data_list[i:i + sample_len]
+               for i in range(0, len(data_list) - sample_len + 1, sample_len)]
+    # Randomly shuffle samples
+    random.shuffle(samples)
+
+    # Evaluation folds: features flattened into one list, labels alongside
+    X_eval = []
+    y_eval = []
+    for i in range(0, len(samples), k):
+        fold = samples[i:i + k]
+        X_eval.append({'dc': list(itertools.chain.from_iterable(s[:-1] for s in fold))})
+        y_eval.append({'dc': [s[-1] for s in fold]})
+
+    # Training data for run i is everything except evaluation fold i
+    X_train = []
+    y_train = []
+    for held_out in range(len(X_eval)):
+        rest = [c for c in range(len(X_eval)) if c != held_out]
+        X_train.append({'dc': list(itertools.chain.from_iterable(X_eval[c]['dc'] for c in rest))})
+        y_train.append({'dc': list(itertools.chain.from_iterable(y_eval[c]['dc'] for c in rest))})
+
+    return X_train, y_train, X_eval, y_eval
+
+
+def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
+    """Train the network *nn* fold by fold and collect evaluation results.
+
+    For every fold, ``nn.train`` is called ``steps`` times with ``steps=1``;
+    after each call the network is evaluated on that fold's evaluation data.
+
+    Args:
+        nn: Object providing ``train(X, y, batch_size=..., steps=...)`` and
+            ``evaluate(X, y, batch_size=...)``.
+        X_train: List of ``{'dc': [...]}`` training feature dicts, one per fold.
+        y_train: List of training label dicts, indexed like ``X_train``.
+        X_eval: List of ``{'dc': [...]}`` evaluation feature dicts, indexed
+            like ``X_train``.
+        y_eval: List of evaluation label dicts, indexed like ``X_train``.
+        steps: Number of train/evaluate rounds per fold.
+
+    Returns:
+        List with the value returned by every ``nn.evaluate`` call, in call
+        order (``len(X_train) * steps`` entries).
+    """
+    evaluation = []
+    for count, train_data in enumerate(X_train):
+        train_labels = y_train[count]
+        eval_data = X_eval[count]
+        eval_labels = y_eval[count]
+        # One batch covers the whole fold: 336 feature values per sample
+        train_batch = len(train_data['dc']) // 336
+        eval_batch = len(eval_data['dc']) // 336
+        for i in range(steps):
+            nn.train(train_data, train_labels, batch_size=train_batch, steps=1)
+            evaluation.append(nn.evaluate(eval_data, eval_labels, batch_size=eval_batch))
+            print("Training %s: %s/%s" % (count, (i+1), steps))
+
+    return evaluation
+
+
+
+
+
--- a/pywatts/neural.py
+++ b/pywatts/neural.py
@ -1,11 +1,13 @@
 import pandas
+import numpy as np
 import tensorflow as tf


 def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
    # Create dictionary for features in hour 0 ... 335
    features = {str(idx): [] for idx in range(336)}
-    dc_values = X['dc'].tolist()
+    #dc_values = X['dc'].tolist()
+    dc_values = X['dc']

    # Iterate the empty dictionary always adding the idx-th element from the dc_values list
    for idx, value_list in features.items():
@ -13,7 +15,8 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):

    labels = None
    if y is not None:
-        labels = y['dc'].values
+        #labels = y['dc'].values
+        labels = y['dc']

    if labels is None:
        dataset = tf.data.Dataset.from_tensor_slices(dict(features))
@ -38,8 +41,8 @@ class Net:
    def train(self, training_data, training_results, batch_size, steps):
        self.__regressor.train(input_fn=lambda: pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=batch_size), steps=steps)

-    def evaluate(self, eval_data, eval_results):
-        return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False), steps=1)
+    def evaluate(self, eval_data, eval_results, batch_size=1):
+        """Evaluate the regressor on eval_data/eval_results for one step.
+
+        The input is fed once (num_epochs=1), unshuffled, in batches of
+        batch_size; the regressor's evaluate() result is returned as is.
+        """
+        def eval_input_fn():
+            return pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False, batch_size=batch_size)
+
+        return self.__regressor.evaluate(input_fn=eval_input_fn, steps=1)

    def predict1h(self, predict_data):
        return self.__regressor.predict(input_fn=lambda: pywatts_input_fn(predict_data, num_epochs=1, shuffle=False))
--- a/pywatts/test_kcross_train.py
+++ b/pywatts/test_kcross_train.py
@ -0,0 +1,41 @@
+import peewee
+import tensorflow as tf
+import pywatts.db
+from pywatts import kcross
+
+NUM_STATIONS_FROM_DB = 75
+K = 4
+NUM_EVAL_STATIONS = 40
+TRAIN = True
+PLOT = True
+TRAIN_STEPS = 4
+
+
+# Load the data for ids 1 .. NUM_STATIONS_FROM_DB - 1
+df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
+
+
+# Define feature columns and initialize Regressor
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+
+# Split the data into training and evaluation folds
+(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
+
+
+# Stays empty when TRAIN is disabled
+train_eval = {}
+
+if TRAIN:
+    # Train the model with the steps given
+    train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)
+
+
+if PLOT:
+    # Plot training success rate (with 'average loss')
+    pywatts.main.plot_training(train_eval)
--- a/pywatts/test_predict.py
+++ b/pywatts/test_predict.py
@ -19,5 +19,6 @@ n = pywatts.neural.Net(feature_cols=feature_col)
 prediction = predict(n, pred_query)

 print(prediction)
+print(pred_result)

 pywatts.main.eval_prediction(prediction, pred_result)
--- a/pywatts/test_train.py
+++ b/pywatts/test_train.py
@ -4,11 +4,11 @@ import pywatts.db
 from pywatts.main import *

 NUM_STATIONS_FROM_DB = 75
-NUM_TRAIN_STATIONS = 60
-NUM_EVAL_STATIONS = 15
+NUM_TRAIN_STATIONS = 400
+NUM_EVAL_STATIONS = 40
 TRAIN = True
 PLOT = True
-TRAIN_STEPS = 10
+TRAIN_STEPS = 50


 df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))