From d568242cd00723295a05bb215e79baf6e475ad13 Mon Sep 17 00:00:00 2001
From: reedts <j.reedts@gmail.com>
Date: Mon, 6 Aug 2018 13:28:27 +0200
Subject: [PATCH] Added stub for k-fold cross validation

---
 pywatts/__init__.py          |  3 +-
 pywatts/kcross.py            | 61 ++++++++++++++++++++++++++++++++++++
 pywatts/neural.py            | 11 ++++---
 pywatts/test_kcross_train.py | 41 ++++++++++++++++++++++++
 pywatts/test_predict.py      |  1 +
 pywatts/test_train.py        |  6 ++--
 6 files changed, 115 insertions(+), 8 deletions(-)
 create mode 100644 pywatts/kcross.py
 create mode 100644 pywatts/test_kcross_train.py

diff --git a/pywatts/__init__.py b/pywatts/__init__.py
index c71aa1f..37f9f3a 100644
--- a/pywatts/__init__.py
+++ b/pywatts/__init__.py
@@ -1,4 +1,5 @@
 from pywatts import db
 from pywatts import fetchdata
 from pywatts import neural
-from pywatts import main
\ No newline at end of file
+from pywatts import main
+from pywatts import kcross
\ No newline at end of file
diff --git a/pywatts/kcross.py b/pywatts/kcross.py
new file mode 100644
index 0000000..3b28a15
--- /dev/null
+++ b/pywatts/kcross.py
@@ -0,0 +1,61 @@
+import random
+import itertools
+from pywatts import db
+
+
+def split(data, k):
+    """Split data['dc'] into shuffled folds and build per-fold train/eval sets.
+
+    data['dc'] is cut into consecutive 337-value windows; a trailing partial
+    window is dropped.  Each window is treated as 336 feature values followed
+    by one label (assumption: matches the 336 feature columns in neural.py).
+    NOTE: k is the number of windows per fold, not the number of folds.
+
+    Returns (X_train, y_train, X_eval, y_eval): four lists holding one
+    {'dc': [...]} dict per fold.  The i-th eval dicts hold fold i, the i-th
+    train dicts hold all other folds; every 'dc' list is flat.
+    """
+    window = 337
+    data_list = data['dc'].tolist()
+    # "+ 1" keeps the last window when len(data_list) is a multiple of 337
+    samples = [data_list[i:i + window]
+               for i in range(0, len(data_list) - window + 1, window)]
+    # Randomly shuffle samples
+    random.shuffle(samples)
+
+    X_eval, y_eval = [], []
+    for i in range(0, len(samples), k):
+        fold = samples[i:i + k]
+        # from_iterable flattens the windows; plain chain(fold) keeps them nested
+        X_eval.append({'dc': list(itertools.chain.from_iterable(s[:-1] for s in fold))})
+        y_eval.append({'dc': [s[-1] for s in fold]})
+
+    # Training set i is the concatenation of every fold except fold i
+    X_train, y_train = [], []
+    for i in range(len(X_eval)):
+        X_train.append({'dc': []})
+        y_train.append({'dc': []})
+        for c, d in enumerate(X_eval):
+            if c != i:
+                X_train[i]['dc'].extend(d['dc'])
+                y_train[i]['dc'].extend(y_eval[c]['dc'])
+
+    return X_train, y_train, X_eval, y_eval
+
+
+def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
+    """Train nn fold by fold and return the collected evaluation results.
+
+    Each fold gets `steps` single-step nn.train() calls, each followed by one
+    nn.evaluate() on that fold's eval data; batch size is the number of
+    336-value samples in the respective 'dc' list (at least 1).
+    """
+    evaluation = []
+    for count, train_data in enumerate(X_train):
+        train_batch = max(1, len(train_data['dc']) // 336)
+        eval_batch = max(1, len(X_eval[count]['dc']) // 336)
+        for i in range(steps):
+            nn.train(train_data, y_train[count], batch_size=train_batch, steps=1)
+            evaluation.append(nn.evaluate(X_eval[count], y_eval[count], batch_size=eval_batch))
+            print("Training %s: %s/%s" % (count, (i+1), steps))
+    return evaluation
diff --git a/pywatts/neural.py b/pywatts/neural.py
index 2a7a548..bbe0b93 100644
--- a/pywatts/neural.py
+++ b/pywatts/neural.py
@@ -1,11 +1,13 @@
 import pandas
+import numpy as np
 import tensorflow as tf
 
 
 def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
     # Create dictionary for features in hour 0 ... 335
     features = {str(idx): [] for idx in range(336)}
-    dc_values = X['dc'].tolist()
+    #dc_values = X['dc'].tolist()
+    dc_values = X['dc']
 
     # Iterate the empty dictionary always adding the idx-th element from the dc_values list
     for idx, value_list in features.items():
@@ -13,7 +15,8 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
 
     labels = None
     if y is not None:
-        labels = y['dc'].values
+        #labels = y['dc'].values
+        labels = y['dc']
 
     if labels is None:
         dataset = tf.data.Dataset.from_tensor_slices(dict(features))
@@ -38,8 +41,8 @@ class Net:
     def train(self, training_data, training_results, batch_size, steps):
         self.__regressor.train(input_fn=lambda: pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=batch_size), steps=steps)
 
-    def evaluate(self, eval_data, eval_results):
-        return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False), steps=1)
+    def evaluate(self, eval_data, eval_results, batch_size=1):
+        return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False, batch_size=batch_size), steps=1)
 
     def predict1h(self, predict_data):
         return self.__regressor.predict(input_fn=lambda: pywatts_input_fn(predict_data, num_epochs=1, shuffle=False))
diff --git a/pywatts/test_kcross_train.py b/pywatts/test_kcross_train.py
new file mode 100644
index 0000000..807c0c4
--- /dev/null
+++ b/pywatts/test_kcross_train.py
@@ -0,0 +1,41 @@
+import peewee
+import tensorflow as tf
+import pywatts.db
+from pywatts import kcross
+
+NUM_STATIONS_FROM_DB = 75
+K = 4
+NUM_EVAL_STATIONS = 40  # NOTE(review): not referenced anywhere in this script
+TRAIN = True
+PLOT = True
+TRAIN_STEPS = 4
+
+# Pass the indices 1 .. NUM_STATIONS_FROM_DB - 1 (range end is exclusive)
+df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
+X = df  # NOTE(review): X and y are not used below; kcross.split() gets df directly
+y = df['dc']
+
+
+# Define feature columns and initialize Regressor
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+
+# Split df into per-fold training and evaluation sets
+(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
+
+# Default handed to plot_training() when TRAIN is False
+train_eval = {}
+
+if TRAIN:
+    # Train the model with the steps given
+    train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)
+
+
+
+if PLOT:
+    # Plot training success rate (with 'average loss')
+    pywatts.main.plot_training(train_eval)
+
+
+exit()
diff --git a/pywatts/test_predict.py b/pywatts/test_predict.py
index 3924fba..7b76a5c 100644
--- a/pywatts/test_predict.py
+++ b/pywatts/test_predict.py
@@ -19,5 +19,6 @@ n = pywatts.neural.Net(feature_cols=feature_col)
 prediction = predict(n, pred_query)
 
 print(prediction)
+print(pred_result)
 
 pywatts.main.eval_prediction(prediction, pred_result)
diff --git a/pywatts/test_train.py b/pywatts/test_train.py
index e18093e..a378485 100644
--- a/pywatts/test_train.py
+++ b/pywatts/test_train.py
@@ -4,11 +4,11 @@ import pywatts.db
 from pywatts.main import *
 
 NUM_STATIONS_FROM_DB = 75
-NUM_TRAIN_STATIONS = 60
-NUM_EVAL_STATIONS = 15
+NUM_TRAIN_STATIONS = 400
+NUM_EVAL_STATIONS = 40
 TRAIN = True
 PLOT = True
-TRAIN_STEPS = 10
+TRAIN_STEPS = 50
 
 
 df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))