Fixed feature columns

2018-06-23 15:00:16 +02:00 · 2018-06-23 15:00:16 +02:00 · 78efc4d041
parent cab536f7f2
commit 78efc4d041
4 changed files with 123 additions and 35 deletions
--- a/pywatts/main.py
+++ b/pywatts/main.py
@ -1,25 +1,11 @@
-import numpy as np
-import tensorflow as tf
 import matplotlib.pyplot as pp
-import pywatts.neural
+import numpy as np
 from sklearn.metrics import explained_variance_score, mean_absolute_error, median_absolute_error
 import pandas
 from random import randint

-from sklearn.model_selection import train_test_split


-df = pywatts.db.rows_to_df(list(range(1, 50)))
-X = df
-y = df['dc']
-
-X_train, X_tmp, y_train, y_tmp = train_test_split(X, y, test_size=0.2, random_state=34)
-
-X_test, X_val, y_test, y_val = train_test_split(X_tmp, y_tmp, test_size=0.5, random_state=23)
-
-feature_cols = [tf.feature_column.numeric_column(col) for col in X.columns]
-n = pywatts.neural.Net(feature_cols=feature_cols)
-

 def train_split(data, size):
    X_values = {'dc': [], 'temp': [], 'wind': []}
@ -27,15 +13,16 @@ def train_split(data, size):
    for i in range(size):
        rnd_idx = randint(0, data.size / data.shape[1] - 337)

-        X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336])
-        X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336])
-        X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336])
-        y_values.append(data['dc'][rnd_idx + 337])
+        X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist())
+        X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336].tolist())
+        X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336].tolist())
+        y_values.append(data['dc'][rnd_idx + 337].tolist())
+

    return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values})


-def input_data(json_str, idx=0):
+def input_query(json_str, idx=0):
    tmp_df = pandas.read_json(json_str)

    return pandas.DataFrame.from_dict(
@ -44,12 +31,17 @@ def input_data(json_str, idx=0):
         'wind': tmp_df['wind'][idx]}
    )

+def input_result(json_str, idx=0):
+    tmp_df = pandas.read_json(json_str)

-def train(steps=100):
+    return tmp_df.values[idx]
+
+
+def train(nn, X_train, y_train, X_val, y_val, steps=100):
    evaluation = []
    for i in range(steps):
-        n.train(X_train, y_train, steps=100)
-        evaluation.append(n.evaluate(X_val, y_val))
+        nn.train(X_train, y_train, steps=100)
+        evaluation.append(nn.evaluate(X_val, y_val))
        print("Training %s of %s" % ((i+1), steps))
    return evaluation

@ -58,12 +50,15 @@ def plot_training(evaluation):
    loss = []
    for e in evaluation:
        loss.append(e['average_loss'])
+
    pp.plot(loss)
+    # Needed for execution in PyCharm
+    pp.show()


-def predict(X_pred):
-    pred = n.predict1h(X_pred)
-    predictions = np.array([p['predictions'][0] for p in pred])
+def predict(nn, X_pred):
+    pred = nn.predict1h(X_pred)
+    predictions = np.array([p['predictions'] for p in pred])
    return predictions


--- a/pywatts/neural.py
+++ b/pywatts/neural.py
@ -1,12 +1,34 @@
+import pandas
 import tensorflow as tf


-def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=366):
-    return tf.estimator.inputs.pandas_input_fn(x=X,
-                                               y=y,
-                                               num_epochs=num_epochs,
-                                               shuffle=shuffle,
-                                               batch_size=batch_size)
+# def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
+#
+#    return tf.estimator.inputs.pandas_input_fn(x=X,
+#                                               y=y,
+#                                               num_epochs=num_epochs,
+#                                               shuffle=shuffle,
+#                                               batch_size=batch_size)
+
+def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
+    """Build a batched tf.data.Dataset from concatenated 336-value 'dc' windows.
+
+    X['dc'] is read as back-to-back windows of 336 values; the value at
+    position idx of every window becomes one sample of feature str(idx).
+
+    :param X: object whose 'dc' entry provides .tolist()
+    :param y: optional object whose 'dc' entry provides .values (the labels)
+    :param num_epochs: number of passes over the data, None repeats indefinitely
+    :param shuffle: whether to shuffle the samples on every pass
+    :param batch_size: number of samples per batch
+    :return: the batched dataset, yielding features or (features, labels)
+    """
+    dc_values = X['dc'].tolist()
+
+    # One feature per hour 0 ... 335: feature str(idx) collects the idx-th
+    # value of every 336-value window
+    features = {str(idx): dc_values[idx::336] for idx in range(336)}
+
+    if y is None:
+        dataset = tf.data.Dataset.from_tensor_slices(features)
+    else:
+        dataset = tf.data.Dataset.from_tensor_slices((features, y['dc'].values))
+
+    if shuffle:
+        # Buffer covers all samples; shuffle() does not accept a buffer size of 0
+        dataset = dataset.shuffle(buffer_size=max(1, len(features['0'])))
+
+    # Honour num_epochs: without repeat() the data is exhausted after one
+    # pass, which ends training before the requested number of steps
+    dataset = dataset.repeat(num_epochs)
+
+    return dataset.batch(batch_size)


 class Net:
@ -19,10 +41,10 @@ class Net:
                                                     model_dir='tf_pywatts_model')

    def train(self, training_data, training_results, steps):
-        self.__regressor.train(input_fn=pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=336), steps=steps)
+        self.__regressor.train(input_fn=lambda: pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=1), steps=steps)

    def evaluate(self, eval_data, eval_results):
-        return self.__regressor.evaluate(input_fn=pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False), steps=1)
+        return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False), steps=1)

    def predict1h(self, predict_data):
-        return self.__regressor.predict(input_fn=pywatts_input_fn(predict_data, num_epochs=1, shuffle=False))
+        return self.__regressor.predict(input_fn=lambda: pywatts_input_fn(predict_data, num_epochs=1, shuffle=False))
--- a/pywatts/test_predict.py
+++ b/pywatts/test_predict.py
@ -0,0 +1,22 @@
+import tensorflow as tf
+import pywatts.db
+# Imported explicitly: pywatts.neural.Net is used below and no other import
+# in this script is guaranteed to load the submodule
+import pywatts.neural
+from pywatts.main import *
+
+
+# Query sample file; the result file name is derived from it
+PREDICT_QUERY = "query-sample_1hour.json"
+PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
+# Index of the entry read from both sample files
+QUERY_ID = 0
+
+
+pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
+pred_result = input_result("../sample_data/" + PREDICT_RESULT, QUERY_ID)
+
+
+# Define feature columns (one numeric column per index 0 ... 335) and initialize Regressor
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+prediction = predict(n, pred_query)
+
+
+pywatts.main.eval_prediction(prediction, pred_result)
--- a/pywatts/test_train.py
+++ b/pywatts/test_train.py
@ -0,0 +1,49 @@
+import peewee
+import tensorflow as tf
+import pywatts.db
+# Imported explicitly: pywatts.neural.Net is used below and no other import
+# in this script is guaranteed to load the submodule
+import pywatts.neural
+from pywatts.main import *
+
+# Rows 1 ... NUM_STATIONS_FROM_DB - 1 are loaded from the database
+NUM_STATIONS_FROM_DB = 50
+# Number of random windows drawn by train_split for training / evaluation
+NUM_TRAIN_STATIONS = 1
+NUM_EVAL_STATIONS = 1
+TRAIN = True
+PLOT = True
+# Number of train-then-evaluate rounds passed to train()
+TRAIN_STEPS = 1
+
+
+df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
+X = df
+y = df['dc']
+
+#X_train, X_tmp, y_train, y_tmp = train_test_split(X, y, test_size=0.2, random_state=34)
+#X_test, X_val, y_test, y_val = train_test_split(X_tmp, y_tmp, test_size=0.5, random_state=23)
+
+
+# Define feature columns (one numeric column per index 0 ... 335) and initialize Regressor
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+
+# Training data
+(X_train, y_train) = train_split(df, NUM_TRAIN_STATIONS)
+
+# Evaluation data
+(X_val, y_val) = train_split(df, NUM_EVAL_STATIONS)
+
+
+
+# train() returns a list of evaluation results; start with an empty list so
+# plotting still works when TRAIN is disabled
+train_eval = []
+
+if TRAIN:
+
+    # Run TRAIN_STEPS training rounds, evaluating after each one
+    train_eval = train(n, X_train, y_train, X_val, y_val, TRAIN_STEPS)
+
+
+
+if PLOT:
+    # Plot the 'average_loss' of every evaluation round
+    pywatts.main.plot_training(train_eval)
+
+
+exit()