From 78efc4d041dfe30068a82fe6f490910b63821104 Mon Sep 17 00:00:00 2001
From: reedts
Date: Sat, 23 Jun 2018 15:00:16 +0200
Subject: [PATCH] Fixed feature columns

---
Notes (this area is ignored by `git am`):

Formatting: this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Line structure was rebuilt without changing
any token. Blank-line counts match the hunk headers, but the exact position
of blank context lines and the alignment of continuation lines are inferred.
Verify against commit 78efc4d before applying.

Review observations (no +/- content was altered):
- neural.py: the new pywatts_input_fn() still accepts num_epochs and shuffle,
  but its body never reads them; the Dataset is only batched.
- neural.py: `import pandas` is added, but no added line in the visible hunks
  references `pandas`.
- main.py drops `import pywatts.neural`, while test_train.py and
  test_predict.py call pywatts.neural.Net and import only pywatts.db and
  pywatts.main. Confirm pywatts.neural is imported elsewhere.
- main.py train_split(): each window is [rnd_idx, rnd_idx + 336) but the label
  is read at rnd_idx + 337, so index rnd_idx + 336 is skipped. Confirm this
  is intended.
- main.py train_split(): `data.size / data.shape[1]` is true division on
  Python 3 and hands randint() a float; `//` would keep it an int.
- test_predict.py calls pywatts.main.eval_prediction, which is not defined in
  the hunks shown here; presumably it lives further down in main.py.
- test_train.py: `peewee`, `X` and `y` are not referenced by any other line
  of the file.

 pywatts/main.py         | 47 ++++++++++++++++++---------------------
 pywatts/neural.py       | 40 +++++++++++++++++++++++++--------
 pywatts/test_predict.py | 22 ++++++++++++++++++
 pywatts/test_train.py   | 49 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 123 insertions(+), 35 deletions(-)
 create mode 100644 pywatts/test_predict.py
 create mode 100644 pywatts/test_train.py

diff --git a/pywatts/main.py b/pywatts/main.py
index b509c22..cba3cb8 100644
--- a/pywatts/main.py
+++ b/pywatts/main.py
@@ -1,25 +1,11 @@
-import numpy as np
-import tensorflow as tf
 import matplotlib.pyplot as pp
-import pywatts.neural
+import numpy as np
 from sklearn.metrics import explained_variance_score, mean_absolute_error, median_absolute_error
 import pandas
 from random import randint
-from sklearn.model_selection import train_test_split
 
 
-df = pywatts.db.rows_to_df(list(range(1, 50)))
-X = df
-y = df['dc']
-
-X_train, X_tmp, y_train, y_tmp = train_test_split(X, y, test_size=0.2, random_state=34)
-
-X_test, X_val, y_test, y_val = train_test_split(X_tmp, y_tmp, test_size=0.5, random_state=23)
-
-feature_cols = [tf.feature_column.numeric_column(col) for col in X.columns]
-n = pywatts.neural.Net(feature_cols=feature_cols)
-
 
 
 def train_split(data, size):
     X_values = {'dc': [], 'temp': [], 'wind': []}
@@ -27,15 +13,16 @@ def train_split(data, size):
 
     for i in range(size):
         rnd_idx = randint(0, data.size / data.shape[1] - 337)
-        X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336])
-        X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336])
-        X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336])
-        y_values.append(data['dc'][rnd_idx + 337])
+        X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist())
+        X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336].tolist())
+        X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336].tolist())
+        y_values.append(data['dc'][rnd_idx + 337].tolist())
+
 
     return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values})
 
 
-def input_data(json_str, idx=0):
+def input_query(json_str, idx=0):
     tmp_df = pandas.read_json(json_str)
 
     return pandas.DataFrame.from_dict(
@@ -44,12 +31,17 @@ def input_data(json_str, idx=0):
          'wind': tmp_df['wind'][idx]}
     )
 
+def input_result(json_str, idx=0):
+    tmp_df = pandas.read_json(json_str)
 
-def train(steps=100):
+    return tmp_df.values[idx]
+
+
+def train(nn, X_train, y_train, X_val, y_val, steps=100):
     evaluation = []
     for i in range(steps):
-        n.train(X_train, y_train, steps=100)
-        evaluation.append(n.evaluate(X_val, y_val))
+        nn.train(X_train, y_train, steps=100)
+        evaluation.append(nn.evaluate(X_val, y_val))
         print("Training %s of %s" % ((i+1), steps))
     return evaluation
 
@@ -58,12 +50,15 @@ def plot_training(evaluation):
     loss = []
     for e in evaluation:
         loss.append(e['average_loss'])
+
     pp.plot(loss)
+    # Needed for execution in PyCharm
+    pp.show()
 
 
-def predict(X_pred):
-    pred = n.predict1h(X_pred)
-    predictions = np.array([p['predictions'][0] for p in pred])
+def predict(nn, X_pred):
+    pred = nn.predict1h(X_pred)
+    predictions = np.array([p['predictions'] for p in pred])
 
     return predictions
 
diff --git a/pywatts/neural.py b/pywatts/neural.py
index c1fbfc5..5d2b5dc 100644
--- a/pywatts/neural.py
+++ b/pywatts/neural.py
@@ -1,12 +1,34 @@
+import pandas
 import tensorflow as tf
 
 
-def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=366):
-    return tf.estimator.inputs.pandas_input_fn(x=X,
-                                               y=y,
-                                               num_epochs=num_epochs,
-                                               shuffle=shuffle,
-                                               batch_size=batch_size)
+# def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
+#
+#     return tf.estimator.inputs.pandas_input_fn(x=X,
+#                                                y=y,
+#                                                num_epochs=num_epochs,
+#                                                shuffle=shuffle,
+#                                                batch_size=batch_size)
+
+def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
+    # Create dictionary for features in hour 0 ... 335
+    features = {str(idx): [] for idx in range(336)}
+    dc_values = X['dc'].tolist()
+
+    # Iterate the empty dictionary always adding the idx-th element from the dc_values list
+    for idx, value_list in features.items():
+        value_list.extend(dc_values[int(idx)::336])
+
+    labels = None
+    if y is not None:
+        labels = y['dc'].values
+
+    if labels is None:
+        dataset = tf.data.Dataset.from_tensor_slices(dict(features))
+    else:
+        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
+
+    return dataset.batch(batch_size)
 
 
 class Net:
@@ -19,10 +41,10 @@ class Net:
                                                      model_dir='tf_pywatts_model')
 
     def train(self, training_data, training_results, steps):
-        self.__regressor.train(input_fn=pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=336), steps=steps)
+        self.__regressor.train(input_fn=lambda: pywatts_input_fn(training_data, y=training_results, num_epochs=None, shuffle=True, batch_size=1), steps=steps)
 
     def evaluate(self, eval_data, eval_results):
-        return self.__regressor.evaluate(input_fn=pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False), steps=1)
+        return self.__regressor.evaluate(input_fn=lambda: pywatts_input_fn(eval_data, y=eval_results, num_epochs=1, shuffle=False), steps=1)
 
     def predict1h(self, predict_data):
-        return self.__regressor.predict(input_fn=pywatts_input_fn(predict_data, num_epochs=1, shuffle=False))
+        return self.__regressor.predict(input_fn=lambda: pywatts_input_fn(predict_data, num_epochs=1, shuffle=False))
diff --git a/pywatts/test_predict.py b/pywatts/test_predict.py
new file mode 100644
index 0000000..ff03d08
--- /dev/null
+++ b/pywatts/test_predict.py
@@ -0,0 +1,22 @@
+import tensorflow as tf
+import pywatts.db
+from pywatts.main import *
+
+
+PREDICT_QUERY = "query-sample_1hour.json"
+PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
+QUERY_ID = 0
+
+
+pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
+pred_result = input_result("../sample_data/" + PREDICT_RESULT, QUERY_ID)
+
+
+# Define feature columns and initialize Regressor
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+prediction = predict(n, pred_query)
+
+
+pywatts.main.eval_prediction(prediction, pred_result)
diff --git a/pywatts/test_train.py b/pywatts/test_train.py
new file mode 100644
index 0000000..4a20c5f
--- /dev/null
+++ b/pywatts/test_train.py
@@ -0,0 +1,49 @@
+import peewee
+import tensorflow as tf
+import pywatts.db
+from pywatts.main import *
+
+NUM_STATIONS_FROM_DB = 50
+NUM_TRAIN_STATIONS = 1
+NUM_EVAL_STATIONS = 1
+TRAIN = True
+PLOT = True
+TRAIN_STEPS = 1
+
+
+df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
+X = df
+y = df['dc']
+
+#X_train, X_tmp, y_train, y_tmp = train_test_split(X, y, test_size=0.2, random_state=34)
+#X_test, X_val, y_test, y_val = train_test_split(X_tmp, y_tmp, test_size=0.5, random_state=23)
+
+
+# Define feature columns and initialize Regressor
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+
+# Training data
+(X_train, y_train) = train_split(df, NUM_TRAIN_STATIONS)
+
+# Evaluation data
+(X_val, y_val) = train_split(df, NUM_EVAL_STATIONS)
+
+
+
+train_eval = {}
+
+if TRAIN:
+
+    # Train the model with the steps given
+    train_eval = train(n, X_train, y_train, X_val, y_val, TRAIN_STEPS)
+
+
+
+if PLOT:
+    # Plot training success rate (with 'average loss')
+    pywatts.main.plot_training(train_eval)
+
+
+exit()