Compare commits
26 Commits
Author | SHA1 | Date |
---|---|---|
Paul Schaub | a8fd0844d2 | |
Paul Schaub | 688b4025df | |
Paul Schaub | 615428944a | |
Paul Schaub | ed68d78d98 | |
Paul Schaub | 70edcea2ca | |
Paul Schaub | f668ceaf6a | |
reedts | f5735fa2f1 | |
Paul Schaub | 65756a18a4 | |
Paul Schaub | c6261134c9 | |
reedts | 51d0e9cea8 | |
reedts | dfddb8799e | |
reedts | e019f1bee7 | |
Paul Schaub | e97ba96dd4 | |
reedts | 173d5762bc | |
Paul Schaub | 525298f761 | |
reedts | d4da4ca121 | |
reedts | 0eef892e0c | |
reedts | 68e9b9ddd0 | |
reedts | aec38b2764 | |
reedts | 841690f98b | |
Paul Schaub | ba0c7bc2ea | |
reedts | 0e228772dc | |
Paul Schaub | fd623c32de | |
reedts | 0c07241104 | |
reedts | 288be08699 | |
Paul Schaub | 2dfe5ef1b6 | |
@@ -111,3 +111,9 @@ venv.bak/

# Tensorflow Model
tf_pywatts_model/

# Tensorboard
pywatts/tensorboard

# Figures
figures/
@@ -0,0 +1,27 @@
PyWatts - Predict Output of Solar Panels

# Dependencies

PyWatts is based on Python 3.6 and uses the following dependencies:

* requests (2.19.1)
* pypvwatts (2.1.0)
* numpy (1.15.0)
* peewee (3.5.4)
* scikit-learn (0.19.2)
* pandas (0.23.4)
* tensorflow (1.9.0)
* matplotlib (2.2.3)
* scipy (1.1.0)

We suggest using a Python virtualenv.

# Execute

The script can be executed by issuing the following command:

```bash
$ python photovoltaic_gruppe4.py data.json
```

The output can be found in the same directory in `test_data_gruppe4.json`.
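Taken together, the dependency list and the run instructions above suggest the following end-to-end workflow. This is only a sketch: it assumes the pinned versions are available from PyPI, and the virtualenv directory name `venv` and the input file name `data.json` are placeholders.

```bash
# Create and activate a Python 3.6 virtualenv (the directory name is arbitrary)
$ python3.6 -m venv venv
$ source venv/bin/activate

# Install the pinned dependencies from the README
$ pip install requests==2.19.1 pypvwatts==2.1.0 numpy==1.15.0 peewee==3.5.4 \
      scikit-learn==0.19.2 pandas==0.23.4 tensorflow==1.9.0 matplotlib==2.2.3 scipy==1.1.0

# Run the prediction script; results are written next to it as test_data_gruppe4.json
$ python photovoltaic_gruppe4.py data.json
```

The input file is parsed with `input_queries` (a pandas `read_json` call), so it needs at least a `dc` column; whether a `max_temp` column is present decides between the one-hour and 24-hour prediction paths.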
@@ -0,0 +1,46 @@
import os
import sys

import tensorflow as tf

import pywatts.db
from pywatts.routines import *

# get rid of TF debug message
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

if len(sys.argv) != 2:
    print("Usage: python photovoltaic_gruppe4.py <file.json>")
    exit(1)

json_file = sys.argv[1]  # json file

oneH, queries = input_queries(json_file)

feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

predictions = []
total = len(queries)
for idx, query in enumerate(queries):

    percent = idx / total
    sys.stdout.write("\r")
    progress = ""
    for i in range(20):
        if i < int(20 * percent):
            progress += "="
        else:
            progress += " "
    sys.stdout.write("[ %s ] %.2f%%" % (progress, percent * 100))
    sys.stdout.flush()

    if oneH:
        predictions.extend(predict(n, query).astype('Float64').tolist())
    else:
        predictions.append(predict24h(n, query))

print(predictions, file=open("test_data_gruppe4.json", "w"))

sys.stdout.write("\r")
print("[ ==================== ] 100.00%")
@@ -1,5 +1,5 @@
 from pywatts import db
 from pywatts import fetchdata
 from pywatts import neural
-from pywatts import main
+from pywatts import routines
 from pywatts import kcross
@@ -0,0 +1,11 @@
import tensorflow as tf
import subprocess

writer = tf.summary.FileWriter("tensorboard")
checkpoint = tf.train.get_checkpoint_state('tf_pywatts_model_best')
with tf.Session() as sess:
    saver = tf.train.import_meta_graph(checkpoint.model_checkpoint_path + '.meta')
    saver.restore(sess, checkpoint.model_checkpoint_path)
    writer.add_graph(sess.graph)

subprocess.check_output(['tensorboard', '--logdir', 'tensorboard'])
@@ -6,8 +6,7 @@ from playhouse.sqlite_ext import SqliteExtDatabase
 import os.path

 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-db_path = os.path.join(BASE_DIR, "../pywatts.db")
-print(db_path)
+db_path = os.path.join(BASE_DIR, "pywatts.db")
 db = SqliteExtDatabase(db_path)

@@ -35,21 +34,14 @@ class Result(Model):


 def rows_to_df(indices):
-    temps = []
     dcs = []
-    winds = []

     db.connect()

     for result in Result.select().where(Result.id << indices):
-        temps += result.temperature
         dcs += result.dc_output
-        winds += result.wind_speed

     db.close()

     return pd.DataFrame(
-        {'temp': temps,
-         'dc': dcs,
-         'wind': winds
-         })
+        {'dc': dcs})

@@ -0,0 +1,74 @@
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
from pywatts import kcross

NUM_STATIONS_FROM_DB = 75
K = 10
NUM_EVAL_STATIONS = 40
TRAIN = True
PLOT = True
TRAIN_STEPS = 10
TOTAL_STEPS = 6
NUM_QUERIES = 5
PREDICT_QUERY = "query-sample_24hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
FIGURE_OUTPUT_DIR = "../figures/"


df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
X = df
y = df['dc']


# Define feature columns and initialize Regressor
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)


# Training data
(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)


if TRAIN:

    train_eval = None

    color_gradient_base = (0.5, 0, 0)
    color_step_width = (0.5/TOTAL_STEPS, 0, 0)

    for i in range(TOTAL_STEPS):
        # Train the model with the steps given
        train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)

        for q in range(NUM_QUERIES):

            pred_query = input_query("../sample_data/" + PREDICT_QUERY, q)
            pred_result = input_result("../sample_data/" + PREDICT_RESULT, q)

            prediction = predict24h(n, pred_query)

            pp.figure(q)

            if i == 0:
                pp.plot(pred_result, 'black')

            pp.plot(prediction, color=color_gradient_base)
            pp.savefig(FIGURE_OUTPUT_DIR+'{}.pdf'.format(q), orientation='landscape')

        color_gradient_base = tuple([sum(x) for x in zip(color_gradient_base, color_step_width)])

    for i in range(NUM_QUERIES):
        pp.close(i)

    if PLOT:
        # Plot training success rate (with 'average loss')
        loss = []
        for e in train_eval:
            loss.append(e['average_loss'])

        pp.plot(loss)
        # Needed for execution in PyCharm
        pp.show()

exit()
@@ -1,6 +1,4 @@
 import random
-import itertools
-from pywatts import db


 def split(data, k):
@@ -18,58 +16,42 @@ def split(data, k):
     data_list = data['dc'].tolist()

     # Each sample has 337 elements
-    samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 337)]
+    samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 30)]
     # Randomly shuffle samples
     random.shuffle(samples)

     bucketsize = int(len(samples) / k)

-    print(k)
-    print(len(data))
-    print(len(samples))
-    print(bucketsize)
-
     # K steps
     for i in range(k):
-        eval_dict = []
-        train_dict = []
         eval_samples = []
         train_samples = []
         for j in range(k):
             if j == i:
-                eval_samples.extend(samples[i*bucketsize:(i+1)*bucketsize])
+                eval_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
             else:
-                train_samples.extend(samples[i*bucketsize:(i+1)*bucketsize])
+                train_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])

-        for s in eval_samples:
-            # Create new dictionaries in the eval lists
-            X_eval.append({'dc': s[:-1]})
-            y_eval.append({'dc': s[-1]})
+        # Create new dictionaries in the eval lists
+        X_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
+        y_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 == 0]})

-        for s in train_samples:
-            X_train.append({'dc': s[:-1]})
-            y_train.append({'dc': s[-1]})
-
-        print(len(X_train) / 12)
-        #print(X_train)
-        #print(y_train)
-        exit(0)
+        X_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
+        y_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 == 0]})

     return X_train, y_train, X_eval, y_eval


-def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
+def train(nn, X_train, y_train, X_eval, y_eval, steps=100):
     """Trains the Network nn using k-cross-validation"""
     evaluation = []
     for count, train_data in enumerate(X_train):
         for i in range(steps):
-            nn.train(train_data, y_train[count], batch_size=int(len(train_data['dc'])/336), steps=1)
-            print(X_eval[count])
-            print(len(X_eval[count]['dc']))
-            print(y_eval[count])
-            evaluation.append(nn.evaluate(X_eval[count], y_eval[count], batch_size=int(len(X_eval[count]['dc'])/336)))
+            nn.train(train_data, y_train[count], batch_size=1000, steps=30) #batch_size=int(len(train_data['dc'])/336), steps=1)
+            evaluation.append(nn.evaluate(X_eval[count], y_eval[count]))
             print("Training %s: %s/%s" % (count, (i+1), steps))

     return evaluation
@@ -1,12 +1,9 @@
 import pandas
 import numpy as np
 import tensorflow as tf


 def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
     # Create dictionary for features in hour 0 ... 335
     features = {str(idx): [] for idx in range(336)}
-    #dc_values = X['dc'].tolist()
     dc_values = X['dc']

     # Iterate the empty dictionary always adding the idx-th element from the dc_values list
@@ -15,7 +12,6 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):

     labels = None
     if y is not None:
-        #labels = y['dc'].values
         labels = y['dc']

     if labels is None:
@@ -23,19 +19,22 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
     else:
         dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

-    if shuffle:
-        dataset.shuffle(len(features['0']))
-    if num_epochs is not None:
-        return dataset.batch(len(features['0']))
-
-    return dataset.batch(batch_size)
+    if shuffle:
+        return dataset.shuffle(len(features['0']*len(features)*4)).repeat().batch(batch_size)
+    else:
+        return dataset.batch(batch_size)


 class Net:
     __regressor = None
-    __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc', 'temp', 'wind']]
+    __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc']]

     def __init__(self, feature_cols=__feature_cols):
         self.__regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
-                                                     hidden_units=[75, 75],
+                                                     hidden_units=[64, 128, 64],
                                                      model_dir='tf_pywatts_model')

     def train(self, training_data, training_results, batch_size, steps):
@@ -9,7 +9,7 @@ from random import randint

 def train_split(data, size):
     used_idxs = []
-    X_values = {'dc': [], 'temp': [], 'wind': []}
+    X_values = {'dc': []}
     y_values = []
     for i in range(size):
         rnd_idx = randint(0, data.size / data.shape[1] - 337)
@@ -20,8 +20,6 @@ def train_split(data, size):
         used_idxs.append(rnd_idx)

         X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist())
-        X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336].tolist())
-        X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336].tolist())
         y_values.append(data['dc'][rnd_idx + 337].tolist())

     return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values})
@@ -31,11 +29,25 @@ def input_query(json_str, idx=0):
     tmp_df = pandas.read_json(json_str)

     return pandas.DataFrame.from_dict(
-        {'dc': tmp_df['dc'][idx],
-         'temp': tmp_df['temp'][idx],
-         'wind': tmp_df['wind'][idx]}
+        {'dc': tmp_df['dc'][idx]}
     )


+def input_queries(json_str):
+    tmp_df = pandas.read_json(json_str)
+
+    oneH = False
+    try:
+        s = tmp_df['max_temp'][0]
+    except KeyError:
+        oneH = True
+
+    queries = []
+    for i in range(len(tmp_df)):
+        queries.append(pandas.DataFrame.from_dict(
+            {'dc': tmp_df['dc'][i]}
+        ))
+    return oneH, queries
+
+
 def input_result(json_str, idx=0):
     tmp_df = pandas.read_json(json_str)
@@ -54,17 +66,37 @@ def train(nn, X_train, y_train, X_val, y_val, steps=100):

 def plot_training(evaluation):
     loss = []
+    steps = []
     for e in evaluation:
-        loss.append(e['average_loss'])
+        loss.append(e['loss'])
+        steps.append(e['global_step'])

-    pp.plot(loss)
+    pp.plot(steps, loss)
     # Needed for execution in PyCharm
     pp.show()


 def predict(nn, X_pred):
     pred = nn.predict1h(X_pred)
-    predictions = np.array([p['predictions'] for p in pred])
+    # Cap results to 0
+    predictions = np.array([max(p['predictions'], [0]) for p in pred])
     return predictions


+def predict24h(nn, X_pred):
+    predictions = []
+
+    input = {'dc': X_pred['dc'].tolist()}
+
+    for i in range(24):
+        pred = nn.predict1h(pandas.DataFrame.from_dict(input))
+        # Cap prediction to 0
+        predictions.extend(list([max(p['predictions'][0], 0) for p in pred]))
+        # Remove first value and append predicted value
+        del input['dc'][0]
+        input['dc'].append(predictions[-1])
+        # print("Prediction for hour %d/%d" % (i+1, 24))
+
+    return predictions
@@ -76,3 +108,9 @@ def eval_prediction(prediction, result):
     print("The Median Absolute Error: %.2f volt dc" % median_absolute_error(
         result, prediction))

+def jsonify(predictions):
+    json_out = "["
+    for v in predictions:
+        json_out += "[" + str(v) + "],"
+    json_out = json_out[:-1] + "]"
+    return json_out
@@ -1,14 +1,14 @@
 import peewee
 import tensorflow as tf

 import pywatts.db
 from pywatts import kcross

 NUM_STATIONS_FROM_DB = 75
-K = 4
+K = 10
 NUM_EVAL_STATIONS = 40
 TRAIN = True
 PLOT = True
-TRAIN_STEPS = 4
+TRAIN_STEPS = 10


 df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
@@ -25,7 +25,7 @@ n = pywatts.neural.Net(feature_cols=feature_col)
 (X_train, y_train, X_eval, y_eval) = kcross.split(df, K)


-train_eval = {}
+#train_eval = {}

 if TRAIN:
     # Train the model with the steps given
@@ -35,7 +35,7 @@ if TRAIN:

 if PLOT:
     # Plot training success rate (with 'average loss')
-    pywatts.main.plot_training(train_eval)
+    pywatts.routines.plot_training(train_eval)


 exit()
@@ -1,11 +1,11 @@
 import tensorflow as tf
 import pywatts.db
-from pywatts.main import *
+from pywatts.routines import *


 PREDICT_QUERY = "query-sample_1hour.json"
 PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
-QUERY_ID = 1
+QUERY_ID = 0


 pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
@@ -21,4 +21,4 @@ prediction = predict(n, pred_query)
 print(prediction)
 print(pred_result)

-pywatts.main.eval_prediction(prediction, pred_result)
+pywatts.routines.eval_prediction(prediction, pred_result)
@@ -0,0 +1,27 @@
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
import matplotlib.pyplot as pp


PREDICT_QUERY = "query-sample_24hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
QUERY_ID = 0


pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
pred_result = input_result("../sample_data/" + PREDICT_RESULT, QUERY_ID)


# Define feature columns and initialize Regressor
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

prediction = predict24h(n, pred_query)

print(prediction)
print(pred_result)

pp.plot(pred_result, 'black')
pp.plot(prediction, 'red')
pp.show()
@@ -1,7 +1,7 @@
 import peewee
 import tensorflow as tf

 import pywatts.db
-from pywatts.main import *
+from pywatts.routines import *

 NUM_STATIONS_FROM_DB = 75
 NUM_TRAIN_STATIONS = 400
@@ -43,7 +43,7 @@ if TRAIN:

 if PLOT:
     # Plot training success rate (with 'average loss')
-    pywatts.main.plot_training(train_eval)
+    pywatts.routines.plot_training(train_eval)


 exit()