14 changed files with 70 additions and 272 deletions
--- a/.gitignore
+++ b/.gitignore
@ -111,9 +111,3 @@ venv.bak/

 # Tensorflow Model
 tf_pywatts_model/
-
-# Tensorboard
-pywatts/tensorboard
-
-# Figures
-figures/
--- a/README.md
+++ b/README.md
@ -1,27 +0,0 @@
-PyWatts - Predict Output of Solar Panels
-
-# Dependencies
-
-PyWatts is based on python3.6 and uses the following dependencies:
-
-* requests (2.19.1)
-* pypvwatts (2.1.0)
-* numpy (1.15.0)
-* peewee (3.5.4)
-* scikit-learn (0.19.2)
-* pandas (0.23.4)
-* tensorflow (1.9.0)
-* matplotlib (2.2.3)
-* scipy (1.1.0)
-
-We suggest using a python virtualenv.
-
-# Execute
-
-The script can be executed by issuing the follwing command:
-
-```bash
-$ python photovoltaic_gruppe4.py data.json
-```
-
-The output can be found in the same directory in `test_data_gruppe4.json`
--- a/photovoltaic_gruppe4.py
+++ b/photovoltaic_gruppe4.py
@ -1,46 +0,0 @@
-import os
-import sys
-
-import tensorflow as tf
-
-import pywatts.db
-from pywatts.routines import *
-
-# get rid of TF debug message
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-if len(sys.argv) != 2:
-    print("Usage: python photovoltaic_gruppe4.py <file.json>")
-    exit(1)
-
-json_file = sys.argv[1]  # json file
-
-oneH, queries = input_queries(json_file)
-
-feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
-n = pywatts.neural.Net(feature_cols=feature_col)
-
-predictions = []
-total = len(queries)
-for idx, query in enumerate(queries):
-
-    percent = idx / total
-    sys.stdout.write("\r")
-    progress = ""
-    for i in range(20):
-        if i < int(20 * percent):
-            progress += "="
-        else:
-            progress += " "
-    sys.stdout.write("[ %s ] %.2f%%" % (progress, percent * 100))
-    sys.stdout.flush()
-
-    if oneH:
-        predictions.extend(predict(n, query).astype('Float64').tolist())
-    else:
-        predictions.append(predict24h(n, query))
-
-print(predictions, file=open("test_data_gruppe4.json", "w"))
-
-sys.stdout.write("\r")
-print("[ ==================== ] 100.00%")
--- a/pywatts/init.py
+++ b/pywatts/init.py
@ -1,5 +1,5 @@
 from pywatts import db
 from pywatts import fetchdata
 from pywatts import neural
-from pywatts import routines
+from pywatts import main
 from pywatts import kcross
--- a/pywatts/board.py
+++ b/pywatts/board.py
@ -1,11 +0,0 @@
-import tensorflow as tf
-import subprocess
-
-writer = tf.summary.FileWriter("tensorboard")
-checkpoint = tf.train.get_checkpoint_state('tf_pywatts_model_best')
-with tf.Session() as sess:
-    saver = tf.train.import_meta_graph(checkpoint.model_checkpoint_path + '.meta')
-    saver.restore(sess, checkpoint.model_checkpoint_path)
-writer.add_graph(sess.graph)
-
-subprocess.check_output(['tensorboard', '--logdir', 'tensorboard'])
--- a/pywatts/db.py
+++ b/pywatts/db.py
@ -6,7 +6,8 @@ from playhouse.sqlite_ext import SqliteExtDatabase
 import os.path

 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-db_path = os.path.join(BASE_DIR, "pywatts.db")
+# The SQLite file is stored one directory above this package.
+db_path = os.path.join(BASE_DIR, "../pywatts.db")
 db = SqliteExtDatabase(db_path)


@ -34,14 +35,21 @@ class Result(Model):


 def rows_to_df(indices):
+    temps = []
    dcs = []
+    winds = []

    db.connect()

    for result in Result.select().where(Result.id << indices):
+        temps += result.temperature
        dcs += result.dc_output
+        winds += result.wind_speed

    db.close()

    return pd.DataFrame(
-        {'dc': dcs})
+        {'temp': temps,
+         'dc': dcs,
+         'wind': winds
+         })
--- a/pywatts/eval_training.py
+++ b/pywatts/eval_training.py
@ -1,74 +0,0 @@
-import tensorflow as tf
-import pywatts.db
-from pywatts.routines import *
-from pywatts import kcross
-
-NUM_STATIONS_FROM_DB = 75
-K = 10
-NUM_EVAL_STATIONS = 40
-TRAIN = True
-PLOT = True
-TRAIN_STEPS = 10
-TOTAL_STEPS = 6
-NUM_QUERIES = 5
-PREDICT_QUERY = "query-sample_24hour.json"
-PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
-FIGURE_OUTPUT_DIR = "../figures/"
-
-
-df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
-X = df
-y = df['dc']
-
-
-# Define feature columns and initialize Regressor
-feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
-n = pywatts.neural.Net(feature_cols=feature_col)
-
-
-# Training data
-(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
-
-
-if TRAIN:
-
-    train_eval = None
-
-    color_gradient_base = (0.5, 0, 0)
-    color_step_width = (0.5/TOTAL_STEPS, 0, 0)
-
-    for i in range(TOTAL_STEPS):
-        # Train the model with the steps given
-        train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)
-
-        for q in range(NUM_QUERIES):
-
-            pred_query = input_query("../sample_data/" + PREDICT_QUERY, q)
-            pred_result = input_result("../sample_data/" + PREDICT_RESULT, q)
-
-            prediction = predict24h(n, pred_query)
-
-            pp.figure(q)
-
-            if i == 0:
-                pp.plot(pred_result, 'black')
-
-            pp.plot(prediction, color=color_gradient_base)
-            pp.savefig(FIGURE_OUTPUT_DIR+'{}.pdf'.format(q), orientation='landscape')
-
-        color_gradient_base = tuple([sum(x) for x in zip(color_gradient_base, color_step_width)])
-
-    for i in range(NUM_QUERIES):
-        pp.close(i)
-
-    if PLOT:
-        # Plot training success rate (with 'average loss')
-        loss = []
-        for e in train_eval:
-            loss.append(e['average_loss'])
-
-        pp.plot(loss)
-        # Needed for execution in PyCharm
-        pp.show()
-
-exit()
--- a/pywatts/kcross.py
+++ b/pywatts/kcross.py
@ -1,4 +1,6 @@
 import random
+import itertools
+from pywatts import db


 def split(data, k):
@ -16,42 +18,58 @@ def split(data, k):
    data_list = data['dc'].tolist()

    # Each sample has 337 elements
-    samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 30)]
+    samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 337)]
    # Randomly shuffle samples
    random.shuffle(samples)

    bucketsize = int(len(samples) / k)

+    # bucketsize is the number of samples per fold. Because of the int()
+    # truncation, up to k - 1 samples stay unassigned to any fold.
+    # NOTE(review): with fewer than k samples bucketsize is 0 and every
+    # fold is empty -- confirm that callers always provide enough data.
+
    # K steps
    for i in range(k):
+        # Bucket i is the evaluation fold of this round; all other buckets
+        # (j != i) are collected as training samples.
        eval_samples = []
        train_samples = []
        for j in range(k):
            if j == i:
-                eval_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
+                eval_samples.extend(samples[i * bucketsize:(i + 1) * bucketsize])
            else:
-                train_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
+                train_samples.extend(samples[j * bucketsize:(j + 1) * bucketsize])

-        # Create new dictionaries in the eval lists
-        X_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
-        y_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
+        for s in eval_samples:
+            # Create new dictionaries in the eval lists
+            X_eval.append({'dc': s[:-1]})
+            y_eval.append({'dc': s[-1]})

-        X_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
-        y_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
+        for s in train_samples:
+            X_train.append({'dc': s[:-1]})
+            y_train.append({'dc': s[-1]})
+
+        # Every entry appended above is one 337-value sample split into its
+        # first 336 values (features) and its last value (label).
+        # NOTE(review): entries are added per sample, not per fold -- confirm
+        # that the consumers of these lists expect that layout.

    return X_train, y_train, X_eval, y_eval


-def train(nn, X_train, y_train, X_eval, y_eval, steps=100):
-    """Trains the Network nn using k-cross-validation"""
+def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
+    """Train nn on each X_train entry; return all nn.evaluate() results."""
    evaluation = []
    for count, train_data in enumerate(X_train):
        for i in range(steps):
-            nn.train(train_data, y_train[count], batch_size=1000, steps=30) #batch_size=int(len(train_data['dc'])/336), steps=1)
-            evaluation.append(nn.evaluate(X_eval[count], y_eval[count]))
+            # One training step per call; batch_size is the number of
+            # 336-value feature windows contained in the entry.
+            nn.train(train_data, y_train[count], batch_size=int(len(train_data['dc'])/336), steps=1)
+            evaluation.append(nn.evaluate(X_eval[count], y_eval[count], batch_size=int(len(X_eval[count]['dc'])/336)))
            print("Training %s: %s/%s" % (count, (i+1), steps))

    return evaluation



--- a/pywatts/routines.py
+++ b/pywatts/routines.py
@ -9,7 +9,7 @@ from random import randint

 def train_split(data, size):
    used_idxs = []
-    X_values = {'dc': []}
+    X_values = {'dc': [], 'temp': [], 'wind': []}
    y_values = []
    for i in range(size):
        rnd_idx = randint(0, data.size / data.shape[1] - 337)
@ -20,6 +20,8 @@ def train_split(data, size):
            used_idxs.append(rnd_idx)

        X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist())
+        X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336].tolist())
+        X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336].tolist())
        y_values.append(data['dc'][rnd_idx + 337].tolist())

    return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values})
@ -29,25 +31,11 @@ def input_query(json_str, idx=0):
    tmp_df = pandas.read_json(json_str)

    return pandas.DataFrame.from_dict(
-        {'dc': tmp_df['dc'][idx]}
+        {'dc': tmp_df['dc'][idx],
+         'temp': tmp_df['temp'][idx],
+         'wind': tmp_df['wind'][idx]}
    )

-def input_queries(json_str):
-    tmp_df = pandas.read_json(json_str)
-
-    oneH = False
-    try:
-        s = tmp_df['max_temp'][0]
-    except KeyError:
-        oneH = True
-
-    queries = []
-    for i in range(len(tmp_df)):
-        queries.append(pandas.DataFrame.from_dict(
-            {'dc': tmp_df['dc'][i]}
-        ))
-    return oneH, queries
-

 def input_result(json_str, idx=0):
    tmp_df = pandas.read_json(json_str)
@ -66,37 +54,17 @@ def train(nn, X_train, y_train, X_val, y_val, steps=100):

 def plot_training(evaluation):
    loss = []
-    steps = []
    for e in evaluation:
-        loss.append(e['loss'])
-        steps.append(e['global_step'])
+        loss.append(e['average_loss'])

-    pp.plot(steps, loss)
+    pp.plot(loss)
    # Needed for execution in PyCharm
    pp.show()


 def predict(nn, X_pred):
    pred = nn.predict1h(X_pred)
-    # Cap results to 0
-    predictions = np.array([max(p['predictions'], [0]) for p in pred])
-    return predictions
-
-
-def predict24h(nn, X_pred):
-    predictions = []
-
-    input = {'dc': X_pred['dc'].tolist()}
-
-    for i in range(24):
-        pred = nn.predict1h(pandas.DataFrame.from_dict(input))
-        # Cap prediction to 0
-        predictions.extend(list([max(p['predictions'][0], 0) for p in pred]))
-        # Remove first value and append predicted value
-        del input['dc'][0]
-        input['dc'].append(predictions[-1])
-        # print("Prediction for hour %d/%d" % (i+1, 24))
-
+    predictions = np.array([p['predictions'] for p in pred])
    return predictions


@ -108,9 +76,3 @@ def eval_prediction(prediction, result):
    print("The Median Absolute Error: %.2f volt dc" % median_absolute_error(
        result, prediction))

-def jsonify(predictions):
-    json_out = "["
-    for v in predictions:
-        json_out += "[" + str(v) + "],"
-    json_out = json_out[:-1] + "]"
-    return json_out
--- a/pywatts/neural.py
+++ b/pywatts/neural.py
@ -1,9 +1,12 @@
+import pandas
+import numpy as np
 import tensorflow as tf


 def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
    # Create dictionary for features in hour 0 ... 335
    features = {str(idx): [] for idx in range(336)}
+    #dc_values = X['dc'].tolist()
    dc_values = X['dc']

    # Iterate the empty dictionary always adding the idx-th element from the dc_values list
@ -12,6 +15,7 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):

    labels = None
    if y is not None:
+        #labels = y['dc'].values
        labels = y['dc']

    if labels is None:
@ -19,22 +23,19 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
    else:
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

-    if num_epochs is not None:
-        return dataset.batch(len(features['0']))
-
    if shuffle:
-        return dataset.shuffle(len(features['0']*len(features)*4)).repeat().batch(batch_size)
-    else:
-        return dataset.batch(batch_size)
+        # shuffle() returns a new dataset; the result has to be kept.
+        dataset = dataset.shuffle(len(features['0']))
+    return dataset.batch(batch_size)


 class Net:
    __regressor = None
-    __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc']]
+    __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc', 'temp', 'wind']]

    def __init__(self, feature_cols=__feature_cols):
        self.__regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
-                                                     hidden_units=[64, 128, 64],
+                                                     hidden_units=[75, 75],
                                                     model_dir='tf_pywatts_model')

    def train(self, training_data, training_results, batch_size, steps):
--- a/pywatts/test_kcross_train.py
+++ b/pywatts/test_kcross_train.py
@ -1,14 +1,14 @@
+import peewee
 import tensorflow as tf
-
 import pywatts.db
 from pywatts import kcross

 NUM_STATIONS_FROM_DB = 75
-K = 10
+K = 4
 NUM_EVAL_STATIONS = 40
 TRAIN = True
 PLOT = True
-TRAIN_STEPS = 10
+TRAIN_STEPS = 4


 df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
@ -25,7 +25,7 @@ n = pywatts.neural.Net(feature_cols=feature_col)
 (X_train, y_train, X_eval, y_eval) = kcross.split(df, K)


-#train_eval = {}
+train_eval = {}

 if TRAIN:
    # Train the model with the steps given
@ -35,7 +35,7 @@ if TRAIN:

 if PLOT:
    # Plot training success rate (with 'average loss')
-    pywatts.routines.plot_training(train_eval)
+    pywatts.main.plot_training(train_eval)


 exit()
--- a/pywatts/test_predict.py
+++ b/pywatts/test_predict.py
@ -1,11 +1,11 @@
 import tensorflow as tf
 import pywatts.db
-from pywatts.routines import *
+from pywatts.main import *


 PREDICT_QUERY = "query-sample_1hour.json"
 PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
-QUERY_ID = 0
+QUERY_ID = 1


 pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
@ -21,4 +21,4 @@ prediction = predict(n, pred_query)
 print(prediction)
 print(pred_result)

-pywatts.routines.eval_prediction(prediction, pred_result)
+pywatts.main.eval_prediction(prediction, pred_result)
--- a/pywatts/test_predict24.py
+++ b/pywatts/test_predict24.py
@ -1,27 +0,0 @@
-import tensorflow as tf
-import pywatts.db
-from pywatts.routines import *
-import matplotlib.pyplot as pp
-
-
-PREDICT_QUERY = "query-sample_24hour.json"
-PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
-QUERY_ID = 0
-
-
-pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
-pred_result = input_result("../sample_data/" + PREDICT_RESULT, QUERY_ID)
-
-
-# Define feature columns and initialize Regressor
-feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
-n = pywatts.neural.Net(feature_cols=feature_col)
-
-prediction = predict24h(n, pred_query)
-
-print(prediction)
-print(pred_result)
-
-pp.plot(pred_result, 'black')
-pp.plot(prediction, 'red')
-pp.show()
--- a/pywatts/test_train.py
+++ b/pywatts/test_train.py
@ -1,7 +1,7 @@
+import peewee
 import tensorflow as tf
-
 import pywatts.db
-from pywatts.routines import *
+from pywatts.main import *

 NUM_STATIONS_FROM_DB = 75
 NUM_TRAIN_STATIONS = 400
@ -43,7 +43,7 @@ if TRAIN:

 if PLOT:
    # Plot training success rate (with 'average loss')
-    pywatts.routines.plot_training(train_eval)
+    pywatts.main.plot_training(train_eval)


 exit()