Compare commits

..

No commits in common. "master" and "kcross" have entirely different histories.

14 changed files with 70 additions and 272 deletions

6
.gitignore vendored
View File

@ -111,9 +111,3 @@ venv.bak/
# Tensorflow Model # Tensorflow Model
tf_pywatts_model/ tf_pywatts_model/
# Tensorboard
pywatts/tensorboard
# Figures
figures/

View File

@ -1,27 +0,0 @@
PyWatts - Predict Output of Solar Panels
# Dependencies
PyWatts is based on python3.6 and uses the following dependencies:
* requests (2.19.1)
* pypvwatts (2.1.0)
* numpy (1.15.0)
* peewee (3.5.4)
* scikit-learn (0.19.2)
* pandas (0.23.4)
* tensorflow (1.9.0)
* matplotlib (2.2.3)
* scipy (1.1.0)
We suggest using a python virtualenv.
# Execute
The script can be executed by issuing the following command:
```bash
$ python photovoltaic_gruppe4.py data.json
```
The output is written to `test_data_gruppe4.json` in the current working directory.

View File

@ -1,46 +0,0 @@
import os
import sys
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
# Get rid of TF debug messages.
# NOTE(review): tensorflow is already imported above, so this variable may be
# set too late to have an effect — confirm, and move it before the import if so.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# File the predictions are written to (relative to the working directory).
OUTPUT_FILE = "test_data_gruppe4.json"
# Width of the textual progress bar, in characters.
BAR_WIDTH = 20

if len(sys.argv) != 2:
    print("Usage: python photovoltaic_gruppe4.py <file.json>")
    # sys.exit instead of the interactive-only `exit` helper from `site`.
    sys.exit(1)

json_file = sys.argv[1]  # json file
# oneH selects between the 1-hour and the 24-hour prediction routine below.
oneH, queries = input_queries(json_file)

# One numeric feature column per input value, named "0" ... "335".
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

predictions = []
total = len(queries)
for idx, query in enumerate(queries):
    # Redraw the progress bar in place ("\r" returns to the line start).
    percent = idx / total
    filled = int(BAR_WIDTH * percent)
    progress = "=" * filled + " " * (BAR_WIDTH - filled)
    sys.stdout.write("\r")
    sys.stdout.write("[ %s ] %.2f%%" % (progress, percent * 100))
    sys.stdout.flush()

    if oneH:
        predictions.extend(predict(n, query).astype('Float64').tolist())
    else:
        predictions.append(predict24h(n, query))

# Use a context manager so the output file is flushed and closed
# deterministically instead of leaking the handle.
with open(OUTPUT_FILE, "w") as out_file:
    print(predictions, file=out_file)

sys.stdout.write("\r")
print("[ ==================== ] 100.00%")

View File

@ -1,5 +1,5 @@
from pywatts import db from pywatts import db
from pywatts import fetchdata from pywatts import fetchdata
from pywatts import neural from pywatts import neural
from pywatts import routines from pywatts import main
from pywatts import kcross from pywatts import kcross

View File

@ -1,11 +0,0 @@
import tensorflow as tf
import subprocess
# Export the graph of the best saved model and open it in TensorBoard.
checkpoint = tf.train.get_checkpoint_state('tf_pywatts_model_best')
if checkpoint is None:
    # get_checkpoint_state returns None when no checkpoint state is available;
    # fail with a clear message instead of an AttributeError further down.
    raise RuntimeError("No checkpoint found in 'tf_pywatts_model_best'")

writer = tf.summary.FileWriter("tensorboard")
try:
    with tf.Session() as sess:
        # Rebuild the graph from the checkpoint's meta file, then load weights.
        saver = tf.train.import_meta_graph(checkpoint.model_checkpoint_path + '.meta')
        saver.restore(sess, checkpoint.model_checkpoint_path)
        writer.add_graph(sess.graph)
finally:
    # Flush the event file to disk before TensorBoard starts; the call below
    # blocks, so the writer would otherwise never be closed explicitly.
    writer.close()

subprocess.check_output(['tensorboard', '--logdir', 'tensorboard'])

View File

@ -6,7 +6,8 @@ from playhouse.sqlite_ext import SqliteExtDatabase
import os.path import os.path
BASE_DIR = os.path.dirname(os.path.abspath(__file__)) BASE_DIR = os.path.dirname(os.path.abspath(__file__))
db_path = os.path.join(BASE_DIR, "pywatts.db") db_path = os.path.join(BASE_DIR, "../pywatts.db")
print(db_path)
db = SqliteExtDatabase(db_path) db = SqliteExtDatabase(db_path)
@ -34,14 +35,21 @@ class Result(Model):
def rows_to_df(indices): def rows_to_df(indices):
temps = []
dcs = [] dcs = []
winds = []
db.connect() db.connect()
for result in Result.select().where(Result.id << indices): for result in Result.select().where(Result.id << indices):
temps += result.temperature
dcs += result.dc_output dcs += result.dc_output
winds += result.wind_speed
db.close() db.close()
return pd.DataFrame( return pd.DataFrame(
{'dc': dcs}) {'temp': temps,
'dc': dcs,
'wind': winds
})

View File

@ -1,74 +0,0 @@
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
from pywatts import kcross
import os  # needed below to create the figure output directory

NUM_STATIONS_FROM_DB = 75
K = 10
NUM_EVAL_STATIONS = 40
TRAIN = True
PLOT = True
TRAIN_STEPS = 10
TOTAL_STEPS = 6
NUM_QUERIES = 5

PREDICT_QUERY = "query-sample_24hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
FIGURE_OUTPUT_DIR = "../figures/"

df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
# X and y are not read anywhere below; kept so the module namespace is unchanged.
X = df
y = df['dc']

# Define feature columns and initialize Regressor
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

# Training data
(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)

# Bound up front so the PLOT section can tell "no training happened" apart
# from a NameError when TRAIN is switched off.
train_eval = None

if TRAIN:
    # savefig does not create missing directories.
    os.makedirs(FIGURE_OUTPUT_DIR, exist_ok=True)

    # Prediction curves are drawn in a red tone that gets brighter by one
    # step per training round.
    color_gradient_base = (0.5, 0, 0)
    color_step_width = (0.5 / TOTAL_STEPS, 0, 0)

    for i in range(TOTAL_STEPS):
        # Train the model with the steps given
        train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)

        for q in range(NUM_QUERIES):
            pred_query = input_query("../sample_data/" + PREDICT_QUERY, q)
            pred_result = input_result("../sample_data/" + PREDICT_RESULT, q)
            prediction = predict24h(n, pred_query)

            # One figure per query, reused across training rounds.
            pp.figure(q)
            if i == 0:
                # Draw the expected result only once per figure.
                pp.plot(pred_result, 'black')
            pp.plot(prediction, color=color_gradient_base)
            pp.savefig(FIGURE_OUTPUT_DIR + '{}.pdf'.format(q), orientation='landscape')

        color_gradient_base = tuple(
            base + step for base, step in zip(color_gradient_base, color_step_width)
        )

    for i in range(NUM_QUERIES):
        pp.close(i)

if PLOT:
    if train_eval is None:
        print("Nothing to plot: no training was run.")
    else:
        # Plot training success rate (with 'average loss')
        loss = [e['average_loss'] for e in train_eval]
        pp.plot(loss)
        # Needed for execution in PyCharm
        pp.show()

exit()

View File

@ -1,4 +1,6 @@
import random import random
import itertools
from pywatts import db
def split(data, k): def split(data, k):
@ -16,42 +18,58 @@ def split(data, k):
data_list = data['dc'].tolist() data_list = data['dc'].tolist()
# Each sample has 337 elements # Each sample has 337 elements
samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 30)] samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 337)]
# Randomly shuffle samples # Randomly shuffle samples
random.shuffle(samples) random.shuffle(samples)
bucketsize = int(len(samples) / k) bucketsize = int(len(samples) / k)
print(k)
print(len(data))
print(len(samples))
print(bucketsize)
# K steps # K steps
for i in range(k): for i in range(k):
eval_dict = []
train_dict = []
eval_samples = [] eval_samples = []
train_samples = [] train_samples = []
for j in range(k): for j in range(k):
if j == i: if j == i:
eval_samples.extend(samples[j*bucketsize:(j+1)*bucketsize]) eval_samples.extend(samples[i*bucketsize:(i+1)*bucketsize])
else: else:
train_samples.extend(samples[j*bucketsize:(j+1)*bucketsize]) train_samples.extend(samples[i*bucketsize:(i+1)*bucketsize])
# Create new dictionaries in the eval lists for s in eval_samples:
X_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 != 0]}) # Create new dictionaries in the eval lists
y_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 == 0]}) X_eval.append({'dc': s[:-1]})
y_eval.append({'dc': s[-1]})
X_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 != 0]}) for s in train_samples:
y_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 == 0]}) X_train.append({'dc': s[:-1]})
y_train.append({'dc': s[-1]})
print(len(X_train) / 12)
#print(X_train)
#print(y_train)
exit(0)
return X_train, y_train, X_eval, y_eval return X_train, y_train, X_eval, y_eval
def train(nn, X_train, y_train, X_eval, y_eval, steps=100): def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
"""Trains the Network nn using k-cross-validation""" """Trains the Network nn using k-cross-validation"""
evaluation = [] evaluation = []
for count, train_data in enumerate(X_train): for count, train_data in enumerate(X_train):
for i in range(steps): for i in range(steps):
nn.train(train_data, y_train[count], batch_size=1000, steps=30) #batch_size=int(len(train_data['dc'])/336), steps=1) nn.train(train_data, y_train[count], batch_size=int(len(train_data['dc'])/336), steps=1)
evaluation.append(nn.evaluate(X_eval[count], y_eval[count])) print(X_eval[count])
print(len(X_eval[count]['dc']))
print(y_eval[count])
evaluation.append(nn.evaluate(X_eval[count], y_eval[count], batch_size=int(len(X_eval[count]['dc'])/336)))
print("Training %s: %s/%s" % (count, (i+1), steps)) print("Training %s: %s/%s" % (count, (i+1), steps))
return evaluation

View File

@ -9,7 +9,7 @@ from random import randint
def train_split(data, size): def train_split(data, size):
used_idxs = [] used_idxs = []
X_values = {'dc': []} X_values = {'dc': [], 'temp': [], 'wind': []}
y_values = [] y_values = []
for i in range(size): for i in range(size):
rnd_idx = randint(0, data.size / data.shape[1] - 337) rnd_idx = randint(0, data.size / data.shape[1] - 337)
@ -20,6 +20,8 @@ def train_split(data, size):
used_idxs.append(rnd_idx) used_idxs.append(rnd_idx)
X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist()) X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist())
X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336].tolist())
X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336].tolist())
y_values.append(data['dc'][rnd_idx + 337].tolist()) y_values.append(data['dc'][rnd_idx + 337].tolist())
return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values}) return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values})
@ -29,25 +31,11 @@ def input_query(json_str, idx=0):
tmp_df = pandas.read_json(json_str) tmp_df = pandas.read_json(json_str)
return pandas.DataFrame.from_dict( return pandas.DataFrame.from_dict(
{'dc': tmp_df['dc'][idx]} {'dc': tmp_df['dc'][idx],
'temp': tmp_df['temp'][idx],
'wind': tmp_df['wind'][idx]}
) )
def input_queries(json_str):
tmp_df = pandas.read_json(json_str)
oneH = False
try:
s = tmp_df['max_temp'][0]
except KeyError:
oneH = True
queries = []
for i in range(len(tmp_df)):
queries.append(pandas.DataFrame.from_dict(
{'dc': tmp_df['dc'][i]}
))
return oneH, queries
def input_result(json_str, idx=0): def input_result(json_str, idx=0):
tmp_df = pandas.read_json(json_str) tmp_df = pandas.read_json(json_str)
@ -66,37 +54,17 @@ def train(nn, X_train, y_train, X_val, y_val, steps=100):
def plot_training(evaluation): def plot_training(evaluation):
loss = [] loss = []
steps = []
for e in evaluation: for e in evaluation:
loss.append(e['loss']) loss.append(e['average_loss'])
steps.append(e['global_step'])
pp.plot(steps, loss) pp.plot(loss)
# Needed for execution in PyCharm # Needed for execution in PyCharm
pp.show() pp.show()
def predict(nn, X_pred): def predict(nn, X_pred):
pred = nn.predict1h(X_pred) pred = nn.predict1h(X_pred)
# Cap results to 0 predictions = np.array([p['predictions'] for p in pred])
predictions = np.array([max(p['predictions'], [0]) for p in pred])
return predictions
def predict24h(nn, X_pred):
predictions = []
input = {'dc': X_pred['dc'].tolist()}
for i in range(24):
pred = nn.predict1h(pandas.DataFrame.from_dict(input))
# Cap prediction to 0
predictions.extend(list([max(p['predictions'][0], 0) for p in pred]))
# Remove first value and append predicted value
del input['dc'][0]
input['dc'].append(predictions[-1])
# print("Prediction for hour %d/%d" % (i+1, 24))
return predictions return predictions
@ -108,9 +76,3 @@ def eval_prediction(prediction, result):
print("The Median Absolute Error: %.2f volt dc" % median_absolute_error( print("The Median Absolute Error: %.2f volt dc" % median_absolute_error(
result, prediction)) result, prediction))
def jsonify(predictions):
json_out = "["
for v in predictions:
json_out += "[" + str(v) + "],"
json_out = json_out[:-1] + "]"
return json_out

View File

@ -1,9 +1,12 @@
import pandas
import numpy as np
import tensorflow as tf import tensorflow as tf
def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1): def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
# Create dictionary for features in hour 0 ... 335 # Create dictionary for features in hour 0 ... 335
features = {str(idx): [] for idx in range(336)} features = {str(idx): [] for idx in range(336)}
#dc_values = X['dc'].tolist()
dc_values = X['dc'] dc_values = X['dc']
# Iterate the empty dictionary always adding the idx-th element from the dc_values list # Iterate the empty dictionary always adding the idx-th element from the dc_values list
@ -12,6 +15,7 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
labels = None labels = None
if y is not None: if y is not None:
#labels = y['dc'].values
labels = y['dc'] labels = y['dc']
if labels is None: if labels is None:
@ -19,22 +23,19 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
else: else:
dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
if num_epochs is not None:
return dataset.batch(len(features['0']))
if shuffle: if shuffle:
return dataset.shuffle(len(features['0']*len(features)*4)).repeat().batch(batch_size) dataset.shuffle(len(features['0']))
else:
return dataset.batch(batch_size) return dataset.batch(batch_size)
class Net: class Net:
__regressor = None __regressor = None
__feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc']] __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc', 'temp', 'wind']]
def __init__(self, feature_cols=__feature_cols): def __init__(self, feature_cols=__feature_cols):
self.__regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols, self.__regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
hidden_units=[64, 128, 64], hidden_units=[75, 75],
model_dir='tf_pywatts_model') model_dir='tf_pywatts_model')
def train(self, training_data, training_results, batch_size, steps): def train(self, training_data, training_results, batch_size, steps):

View File

@ -1,14 +1,14 @@
import peewee
import tensorflow as tf import tensorflow as tf
import pywatts.db import pywatts.db
from pywatts import kcross from pywatts import kcross
NUM_STATIONS_FROM_DB = 75 NUM_STATIONS_FROM_DB = 75
K = 10 K = 4
NUM_EVAL_STATIONS = 40 NUM_EVAL_STATIONS = 40
TRAIN = True TRAIN = True
PLOT = True PLOT = True
TRAIN_STEPS = 10 TRAIN_STEPS = 4
df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB))) df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
@ -25,7 +25,7 @@ n = pywatts.neural.Net(feature_cols=feature_col)
(X_train, y_train, X_eval, y_eval) = kcross.split(df, K) (X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
#train_eval = {} train_eval = {}
if TRAIN: if TRAIN:
# Train the model with the steps given # Train the model with the steps given
@ -35,7 +35,7 @@ if TRAIN:
if PLOT: if PLOT:
# Plot training success rate (with 'average loss') # Plot training success rate (with 'average loss')
pywatts.routines.plot_training(train_eval) pywatts.main.plot_training(train_eval)
exit() exit()

View File

@ -1,11 +1,11 @@
import tensorflow as tf import tensorflow as tf
import pywatts.db import pywatts.db
from pywatts.routines import * from pywatts.main import *
PREDICT_QUERY = "query-sample_1hour.json" PREDICT_QUERY = "query-sample_1hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result") PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
QUERY_ID = 0 QUERY_ID = 1
pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID) pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
@ -21,4 +21,4 @@ prediction = predict(n, pred_query)
print(prediction) print(prediction)
print(pred_result) print(pred_result)
pywatts.routines.eval_prediction(prediction, pred_result) pywatts.main.eval_prediction(prediction, pred_result)

View File

@ -1,27 +0,0 @@
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
import matplotlib.pyplot as pp
PREDICT_QUERY = "query-sample_24hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
QUERY_ID = 0

# Load the selected sample query together with its reference result.
pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
pred_result = input_result("../sample_data/" + PREDICT_RESULT, QUERY_ID)

# Build the 336 numeric feature columns ("0" ... "335") and set up the regressor.
feature_col = []
for column_idx in range(336):
    feature_col.append(tf.feature_column.numeric_column(str(column_idx)))
n = pywatts.neural.Net(feature_cols=feature_col)

prediction = predict24h(n, pred_query)

# Print the prediction first, then the reference values.
for series in (prediction, pred_result):
    print(series)

# Reference curve in black, prediction in red.
for series, colour in ((pred_result, 'black'), (prediction, 'red')):
    pp.plot(series, colour)
pp.show()

View File

@ -1,7 +1,7 @@
import peewee
import tensorflow as tf import tensorflow as tf
import pywatts.db import pywatts.db
from pywatts.routines import * from pywatts.main import *
NUM_STATIONS_FROM_DB = 75 NUM_STATIONS_FROM_DB = 75
NUM_TRAIN_STATIONS = 400 NUM_TRAIN_STATIONS = 400
@ -43,7 +43,7 @@ if TRAIN:
if PLOT: if PLOT:
# Plot training success rate (with 'average loss') # Plot training success rate (with 'average loss')
pywatts.routines.plot_training(train_eval) pywatts.main.plot_training(train_eval)
exit() exit()