Compare commits

..

No commits in common. "master" and "kcross" have entirely different histories.

14 changed files with 70 additions and 272 deletions

6
.gitignore vendored
View File

@ -111,9 +111,3 @@ venv.bak/
# Tensorflow Model
tf_pywatts_model/
# Tensorboard
pywatts/tensorboard
# Figures
figures/

View File

@ -1,27 +0,0 @@
PyWatts - Predict Output of Solar Panels
# Dependencies
PyWatts requires Python 3.6 and uses the following dependencies:
* requests (2.19.1)
* pypvwatts (2.1.0)
* numpy (1.15.0)
* peewee (3.5.4)
* scikit-learn (0.19.2)
* pandas (0.23.4)
* tensorflow (1.9.0)
* matplotlib (2.2.3)
* scipy (1.1.0)
We suggest using a python virtualenv.
# Execute
The script can be executed by issuing the following command:
```bash
$ python photovoltaic_gruppe4.py data.json
```
The output is written to `test_data_gruppe4.json` in the current working directory.

View File

@ -1,46 +0,0 @@
import os
import sys
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
# Get rid of TensorFlow's log messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Width of the textual progress bar, in characters.
PROGRESS_BAR_WIDTH = 20
# File the predictions are written to (relative to the current working directory).
OUTPUT_FILE = "test_data_gruppe4.json"

if len(sys.argv) != 2:
    print("Usage: python photovoltaic_gruppe4.py <file.json>")
    # sys.exit() instead of exit(): the latter is injected by the `site`
    # module for interactive use and is not guaranteed to exist.
    sys.exit(1)

json_file = sys.argv[1]

# oneH selects the prediction routine used below:
# predict() when True, predict24h() otherwise.
oneH, queries = input_queries(json_file)

# One numeric feature column for each of the keys "0" ... "335".
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

predictions = []
total = len(queries)

for idx, query in enumerate(queries):
    # Redraw the progress bar in place ("\r" returns to the start of the line).
    percent = idx / total
    filled = int(PROGRESS_BAR_WIDTH * percent)
    progress = "=" * filled + " " * (PROGRESS_BAR_WIDTH - filled)
    sys.stdout.write("\r")
    sys.stdout.write("[ %s ] %.2f%%" % (progress, percent * 100))
    sys.stdout.flush()

    if oneH:
        # Lower-case dtype name: the capitalised alias 'Float64' is not
        # accepted by newer NumPy releases.
        predictions.extend(predict(n, query).astype('float64').tolist())
    else:
        predictions.append(predict24h(n, query))

# Use a context manager so the output file is flushed and closed
# deterministically instead of leaking the handle.
with open(OUTPUT_FILE, "w") as out_file:
    print(predictions, file=out_file)

sys.stdout.write("\r")
print("[ ==================== ] 100.00%")

View File

@ -1,5 +1,5 @@
from pywatts import db
from pywatts import fetchdata
from pywatts import neural
from pywatts import routines
from pywatts import main
from pywatts import kcross

View File

@ -1,11 +0,0 @@
import tensorflow as tf
import subprocess
# Directory the graph summary is written to and that TensorBoard is pointed at.
LOG_DIR = "tensorboard"
# Directory holding the checkpoint whose graph is exported.
MODEL_DIR = 'tf_pywatts_model_best'

checkpoint = tf.train.get_checkpoint_state(MODEL_DIR)
if checkpoint is None:
    # get_checkpoint_state() returns None when no checkpoint state is
    # available; fail with a readable message instead of an AttributeError.
    raise SystemExit("No checkpoint found in '%s'" % MODEL_DIR)

writer = tf.summary.FileWriter(LOG_DIR)
try:
    with tf.Session() as sess:
        # Rebuild the graph from the checkpoint's meta file and load its weights.
        saver = tf.train.import_meta_graph(checkpoint.model_checkpoint_path + '.meta')
        saver.restore(sess, checkpoint.model_checkpoint_path)
        writer.add_graph(sess.graph)
finally:
    # Flush and close the event file before TensorBoard is started on it.
    writer.close()

# Blocks until the TensorBoard process exits.
subprocess.check_output(['tensorboard', '--logdir', LOG_DIR])

View File

@ -6,7 +6,8 @@ from playhouse.sqlite_ext import SqliteExtDatabase
import os.path
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
db_path = os.path.join(BASE_DIR, "pywatts.db")
db_path = os.path.join(BASE_DIR, "../pywatts.db")
print(db_path)
db = SqliteExtDatabase(db_path)
@ -34,14 +35,21 @@ class Result(Model):
def rows_to_df(indices):
temps = []
dcs = []
winds = []
db.connect()
for result in Result.select().where(Result.id << indices):
temps += result.temperature
dcs += result.dc_output
winds += result.wind_speed
db.close()
return pd.DataFrame(
{'dc': dcs})
{'temp': temps,
'dc': dcs,
'wind': winds
})

View File

@ -1,74 +0,0 @@
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
from pywatts import kcross
import os

NUM_STATIONS_FROM_DB = 75
K = 10
NUM_EVAL_STATIONS = 40
TRAIN = True
PLOT = True
TRAIN_STEPS = 10
TOTAL_STEPS = 6
NUM_QUERIES = 5
PREDICT_QUERY = "query-sample_24hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
FIGURE_OUTPUT_DIR = "../figures/"

# Load the rows with ids 1 .. NUM_STATIONS_FROM_DB - 1 from the database.
df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
X = df
y = df['dc']

# Define feature columns and initialize Regressor
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

# Training data
(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)

# Bound up front so the plotting section can tell whether training ran;
# previously PLOT without TRAIN failed on an undefined/None train_eval.
train_eval = None

if TRAIN:
    # The figure directory may not exist yet (e.g. in a fresh checkout);
    # savefig() would fail without it.
    os.makedirs(FIGURE_OUTPUT_DIR, exist_ok=True)

    # The prediction curves get a brighter red with every training round.
    color_gradient_base = (0.5, 0, 0)
    color_step_width = (0.5/TOTAL_STEPS, 0, 0)

    for i in range(TOTAL_STEPS):
        # Train the model with the steps given
        train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)

        for q in range(NUM_QUERIES):
            pred_query = input_query("../sample_data/" + PREDICT_QUERY, q)
            pred_result = input_result("../sample_data/" + PREDICT_RESULT, q)

            prediction = predict24h(n, pred_query)

            # One figure per query; the expected result is drawn only once.
            pp.figure(q)
            if i == 0:
                pp.plot(pred_result, 'black')
            pp.plot(prediction, color=color_gradient_base)
            pp.savefig(FIGURE_OUTPUT_DIR+'{}.pdf'.format(q), orientation='landscape')

        color_gradient_base = tuple(
            base + step for base, step in zip(color_gradient_base, color_step_width))

    for i in range(NUM_QUERIES):
        pp.close(i)

if PLOT:
    if train_eval is None:
        print("Nothing to plot: no training was run.")
    else:
        # Plot training success rate (with 'average loss')
        loss = [e['average_loss'] for e in train_eval]
        pp.plot(loss)
        # Needed for execution in PyCharm
        pp.show()

# Equivalent to the former exit() call, without relying on the `site` helper.
raise SystemExit

View File

@ -1,4 +1,6 @@
import random
import itertools
from pywatts import db
def split(data, k):
@ -16,42 +18,58 @@ def split(data, k):
data_list = data['dc'].tolist()
# Each sample has 337 elements
samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 30)]
samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 337)]
# Randomly shuffle samples
random.shuffle(samples)
bucketsize = int(len(samples) / k)
print(k)
print(len(data))
print(len(samples))
print(bucketsize)
# K steps
for i in range(k):
eval_dict = []
train_dict = []
eval_samples = []
train_samples = []
for j in range(k):
if j == i:
eval_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
eval_samples.extend(samples[i*bucketsize:(i+1)*bucketsize])
else:
train_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
train_samples.extend(samples[i*bucketsize:(i+1)*bucketsize])
# Create new dictionaries in the eval lists
X_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
y_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
for s in eval_samples:
# Create new dictionaries in the eval lists
X_eval.append({'dc': s[:-1]})
y_eval.append({'dc': s[-1]})
X_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
y_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
for s in train_samples:
X_train.append({'dc': s[:-1]})
y_train.append({'dc': s[-1]})
print(len(X_train) / 12)
#print(X_train)
#print(y_train)
exit(0)
return X_train, y_train, X_eval, y_eval
def train(nn, X_train, y_train, X_eval, y_eval, steps=100):
def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
"""Trains the Network nn using k-cross-validation"""
evaluation = []
for count, train_data in enumerate(X_train):
for i in range(steps):
nn.train(train_data, y_train[count], batch_size=1000, steps=30) #batch_size=int(len(train_data['dc'])/336), steps=1)
evaluation.append(nn.evaluate(X_eval[count], y_eval[count]))
nn.train(train_data, y_train[count], batch_size=int(len(train_data['dc'])/336), steps=1)
print(X_eval[count])
print(len(X_eval[count]['dc']))
print(y_eval[count])
evaluation.append(nn.evaluate(X_eval[count], y_eval[count], batch_size=int(len(X_eval[count]['dc'])/336)))
print("Training %s: %s/%s" % (count, (i+1), steps))
return evaluation

View File

@ -9,7 +9,7 @@ from random import randint
def train_split(data, size):
used_idxs = []
X_values = {'dc': []}
X_values = {'dc': [], 'temp': [], 'wind': []}
y_values = []
for i in range(size):
rnd_idx = randint(0, data.size / data.shape[1] - 337)
@ -20,6 +20,8 @@ def train_split(data, size):
used_idxs.append(rnd_idx)
X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist())
X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336].tolist())
X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336].tolist())
y_values.append(data['dc'][rnd_idx + 337].tolist())
return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values})
@ -29,25 +31,11 @@ def input_query(json_str, idx=0):
tmp_df = pandas.read_json(json_str)
return pandas.DataFrame.from_dict(
{'dc': tmp_df['dc'][idx]}
{'dc': tmp_df['dc'][idx],
'temp': tmp_df['temp'][idx],
'wind': tmp_df['wind'][idx]}
)
def input_queries(json_str):
    """Load every query from a JSON source and detect the query type.

    Args:
        json_str: Passed unchanged to ``pandas.read_json``.

    Returns:
        A tuple ``(oneH, queries)``. ``oneH`` is True when looking up
        ``tmp_df['max_temp'][0]`` raises ``KeyError`` and False otherwise.
        ``queries`` is a list with one DataFrame per input row, each holding
        that row's ``dc`` values in a single ``dc`` column.
    """
    tmp_df = pandas.read_json(json_str)

    # The lookup is only a probe; its value is not needed.
    try:
        tmp_df['max_temp'][0]
    except KeyError:
        oneH = True
    else:
        oneH = False

    queries = [
        pandas.DataFrame.from_dict({'dc': tmp_df['dc'][i]})
        for i in range(len(tmp_df))
    ]
    return oneH, queries
def input_result(json_str, idx=0):
tmp_df = pandas.read_json(json_str)
@ -66,37 +54,17 @@ def train(nn, X_train, y_train, X_val, y_val, steps=100):
def plot_training(evaluation):
loss = []
steps = []
for e in evaluation:
loss.append(e['loss'])
steps.append(e['global_step'])
loss.append(e['average_loss'])
pp.plot(steps, loss)
pp.plot(loss)
# Needed for execution in PyCharm
pp.show()
def predict(nn, X_pred):
pred = nn.predict1h(X_pred)
# Cap results to 0
predictions = np.array([max(p['predictions'], [0]) for p in pred])
return predictions
def predict24h(nn, X_pred):
predictions = []
input = {'dc': X_pred['dc'].tolist()}
for i in range(24):
pred = nn.predict1h(pandas.DataFrame.from_dict(input))
# Cap prediction to 0
predictions.extend(list([max(p['predictions'][0], 0) for p in pred]))
# Remove first value and append predicted value
del input['dc'][0]
input['dc'].append(predictions[-1])
# print("Prediction for hour %d/%d" % (i+1, 24))
predictions = np.array([p['predictions'] for p in pred])
return predictions
@ -108,9 +76,3 @@ def eval_prediction(prediction, result):
print("The Median Absolute Error: %.2f volt dc" % median_absolute_error(
result, prediction))
def jsonify(predictions):
    """Render predictions as a JSON-style list of one-element lists.

    Args:
        predictions: Iterable of values; each one is converted with ``str``.

    Returns:
        A string such as ``"[[1],[2.5]]"``. An empty input yields ``"[]"``
        (the previous implementation returned the malformed ``"]"``).
    """
    # join() avoids both the quadratic string concatenation and the
    # trailing-comma trimming that broke on empty input.
    return "[" + ",".join("[" + str(v) + "]" for v in predictions) + "]"

View File

@ -1,9 +1,12 @@
import pandas
import numpy as np
import tensorflow as tf
def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
# Create dictionary for features in hour 0 ... 335
features = {str(idx): [] for idx in range(336)}
#dc_values = X['dc'].tolist()
dc_values = X['dc']
# Iterate the empty dictionary always adding the idx-th element from the dc_values list
@ -12,6 +15,7 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
labels = None
if y is not None:
#labels = y['dc'].values
labels = y['dc']
if labels is None:
@ -19,22 +23,19 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
else:
dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
if num_epochs is not None:
return dataset.batch(len(features['0']))
if shuffle:
return dataset.shuffle(len(features['0']*len(features)*4)).repeat().batch(batch_size)
else:
return dataset.batch(batch_size)
dataset.shuffle(len(features['0']))
return dataset.batch(batch_size)
class Net:
__regressor = None
__feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc']]
__feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc', 'temp', 'wind']]
def __init__(self, feature_cols=__feature_cols):
self.__regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
hidden_units=[64, 128, 64],
hidden_units=[75, 75],
model_dir='tf_pywatts_model')
def train(self, training_data, training_results, batch_size, steps):

View File

@ -1,14 +1,14 @@
import peewee
import tensorflow as tf
import pywatts.db
from pywatts import kcross
NUM_STATIONS_FROM_DB = 75
K = 10
K = 4
NUM_EVAL_STATIONS = 40
TRAIN = True
PLOT = True
TRAIN_STEPS = 10
TRAIN_STEPS = 4
df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
@ -25,7 +25,7 @@ n = pywatts.neural.Net(feature_cols=feature_col)
(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
#train_eval = {}
train_eval = {}
if TRAIN:
# Train the model with the steps given
@ -35,7 +35,7 @@ if TRAIN:
if PLOT:
# Plot training success rate (with 'average loss')
pywatts.routines.plot_training(train_eval)
pywatts.main.plot_training(train_eval)
exit()

View File

@ -1,11 +1,11 @@
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
from pywatts.main import *
PREDICT_QUERY = "query-sample_1hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
QUERY_ID = 0
QUERY_ID = 1
pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
@ -21,4 +21,4 @@ prediction = predict(n, pred_query)
print(prediction)
print(pred_result)
pywatts.routines.eval_prediction(prediction, pred_result)
pywatts.main.eval_prediction(prediction, pred_result)

View File

@ -1,27 +0,0 @@
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
import matplotlib.pyplot as pp
# Sample files: the result file name is derived from the query file name.
PREDICT_QUERY = "query-sample_24hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
# Index handed to input_query() / input_result() for both files.
QUERY_ID = 0

# Load the query first, then the matching expected result.
pred_query, pred_result = (
    input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID),
    input_result("../sample_data/" + PREDICT_RESULT, QUERY_ID),
)

# Build the 336 numeric feature columns ("0" ... "335") and set up the regressor.
feature_col = list(map(tf.feature_column.numeric_column, map(str, range(336))))
n = pywatts.neural.Net(feature_cols=feature_col)

prediction = predict24h(n, pred_query)

# Print the prediction first, then the expected result.
for series in (prediction, pred_result):
    print(series)

# Expected result in black, prediction in red.
for series, colour in ((pred_result, 'black'), (prediction, 'red')):
    pp.plot(series, colour)
pp.show()

View File

@ -1,7 +1,7 @@
import peewee
import tensorflow as tf
import pywatts.db
from pywatts.routines import *
from pywatts.main import *
NUM_STATIONS_FROM_DB = 75
NUM_TRAIN_STATIONS = 400
@ -43,7 +43,7 @@ if TRAIN:
if PLOT:
# Plot training success rate (with 'average loss')
pywatts.routines.plot_training(train_eval)
pywatts.main.plot_training(train_eval)
exit()