# Random Forest User
from random import seed from random import randrange from csv import reader import re import csv from ast import literal_eval
# Import the random forest
def _parse_tree(row):
    """Turn one CSV record into a tree dict by evaluating every cell as a Python literal."""
    return {key: literal_eval(cell) for key, cell in row.items()}


# Load the stored forest: every record of the CSV file becomes one tree.
with open('A forest.csv', 'r') as file:
    Forest = list(map(_parse_tree, csv.DictReader(file, skipinitialspace=True)))
def load_csv(filename):
    """Read a CSV file into a list of rows, leaving out empty rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list holding one list of string fields per non-empty row.
    """
    with open(filename, 'r') as handle:
        return [fields for fields in reader(handle) if fields]
# Makes an array of the data. Each row is a point in time and
# each column is a channel, except for the last column, which contains
# the desired output.
def make_data(dataname, startname, labelname):
    """Build the labelled data matrix from the three input files.

    Every row of the result is one point in time: the channel values as
    floats, followed by one extra column holding the label of the trial the
    row belongs to. A label of 0 marks rows without a known label (rows
    before the first trial start, and trials whose label entry is NaN).

    Args:
        dataname: File whose lines each hold whitespace-separated numbers,
            one number per channel.
        startname: File with one integer per line, the row index at which
            each trial starts.
        labelname: File with one label per line; an entry matching the NaN
            pattern is stored as 0.

    Returns:
        The list of rows, each a list of floats with the label appended.
    """
    data = load_csv(dataname)
    starttimes = load_csv(startname)
    labels = load_csv(labelname)
    numtrials = len(labels)
    regex = r"NaN\s+"

    # The channel values sit, whitespace-separated, in the first CSV field.
    for i, row in enumerate(data):
        data[i] = [float(value) for value in row[0].split()]

    # Convert start times and labels; a label of 0 indicates a test trial.
    for i in range(numtrials):
        starttimes[i] = int(starttimes[i][0])
        if re.search(regex, labels[i][0]):
            labels[i] = 0
        else:
            labels[i] = int(labels[i][0])

    # Rows before the first trial start belong to no trial.
    for j in range(0, starttimes[0]):
        data[j].append(0)

    # A trial owns the rows from its own start up to the start of the next
    # trial (or the end of the data for the last trial), so those rows take
    # that trial's own label.
    for i in range(numtrials):
        begin = starttimes[i]
        end = starttimes[i + 1] if i + 1 < numtrials else len(data)
        for j in range(begin, end):
            data[j].append(labels[i])

    return data
# Delete the rows with an unknown desired output
def delete_test_trials(data):
    """Return a new list with only the rows whose last entry is not 0.

    Args:
        data: List of rows; the last entry of each row is its label.

    Returns:
        A new list containing the rows with a non-zero label, in order.
    """
    return [sample for sample in data if sample[-1] != 0]
# Make a smaller set without replacement
def smaller_set(data, n_rows):
    """Draw n_rows rows from data at random, without replacement.

    The input list is left untouched; rows are drawn from a shallow copy.

    Args:
        data: List of rows to sample from.
        n_rows: Number of rows to draw.

    Returns:
        A new list with the drawn rows, in the order they were drawn.

    Raises:
        ValueError: If n_rows is larger than the number of rows in data.
    """
    if n_rows > len(data):
        raise ValueError("cannot draw more rows than the data set contains")

    # Work on a copy so that the caller's list keeps all of its rows.
    remaining = list(data)
    new_data = []
    while len(new_data) < n_rows:
        index = randrange(0, len(remaining))
        # pop removes exactly the drawn position; no search by value needed.
        new_data.append(remaining.pop(index))
    return new_data
# The methods needed for predicting an outcome:
# Make a prediction with a list of bagged trees
def bagging_predict(trees, row):
    """Return the prediction that occurs most often among the given trees.

    Args:
        trees: Iterable of decision trees, each accepted by predict.
        row: The data row to classify.

    Returns:
        The value predicted by the largest number of trees.
    """
    predictions = []
    for tree in trees:
        predictions.append(predict(tree, row))
    # Compare each distinct outcome by how many trees voted for it.
    candidates = set(predictions)
    return max(candidates, key=predictions.count)
# Make a prediction with a decision tree
def predict(node, row):
    """Walk a decision tree from node down to a leaf and return the leaf.

    At each node the row entry at position node['index'] is compared with
    node['value']: a smaller entry follows 'left', anything else follows
    'right'. A child that is a dict is another node; any other child is
    the outcome.

    Args:
        node: Dict with the keys 'index', 'value', 'left' and 'right'.
        row: Indexable row of values to classify.

    Returns:
        The first non-dict child reached while descending the tree.
    """
    while True:
        branch = 'left' if row[node['index']] < node['value'] else 'right'
        child = node[branch]
        if not isinstance(child, dict):
            return child
        node = child
# Testing to see if we can get an outcome
# Seed the random module's generator with a fixed value.
seed(2)
# The three file names handed to make_data: data, trial start times, labels.
dataname = 'k3b_s.txt' startname = 'k3b_HDR_TRIG.txt' labelname = 'k3b_HDR_Classlabel.txt'
# Build the labelled rows, keep only those whose label is not 0, and
# initialise the counters used by the loop below.
data = make_data(dataname, startname, labelname) train = delete_test_trials(data) faults = 0 misses = 0 first = True
# For every kept row, compare the forest's rounded prediction with the row's
# last entry. faults counts rows where they differ; misses counts rows whose
# label is not of type int, and the loop index is printed once via the
# `first` flag.
# NOTE(review): confirm how the `if` statements in the loop body nest.
for i in range(0,len(train)):
prediction = bagging_predict(Forest,train[i]) actual = train[i][-1] #print("Prediction: " + str(prediction) + ", Actual: " + str(actual)) if round(prediction) != actual: faults = faults + 1 if type(actual) is not int: misses = misses + 1 if first: print(i) first = False
# Print faults and misses as fractions of the number of rows in train.
print(faults/len(train)) print(misses/len(train))