
Overview

TODO:

Implementation

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the data (features in the first three columns, target in the fourth)
data = np.loadtxt("sample.csv", delimiter=",", skiprows=1)

x = data[:, :3]
y = data[:, 3]
m = len(y)

# Z-score normalization: scale each column to zero mean and unit variance
def norm(X):
    X_mean = X.mean(axis=0, keepdims=True)   # per-column mean, shape (1, n)
    X_std = X.std(axis=0, keepdims=True)     # per-column std, shape (1, n)
    X_norm = (X - X_mean) / X_std
    return X_norm, X_mean, X_std
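# The per-column mean and std are returned so that the identical scaling,
# (x - X_mean) / X_std, can be reapplied to unseen inputs at prediction time.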

# Initial weights: a bias term plus three feature weights, all zero
weight_init = np.zeros((4, 1))

# Cost function: J(w) = (1 / 2m) * sum((Xw - y)^2)
def cost(x, y, weight):
    m = len(y)
    y_hat = x.dot(weight)                    # predictions, shape (m, 1)
    diff = np.power(y_hat - np.transpose([y]), 2)
    J = (1.0 / (2 * m)) * diff.sum(axis=0)
    return J
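# np.transpose([y]) reshapes the 1-D target of shape (m,) into a column
# vector of shape (m, 1); without it, y_hat - y would broadcast to (m, m).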

# Cost function (vectorized): J(w) = (Xw - y)^T (Xw - y) / (2m)
def cost2(x, y, weight):
    m = len(y)
    y_shaped = np.transpose([y])             # column vector, shape (m, 1)
    xw = x.dot(weight)
    result = np.dot((xw - y_shaped).T, (xw - y_shaped)) / (2 * m)
    return result
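# cost and cost2 compute the same value; cost2 replaces the elementwise
# square-and-sum with a single inner product and returns it as a 1x1 array.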

# Batch gradient descent: w := w - (alpha / m) * X^T (Xw - y)
def gradient_descent(x, y, weight, alpha, iter_num):
    m = len(y)
    j_hist = np.zeros((iter_num, 1))
    for i in range(iter_num):
        weight = weight - alpha * (1.0 / m) * np.transpose(x).dot(x.dot(weight) - np.transpose([y]))
        j_hist[i] = cost(x, y, weight)       # record the cost after each update
    return weight, j_hist
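# If alpha is too large, j_hist grows instead of shrinking; the cost plot at
# the end of the script makes such divergence easy to spot.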


if __name__ == "__main__":

    # Learning rate and number of iterations
    alpha = 0.01
    iter_num = 500

    # Normalize the training data, then prepend the x0 (bias) column
    X_norm, X_mean, X_std = norm(x)
    x_padded = np.column_stack([np.ones([m, 1]), X_norm])

    # Fit the weights by gradient descent on the normalized data
    weight, j_hist = gradient_descent(x_padded, y, weight_init, alpha, iter_num)

    print("weight: ", weight)

    # Visualize the cost at each iteration
    plt.plot(range(j_hist.size), j_hist[:, 0], "r")
    plt.xlabel("number of iterations")
    plt.ylabel("cost J")
    plt.grid(True)
    plt.show()
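Once training finishes, the learned weights can be applied to unseen data. The snippet below is an illustrative sketch appended to the main block rather than part of the original listing: the feature values in x_new are hypothetical, and the point is that a new input must be scaled with the same X_mean and X_std that norm computed from the training set before the bias column is prepended.

    # Prediction sketch (x_new holds hypothetical feature values)
    x_new = np.array([[1650.0, 3.0, 1.0]])
    x_new_norm = (x_new - X_mean) / X_std    # reuse the training statistics
    x_new_padded = np.column_stack([np.ones((1, 1)), x_new_norm])
    print("prediction: ", x_new_padded.dot(weight))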
