Factorization Machinesバイナリの実行サンプル
-----------------------------------------------------------------------------------
# coding: utf-8
import subprocess
import numpy as np
import pandas as pd
def _write_libfm_file(frame, path, id_offset):
    """Write the rows of *frame* to *path*, one libFM sparse-format line each.

    Every line has the form ``<rating> <user_id>:1 <mov_id + id_offset>:1``.
    """
    with open(path, 'w') as fp:
        # Iterate the three columns in lockstep instead of indexing each row
        # with ``iloc`` three times, which is far slower on large frames.
        fp.writelines(
            '{0} {1}:1 {2}:1\n'.format(rating, user_id, mov_id + id_offset)
            for rating, user_id, mov_id
            in zip(frame.rating, frame.user_id, frame.mov_id)
        )


def create_data():
    """Convert the MovieLens 100k ratings file into libFM train/test files.

    movie lens sample data 100k
    https://grouplens.org/datasets/movielens/100k/

    Reads the tab-separated file ``u.data`` (columns: user_id, mov_id,
    rating, time) from the current directory, uses the first half of the
    rows as training data and the remaining rows as test data, and writes
    them to ``train.txt`` and ``test.txt``.

    Each output line is ``<rating> <user_id>:1 <mov_id + offset>:1``, where
    ``offset`` is the largest user id in the file, so that user and movie
    feature indices do not overlap.
    """
    data = pd.read_csv(
        'u.data',
        names=['user_id', 'mov_id', 'rating', 'time'],
        sep='\t',
    )

    # Floor division keeps the split point an integer on Python 3 as well;
    # a float slice bound makes ``iloc`` raise.
    half = len(data) // 2
    traindata = data.iloc[:half, :]
    testdata = data.iloc[half:, :]

    # Take the offset from the whole dataset, not just the training half:
    # a user id that only occurs in the test half must not collide with a
    # shifted movie id.  An empty file has no maximum, so fall back to 0.
    idmax = int(data.user_id.max()) if len(data) else 0

    _write_libfm_file(traindata, 'train.txt', idmax)
    _write_libfm_file(testdata, 'test.txt', idmax)
def execute():
    """Run the libFM executable on the generated files and report its exit code.

    Launches the libFM binary located in the ``libfm`` sub-directory through
    the shell, with ``-task r`` (regression), ``train.txt`` as training
    input, ``test.txt`` as test input and ``result.txt`` as the output file.
    The exit code is printed and also returned so callers can detect failure.

    Returns:
        int: the exit code reported by ``subprocess.call``.
    """
    exec_cmd = r'.\libfm\libfm.exe -task r -train train.txt -test test.txt -out result.txt'
    # The command line is a fixed constant (no external input), so running it
    # through the shell is safe here.
    returncode = subprocess.call(exec_cmd, shell=True)
    # Function-call form prints the same text on Python 2 and Python 3.
    print(returncode)
    return returncode
if __name__ == "__main__":
    # Script entry point: only the libFM run is performed.  The data
    # conversion step is left disabled; re-enable the call below to
    # regenerate train.txt / test.txt from u.data first.
    # create_data()
    execute()
PR