% Journal article: Shinmura (2014), Statistics, Optimization and Information
% Computing, volume 2, issue 2, pages 114-129.
% Cleaned from an automated export: HTML markup removed from the abstract,
% LaTeX special characters escaped, acronyms in the title brace-protected,
% month given as the standard macro, and the page range written with "--".
@article{Shinmura_2014,
  author   = {Shinmura, Shuichi},
  title    = {Improvement of {CPU} time of Linear Discriminant Function based on {MNM} criterion by {IP}},
  journal  = {Statistics, Optimization \& Information Computing},
  year     = {2014},
  month    = jun,
  volume   = {2},
  number   = {2},
  pages    = {114--129},
  doi      = {10.19139/soic.v2i2.52},
  url      = {http://iapress.org/index.php/soic/article/view/20140604},
  abstract = {Revised IP-OLDF (optimal linear discriminant function by integer
    programming) is a linear discriminant function to minimize the number of
    misclassifications (NM) of training samples by integer programming (IP).
    However, IP requires large computation (CPU) time. In this paper, it is
    proposed how to reduce CPU time by using linear programming (LP). In the
    first phase, Revised LP-OLDF is applied to all cases, and all cases are
    categorized into two groups: those that are classified correctly or those
    that are not classified by support vectors (SVs). In the second phase,
    Revised IP-OLDF is applied to the misclassified cases by SVs. This method
    is called Revised IPLP-OLDF.
    In this research, it is evaluated whether NM of Revised IPLP-OLDF is good
    estimate of the minimum number of misclassifications (MNM) by Revised
    IP-OLDF. Four kinds of the real data---Iris data, Swiss bank note data,
    student data, and CPD data---are used as training samples. Four kinds of
    20,000 re-sampling cases generated from these data are used as the
    evaluation samples. There are a total of 149 models of all combinations of
    independent variables by these data. NMs and CPU times of the 149 models
    are compared with Revised IPLP-OLDF and Revised IP-OLDF. The following
    results are obtained:
    1) Revised IPLP-OLDF significantly improves CPU time.
    2) In the case of training samples, all 149 NMs of Revised IPLP-OLDF are
    equal to the MNM of Revised IP-OLDF.
    3) In the case of evaluation samples, most NMs of Revised IPLP-OLDF are
    equal to NM of Revised IP-OLDF.
    4) Generalization abilities of both discriminant functions are concluded
    to be high, because the difference between the error rates of training and
    evaluation samples are almost within 2\%.
    Therefore, Revised IPLP-OLDF is recommended for the analysis of big data
    instead of Revised IP-OLDF. Next, Revised IPLP-OLDF is compared with LDF
    and logistic regression by 100-fold cross validation using 100 re-sampling
    samples. Means of error rates of Revised IPLP-OLDF are remarkable fewer
    than those of LDF and logistic regression.},
}