{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'G:\\\\work\\\\1116_19ZWD124F\\\\data\\\\model_M\\\\COAD\\\\model'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pwd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import io\n", "import sys\n", "import time\n", "from warnings import filterwarnings\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import scipy\n", "import sklearn\n", "#from sklearn.metrics import plot_roc_curve\n", "import sklearn.metrics as mtr\n", "from scipy import stats\n", "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.ensemble import GradientBoostingClassifier\n", "from sklearn.ensemble import ExtraTreesRegressor\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.svm import SVC\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn.feature_selection import RFE,RFECV\n", "from sklearn.feature_selection import VarianceThreshold\n", "from sklearn.model_selection import StratifiedKFold, train_test_split, KFold\n", "from sklearn.metrics import roc_curve, auc\n", "#from sklearn.pipeline import make_pipeline,Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "#from matplotlib.backends.backend_pdf import PdfPages\n", "from catboost import CatBoostClassifier,CatBoostRegressor,cv,Pool\n", "#import hyperopt" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "all_time_start = time.time()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "raw_data = pd.read_csv ('./M_expr.txt',sep='\\t',header=0) " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ENSG00000204542.2 | \n", "ENSG00000064195.7 | \n", "ENSG00000276122.1 | \n", "ENSG00000156076.8 | \n", "ENSG00000106536.18 | \n", "ENSG00000242575.1 | \n", "ENSG00000168243.9 | \n", "ENSG00000250641.1 | \n", "ENSG00000204889.9 | \n", "ENSG00000231826.4 | \n", "... | \n", "ENSG00000177519.3 | \n", "ENSG00000140807.5 | \n", "ENSG00000166415.13 | \n", "ENSG00000211938.2 | \n", "ENSG00000133665.11 | \n", "ENSG00000211973.2 | \n", "ENSG00000183036.9 | \n", "ENSG00000205293.3 | \n", "ENSG00000211947.2 | \n", "label | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TCGA-AA-3870-01A | \n", "9.192293 | \n", "0.000000 | \n", "3.700440 | \n", "1.000000 | \n", "6.569856 | \n", "0.000000 | \n", "4.000000 | \n", "2.000000 | \n", "0.000000 | \n", "5.672425 | \n", "... | \n", "2.807355 | \n", "8.607330 | \n", "7.658211 | \n", "1.584963 | \n", "0.000000 | \n", "6.700440 | \n", "4.807355 | \n", "1.000000 | \n", "8.438792 | \n", "1 | \n", "
TCGA-AZ-6600-01A | \n", "8.873444 | \n", "3.000000 | \n", "1.000000 | \n", "0.000000 | \n", "4.643856 | \n", "1.000000 | \n", "12.692398 | \n", "5.807355 | \n", "6.044394 | \n", "7.348728 | \n", "... | \n", "2.000000 | \n", "9.854868 | \n", "10.816984 | \n", "6.491853 | \n", "1.000000 | \n", "8.413628 | \n", "3.807355 | \n", "5.247928 | \n", "10.004220 | \n", "1 | \n", "
TCGA-A6-2671-01A | \n", "4.857981 | \n", "4.247928 | \n", "1.584963 | \n", "7.924813 | \n", "8.438792 | \n", "0.000000 | \n", "11.672425 | \n", "8.710806 | \n", "0.000000 | \n", "8.562242 | \n", "... | \n", "3.459432 | \n", "10.452241 | \n", "6.087463 | \n", "1.000000 | \n", "1.584963 | \n", "2.584963 | \n", "6.942515 | \n", "0.000000 | \n", "3.459432 | \n", "1 | \n", "
TCGA-A6-5662-01A | \n", "11.465056 | \n", "6.954196 | \n", "5.357552 | \n", "11.619303 | \n", "0.000000 | \n", "5.357552 | \n", "12.078151 | \n", "10.924813 | \n", "10.435670 | \n", "6.754888 | \n", "... | \n", "4.459432 | \n", "15.149152 | \n", "12.563911 | \n", "0.000000 | \n", "6.169925 | \n", "1.584963 | \n", "7.066089 | \n", "3.459432 | \n", "4.954196 | \n", "1 | \n", "
TCGA-AA-3696-01A | \n", "2.321928 | \n", "2.321928 | \n", "6.768184 | \n", "0.000000 | \n", "5.357552 | \n", "0.000000 | \n", "9.231221 | \n", "5.643856 | \n", "5.584963 | \n", "5.247928 | \n", "... | \n", "1.584963 | \n", "10.726218 | \n", "6.539159 | \n", "0.000000 | \n", "2.584963 | \n", "0.000000 | \n", "4.392317 | \n", "2.000000 | \n", "3.700440 | \n", "1 | \n", "
5 rows × 130 columns
\n", "\n", " | Accuracy test | \n", "AUC test | \n", "Number of features | \n", "Accuracy RFE | \n", "AUC RFE | \n", "
---|---|---|---|---|---|
LR | \n", "0.736842 | \n", "0.705729 | \n", "101 | \n", "0.763158 | \n", "0.691406 | \n", "
NN | \n", "0.815789 | \n", "0.804688 | \n", "129 | \n", "0.815789 | \n", "0.804688 | \n", "
SVM | \n", "0.842105 | \n", "0.643229 | \n", "129 | \n", "0.842105 | \n", "0.643229 | \n", "
RF | \n", "0.828947 | \n", "0.636068 | \n", "100 | \n", "0.828947 | \n", "0.699219 | \n", "
GBDT | \n", "0.763158 | \n", "0.574219 | \n", "59 | \n", "0.763158 | \n", "0.595052 | \n", "
Catboost | \n", "0.828947 | \n", "0.692708 | \n", "109 | \n", "0.815789 | \n", "0.695312 | \n", "