To run this notebook, first run the modified DNP package in a terminal with:
python (or python3) run_DNP.py > dnp_output.txt
For convenience, the same command is also included in a cell of this notebook.
In this case, num_cv = 10 (10-fold cross-validation) and num_repeat = 1 (the experiment is run once).
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
import warnings
warnings.filterwarnings('ignore')
%%time
# Depending on how big your data is, this cell can run for several minutes.
# If you get the error "No module named 'mxnet'" when running run_DNP.py,
# uncomment the next cell and run it to install mxnet.
!python run_DNP.py > dnp_output.txt
# import sys
# !{sys.executable} -m pip install mxnet
%%time
# To run the experiment multiple times
# num_repeat = 5
# for i in range(num_repeat):
# if open('dnp_output.txt'):
# if i == 0:
# print('The file exists! Press capital Y to overwrite it, or any other key to exit.')
# if input() == 'Y':
# !python run_DNP.py > dnp_output.txt
# else:
# break
# !python run_DNP.py >> dnp_output.txt
# Load the captured output of run_DNP.py: one entry per line of the file,
# with leading/trailing whitespace (including the newline) stripped.
with open('dnp_output.txt', 'r') as f:
    file_ls = [raw_line.strip() for raw_line in f]
# Peek at a single entry as an example of the lines processed below
file_ls[70]
# Keep only the report lines that begin with the word 'feature'.
features = [entry for entry in file_ls if entry.startswith('feature')]
features[:5]

# Tokenise every kept line once: the second whitespace-separated token is
# used as the feature ID and the last token as its weight.
feature_tokens = [entry.split() for entry in features]
id_tokens = [tokens[1] for tokens in feature_tokens]
weight_tokens = [tokens[-1] for tokens in feature_tokens]

# Weights as a float32 Series labelled by feature ID. The index is not
# unique: an ID appears once for every report line that mentions it.
feature_weights = pd.Series(
    np.array(weight_tokens, dtype=np.float32),
    index=id_tokens,
)

# IDs as a Series so that unique() is available; the same ID repeats across
# the k-fold cross-validation folds and across repeats of the experiment.
feature_ids = pd.Series(id_tokens)
unique_id = feature_ids.unique()
# Number of cross-validation folds (k). The value used by the run itself is
# set in DNP.py; keep the two in sync.
num_cv = 10
# Number of times the whole experiment was repeated
num_repeat = 1

# Average activation potential (AAP): sum each feature's weights over every
# line in which it was reported, then divide by the total number of runs
# (num_cv * num_repeat) rather than by the number of occurrences.
# A single groupby over the (non-unique) ID index replaces one label lookup
# per unique ID and avoids a loop variable named `id`, which would shadow the
# builtin for the rest of the notebook session. sort=False keeps the groups
# in first-appearance order; .loc[unique_id] then selects exactly the IDs
# listed in unique_id, in that order.
weight_totals = feature_weights.groupby(level=0, sort=False).sum()
aap = weight_totals.loc[unique_id] / (num_cv * num_repeat)

# Rank features from highest to lowest AAP and show the top 21
aap = aap.sort_values(ascending=False)
aap[:21]
# Same inputs handed to the project's AAP module in a single call.
# NOTE(review): presumably AAP.AAP wraps the parsing/averaging steps done by
# hand above and returns the sorted result - confirm against AAP.py.
import AAP

x = AAP.AAP(
    num_cv=num_cv,
    num_repeat=num_repeat,
    file_name='dnp_output.txt',
)
# First 21 entries of the returned result
x[:21]