Skip to content
Snippets Groups Projects
Commit 279e64e8 authored by Joakim's avatar Joakim :speech_balloon:
Browse files

Uploaded Feature Importance Detector (FID) implementations (Python + and...

Uploaded Feature Importance Detector (FID)  implementations (Python + and Matlab) related to arXiv:2303.09453
parent b24ff98b
No related branches found
No related tags found
No related merge requests found
fid.m 0 → 100644
function [rX, rY, f2] = fid(X, Y, cutoff, plotToggle)
% Feature Importance Detector, published in the paper:
% "Knowledge Discovery from Atomic Structures using Feature Importances" (arXiv:2303.09453).
% Description:
% Performs FID feature selection.
%
% Inputs:
% X - Index vector.
% Y - Feature score vector. Assumed to be in descending
% order.
% cutoff - The hyperparameter of FID. Range [0,1]. Determines the number
% of features to be removed.
% plotToggle - Optional boolean input, default is false.
%
% Ouputs:
% rX - Feature selected X
% rY - Feature selected Y
% f2 - Boolean vector, used to from rX and rY out of X and Y.
%
if exist('plotToggle','var')
plotT = plotToggle;
else
plotT = false;
end
fun = @(a,b,c,d,x) -a./(b*x+c)+d;
x1 = X(1); y1 = Y(1);
Y1 = zeros(size(Y));
if plotT
figure, plot(X,Y,'LineWidth',2);
title('FID: Input')
end
Y1(2:end) = (Y(2:end,1)-y1)./(X(2:end,1)-x1);
Y1(1) = Y1(2);
if plotT
figure, plot(X,Y1,'LineWidth',2);
title('FID: Rolling slope')
end
fopt = fitoptions('Method','NonLinearLeastSquares','Lower',[0 -Inf -Inf -Inf],'Upper',[Inf Inf Inf Inf],'StartPoint',[1 10 10 0]);
if plotT
disp(fopt)
end
fitobj = fit(X,Y1,fun,fopt);
Y2 = fitobj(X);
if plotT
figure, plot(X,Y2,'LineWidth',2);
title('FID: Curve fit')
end
dY2 = Y2(end)-Y2(1);
f2 = Y2 < Y2(end) - dY2*cutoff;
rX = X(f2);
rY = Y(f2);
if plotT
nf2 = ~f2;
figure, plot(X,Y,'LineWidth',1);
hold on;
nrX = X(nf2);
nrY = Y(nf2);
plot(nrX,nrY,'LineWidth',2);
title('FID: Result')
end
end
\ No newline at end of file
fid.py 0 → 100644
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Feature Importance Detector presented in
Knowledge Discovery from Atomic Structures using Feature Importances (arXiv:2303.09453)
@author: Joakim Linja
"""
import numpy as np
import scipy as sp
def fid(X,Y,cutoff = 0.01):
"""
1. Bound rolling slope
2. Curve fit a/(b*x+c)+d
3. Remove features based on cutoff-defined percentage of total variance
Parameters
----------
X: numpy.ndarray
Feature score index array in ascending numerical order.
Y: numpy.ndarray
Feature score array. Assumed to be sorted to descending order.
cutoff: float, optional, [0,1]
Cutoff value to be used in the feature selection.
Default: 0.01
Returns
-------
rX: numpy.matrix
Feature selected X.
rY: numpy.matrix
Feature selected Y.
f2: numpy.ndarray
Boolean indexing array, the feature selection result.
"""
try:
def f(x,a,b,c,d):
return -a/(b*x+c)+d
x0 = X[0]; y0 = Y[0]
Y1 = np.zeros(len(Y))
for i in range(1,len(X)):
Y1[i] = (Y[i]-y0)/(X[i]-x0)
Y1[0] = Y1[1]
popt, pcov = sp.optimize.curve_fit(f,X,Y1,p0=[1,10,10,0],
bounds=([0,-np.inf,-np.inf,-np.inf],[np.inf,np.inf,np.inf,np.inf]))
Y2 = f(X,*popt)
dY2 = Y2[-1]-Y2[0]
f2 = Y2 < Y2[-1] - dY2*cutoff
rX = X[f2]
rY = Y[f2]
return rX, rY, f2
except RuntimeError as e:
raise e
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment