Please explain this code to me.

Question

0 개 추천

function W = lofdd(a, fracrej, k, distmat, sD)
%LOFDD Local Outlier Factor (LOF) data description.
%
%   W = LOFDD(A, FRACREJ, K)
%       Trains on the target-class objects of A and returns a trained
%       mapping. The threshold on the LOF values is obtained from
%       dd_threshold(lof, 1-FRACREJ).
%
%   W = LOFDD(A, FRACREJ, K, DISTMAT, SD)
%       Same, but uses a caller-supplied distance matrix DISTMAT and its
%       row-wise sorted version SD instead of computing them. Both must be
%       given (and non-empty) to be used; they are mainly used by
%       lofrangedd.
%
%   W = LOFDD([], FRACREJ, K)
%       Returns an untrained mapping that stores FRACREJ and K.
%
%   OUT = LOFDD(B, W)
%       When the second argument is a mapping, the objects in B are
%       evaluated: OUT holds, per object, the negated LOF value and the
%       negated threshold.
%
%   Defaults: FRACREJ = 0.05, K = 3.
%
%   Errors are raised when K is a char array or when K is not smaller than
%   the number of training objects. Warnings are issued for fewer than two
%   training objects and for K < 1.

%distmat and sD are optional parameters and are mainly used by lofrangedd
if (nargin < 5)
   % Both are needed together, so a lone distmat is discarded as well.
   distmat = [];
   sD = [];
end
if nargin < 3 || isempty(k), k = 3; end
if nargin < 2 || isempty(fracrej), fracrej = 0.05; end
if nargin < 1 || isempty(a) % empty lofdd
  W = mapping(mfilename,{fracrej,k});
  W = setname(W,sprintf('LOF k:%d', k));
  return
end

if ~ismapping(fracrej)           %training
  % some checking of datatypes and sizes:
  a = +target_class(a);  % make sure we have a OneClass dataset
  [m,d] = size(a);

  % Check the type of k BEFORE comparing it numerically: a char k would
  % otherwise be compared by character code and trigger a misleading
  % "More neighbors than training samples" error.
  if isa(k,'char')
      error('Argument k should define the number of neighbors');
  end
  if (m<2)
      warning('dd_tools:InsufficientData','Dataset contains less than 2 objects');
  end
  if (k>=m)
      error(['More neighbors than training samples are requested! (max=',num2str(m-1),')']);
  end
  if (k<1)
      warning('dd_tools:KNegativeK','K must be positive (>0)');
  end

  if(isempty(distmat) || isempty(sD))
      % calculate the euclidean distance matrix
      distmat = sqrt(sqeucldistm(a,a));
      % sort the distances within each row
      sD = sort(distmat,2);
  end

  % compute the LOF values of the training samples:
  % k-distance of each object (k+1 because the first object is
  % the object itself, and is not considered to be part of the
  % neighborhood)
  k_distance = sD(:,k+1);

  % construct the neighborhood matrix: entry (p,o) is true when o lies
  % within the k-distance of p; an object is never its own neighbor.
  k_distance_neighborhood = bsxfun(@le, distmat, k_distance);
  k_distance_neighborhood(1:m+1:end) = false;   % clear the diagonal
  k_distance_neighborhood_size = sum(k_distance_neighborhood,2);

  % compute reachability distances:
  %   reachability_distance(p,o) = max(k_distance(o), distmat(p,o))
  % please note that this distance is not symmetric
  reachability_distance = bsxfun(@max, distmat, k_distance.');

  % compute local reachability density: inverse of the mean reachability
  % distance from p to its neighbors (1e-10 avoids division by zero)
  local_reachability_density = zeros(m,1);
  for p = 1:m
      nbrs = k_distance_neighborhood(p,:);
      local_reachability_density(p) = 1 ./ (1e-10+(sum(reachability_distance(p,nbrs) / k_distance_neighborhood_size(p))));
  end

  % compute the local outlier factor: mean ratio between the density of
  % each neighbor of p and the density of p itself
  lof = zeros(m,1);
  for p = 1:m
      nbrs = k_distance_neighborhood(p,:);
      lof(p) = sum(local_reachability_density(nbrs) / local_reachability_density(p)) / k_distance_neighborhood_size(p);
  end
  fit = lof;

  %now obtain the threshold:
  thresh = dd_threshold(fit,1-fracrej);

  %and save all useful data:
  W.distmat = distmat;
  W.sD = sD;
  W.k_distance = k_distance;
  W.local_reachability_density = local_reachability_density;
  W.lof = lof;
  W.x = +a;
  W.k = k;
  W.threshold = thresh;
  W.scale = mean(fit);
  % char() builds the same padded label matrix as the deprecated str2mat()
  W = mapping(mfilename,'trained',W,char('target','outlier'),d,2);
  W = setname(W,sprintf('LOF k:%d', k));

else                               %testing
  W = getdata(fracrej);  % unpack the data stored at training time

  % m is the number of test objects, n the number of training objects
  [m,d] = size(a);
  [n,d] = size(W.x);

  % calculate the euclidean distance matrix
  if(isempty(distmat) || isempty(sD))
      distmat = sqrt(sqeucldistm(+a,W.x));    %dist between test and train
      sD = sort(distmat,2);
  end

  % Training distance matrix extended with one extra row/column that is
  % overwritten for every test object in the loop below. The (n+1,n+1)
  % entry stays 0: the distance of the test object to itself.
  new_train_distmat = zeros(n+1,n+1);
  new_train_distmat(1:n,1:n) = W.distmat;

  % compute the LOF values of the test samples:
  % k-distance of each object
  % no k+1 this time because the distance to the test object itself
  % is not present in the distance matrix
  k_distance = sD(:,W.k);

  % construct the neighborhood matrix (test objects x training objects)
  k_distance_neighborhood = zeros(m,n);
  for p = 1:m
      k_distance_neighborhood(p,:) = logical(distmat(p,:) <= k_distance(p));
  end

  %compute the lof value for each object p:
  % add object p to the distance matrix of the training objects
  % p is the last object
  lof = zeros(m,1);
  for p = 1:m
      new_train_distmat(n+1,1:n) = distmat(p,:);
      new_train_distmat(1:n,n+1) = distmat(p,:)';
      % k-distances are recomputed because adding p can change the
      % neighborhoods of the training objects
      new_train_sD = sort(new_train_distmat, 2);
      new_train_k_distance = new_train_sD(:,W.k+1);

      %loop through the neighbors of p; p itself (index n+1) comes first
      %so that its density is available as lrd_of_nn_of_p(1)
      neighbors_of_p = [n+1, find(logical(k_distance_neighborhood(p,:)))];
      lrd_of_nn_of_p = zeros(numel(neighbors_of_p), 1);
      sum_lrd_fraction = 0;
      nn_index = 0;
      for nn = neighbors_of_p
          nn_index = nn_index + 1;
          %determine neighbors of nn (which is a neighbor of p)
          neighbors_of_neighbors_of_p = logical(new_train_distmat(nn,:) <= new_train_k_distance(nn));
          neighbors_of_neighbors_of_p(nn) = 0;
          sum_reach_dist = 0;
          num_nn_nn = 0;
          for nn_nn = find(neighbors_of_neighbors_of_p)
              num_nn_nn = num_nn_nn + 1;
              sum_reach_dist = sum_reach_dist + max(new_train_k_distance(nn_nn), new_train_distmat(nn, nn_nn));
          end
          % NOTE(review): the 1e-10 guard is added to the sum here but to
          % the mean in the training branch; kept as-is to preserve the
          % existing numerical results.
          lrd = 1 / ((sum_reach_dist + 1e-10) / num_nn_nn);
          lrd_of_nn_of_p(nn_index) = lrd;
          if(nn_index > 1),
              % ratio of the neighbor's density to the density of p
              sum_lrd_fraction = sum_lrd_fraction + (lrd / lrd_of_nn_of_p(1));
          end
      end
      % average over the real neighbors (p itself is excluded)
      lof(p) = sum_lrd_fraction / (nn_index-1);
  end
  ind = lof;

  % store the results in the final dataset:
  out = [ind repmat(W.threshold,[m,1])];
  % Store the negated LOF value and negated threshold as output:
  W = setdat(a,-out,fracrej);
  W = setfeatdom(W,{[-inf 0;-inf 0] [-inf 0; -inf 0]});
end
return

댓글 수: 0
이전 댓글 -2개 표시 이전 댓글 -2개 숨기기

댓글을 달려면 로그인하십시오.

이 질문에 답변하려면 로그인하십시오.

Follow Question

Answer 1

Walter Roberson 2015년 9월 9일

5 개 추천

My response would be "NO!"

The algorithm is described in the paper http://www.dbs.ifi.lmu.de/Publikationen/Papers/LOF.pdf

The code is commented.

If you do not understand MATLAB syntax, you can study it; see http://www.mathworks.com/matlabcentral/answers/8026-best-way-s-to-master-matlab

Explaining non-trivial code to someone is very time-consuming when we cannot assume that you have any programming experience at all. What are you asking?? Are you asking about argument processing in MATLAB functions so you can understand about nargin ? Are you asking about how MATLAB structures are internally implemented? Are you asking about the Worst Case Analysis for the running time?

If you want to fly in to my city and study with me for about 4 months, I might be able to explain the code to you starting from scratch. Maybe. Unless, that is, you already have some programming experience, in which case you should be asking SPECIFIC questions about the parts you do not understand.

댓글 수: 1
이전 댓글 -1개 표시 이전 댓글 -1개 숨기기

SRI SINDHU S P 2015년 9월 9일

Thank you

댓글을 달려면 로그인하십시오.

Please explain this code to me.

댓글 수: 0
이전 댓글 -2개 표시 이전 댓글 -2개 숨기기

채택된 답변

댓글 수: 1
이전 댓글 -1개 표시 이전 댓글 -1개 숨기기

추가 답변 (0개)

카테고리

태그

Community Treasure Hunt

Please explain this code to me.

댓글 수: 0 이전 댓글 -2개 표시 이전 댓글 -2개 숨기기

채택된 답변

댓글 수: 1 이전 댓글 -1개 표시 이전 댓글 -1개 숨기기

추가 답변 (0개)

카테고리

태그

참고 항목

Community Treasure Hunt

댓글 수: 0
이전 댓글 -2개 표시 이전 댓글 -2개 숨기기

댓글 수: 1
이전 댓글 -1개 표시 이전 댓글 -1개 숨기기