Explorar o código

Anomaly detection for data less than 2 weeks

Andrew Kane hai 8 anos
pai
achega
2a2e2b1b6d
Modificáronse 2 ficheiros con 23 adicións e 7 borrados
  1. 7 2
      lib/blazer/detect_anomalies.R
  2. 16 5
      lib/blazer/result.rb

+ 7 - 2
lib/blazer/detect_anomalies.R

@@ -3,11 +3,16 @@ tryCatch({
 
   args <- commandArgs(trailingOnly = TRUE)
 
-  con <- textConnection(args[1])
+  con <- textConnection(args[2])
   data <- read.csv(con, stringsAsFactors = FALSE)
   data$timestamp <- as.POSIXct(data$timestamp)
 
-  res = AnomalyDetectionTs(data, direction = "both", alpha = 0.05)
+  if (identical(args[1], "ts")) {
+    res = AnomalyDetectionTs(data, direction = "both", alpha = 0.05)
+  } else {
+    res = AnomalyDetectionVec(data$count, direction = "both", alpha = 0.05, period = length(data$count) / 2 - 1)
+  }
+
   write.csv(res$anoms)
 }, error = function (e) {
   write.csv(geterrmessage())

+ 16 - 5
lib/blazer/result.rb

@@ -106,6 +106,7 @@ module Blazer
             end
           rescue => e
             message = "#{current_series}: #{e.message}"
+            raise e if Rails.env.development?
           end
         else
           message = "Bad format"
@@ -126,10 +127,12 @@ module Blazer
           end
         end
 
-      timestamps = []
       r_script = %x[which Rscript].chomp
+      type = series.any? && series.last.first.to_time - series.first.first.to_time >= 2.weeks ? "ts" : "vec"
+      args = [type, csv_str]
       raise "R not found" if r_script.empty?
-      output = %x[#{r_script} --vanilla #{File.expand_path("../detect_anomalies.R", __FILE__)} #{Shellwords.escape(csv_str)}]
+      command = "#{r_script} --vanilla #{File.expand_path("../detect_anomalies.R", __FILE__)} #{args.map { |a| Shellwords.escape(a) }.join(" ")}"
+      output = %x[#{command}]
       if output.empty?
         raise "Unknown R error"
       end
@@ -138,10 +141,18 @@ module Blazer
       error = rows.first && rows.first["x"]
       raise error if error
 
-      rows.each do |row|
-        timestamps << Time.parse(row["timestamp"])
+      timestamps = []
+      if type == "ts"
+        rows.each do |row|
+          timestamps << Time.parse(row["timestamp"])
+        end
+        timestamps.include?(series.last[0].to_time)
+      else
+        rows.each do |row|
+          timestamps << row["index"].to_i
+        end
+        timestamps.include?(series.length)
       end
-      timestamps.include?(series.last[0].to_time)
     end
   end
 end