Browse Source

Added support for Amazon Athena - closes #158

Andrew Kane 6 years ago
parent
commit
3152cf1b00

+ 4 - 0
CHANGELOG.md

@@ -1,3 +1,7 @@
+## 1.8.1 [unreleased]
+
+- Added support for Amazon Athena
+
 ## 1.8.0
 
 - Added support for Rails 5.1

+ 17 - 4
README.md

@@ -389,12 +389,13 @@ data_sources:
 - [Oracle](#oracle)
 - [IBM DB2 and Informix](#ibm-db2-and-informix)
 - [SQLite](#sqlite)
-- [Redshift](#redshift)
+- [Amazon Redshift](#amazon-redshift)
+- [Amazon Athena](#amazon-athena-master) [master]
 - [Presto](#presto)
 - [Apache Drill](#apache-drill)
 - [Google BigQuery](#google-bigquery)
 - [MongoDB](#mongodb-1)
-- [Elasticsearch](#elasticsearch) [beta]
+- [Elasticsearch](#elasticsearch-beta) [beta]
 
 You can also [create an adapter](#creating-an-adapter) for any other data store.
 
@@ -454,7 +455,7 @@ data_sources:
     url: sqlite3:path/to/database.sqlite3
 ```
 
-### Redshift
+### Amazon Redshift
 
 Add [activerecord4-redshift-adapter](https://github.com/aamine/activerecord4-redshift-adapter) or [activerecord5-redshift-adapter](https://github.com/ConsultingMD/activerecord5-redshift-adapter) to your Gemfile and set:
 
@@ -464,6 +465,18 @@ data_sources:
     url: redshift://user:password@hostname:5439/database
 ```
 
+### Amazon Athena [master]
+
+Add [aws-sdk](https://github.com/aws/aws-sdk-ruby) `~> 2` to your Gemfile and set:
+
+```yml
+data_sources:
+  my_source:
+    adapter: athena
+    database: database
+    output_location: s3://some-bucket/
+```
+
 ### Presto
 
 Add [presto-client](https://github.com/treasure-data/presto-client-ruby) to your Gemfile and set:
@@ -507,7 +520,7 @@ data_sources:
     url: mongodb://user:password@hostname:27017/database
 ```
 
-### Elasticsearch
+### Elasticsearch [beta]
 
 Add [elasticsearch](https://github.com/elastic/elasticsearch-ruby) to your Gemfile and set:
 

+ 1 - 1
app/views/blazer/queries/show.html.erb

@@ -62,7 +62,7 @@
   </script>
 <% end %>
 
-<% if %w[sql presto drill bigquery].include?(Blazer.data_sources[@query.data_source].adapter) %>
+<% if %w[sql presto drill bigquery athena].include?(Blazer.data_sources[@query.data_source].adapter) %>
   <script>
     // do not highlight really long queries
     // this can lead to performance issues

+ 2 - 0
lib/blazer.rb

@@ -7,6 +7,7 @@ require "blazer/data_source"
 require "blazer/result"
 require "blazer/run_statement"
 require "blazer/adapters/base_adapter"
+require "blazer/adapters/athena_adapter"
 require "blazer/adapters/bigquery_adapter"
 require "blazer/adapters/drill_adapter"
 require "blazer/adapters/elasticsearch_adapter"
@@ -169,6 +170,7 @@ end
 
 Blazer.register_adapter "drill", Blazer::Adapters::DrillAdapter
 Blazer.register_adapter "bigquery", Blazer::Adapters::BigQueryAdapter
+Blazer.register_adapter "athena", Blazer::Adapters::AthenaAdapter
 Blazer.register_adapter "elasticsearch", Blazer::Adapters::ElasticsearchAdapter
 Blazer.register_adapter "mongodb", Blazer::Adapters::MongodbAdapter
 Blazer.register_adapter "presto", Blazer::Adapters::PrestoAdapter

+ 72 - 0
lib/blazer/adapters/athena_adapter.rb

@@ -0,0 +1,72 @@
+module Blazer
+  module Adapters
+    class AthenaAdapter < BaseAdapter
+      def run_statement(statement, comment)
+        require "digest/md5"
+
+        columns = []
+        rows = []
+        error = nil
+
+        begin
+          resp =
+            client.start_query_execution(
+              query_string: statement,
+              # use token so we fetch cached results after query is run
+              client_request_token: Digest::MD5.hexdigest(statement),
+              query_execution_context: {
+                database: settings["database"],
+              },
+              result_configuration: {
+                output_location: settings["output_location"]
+              }
+            )
+          query_execution_id = resp.query_execution_id
+
+          timeout = data_source.timeout || 300
+          stop_at = Time.now + timeout
+          resp = nil
+
+          begin
+            resp = client.get_query_results(
+              query_execution_id: query_execution_id
+            )
+          rescue Aws::Athena::Errors::InvalidRequestException => e
+            if e.message != "Query has not yet finished. Current state: RUNNING"
+              raise e
+            end
+            if Time.now < stop_at
+              sleep(3)
+              retry
+            end
+          end
+
+          if resp
+            column_info = resp.result_set.result_set_metadata.column_info
+            columns = column_info.map(&:name)
+            column_types = column_info.map(&:type)
+
+            untyped_rows = []
+            resp.each do |page|
+              untyped_rows.concat page.result_set.rows.map { |r| r.data.map(&:var_char_value) }
+            end
+
+            rows = untyped_rows[1..-1] # TODO use column_types
+          else
+            error = Blazer::TIMEOUT_MESSAGE
+          end
+        rescue Aws::Athena::Errors::InvalidRequestException => e
+          error = e.message
+        end
+
+        [columns, rows, error]
+      end
+
+      private
+
+      def client
+        @client ||= Aws::Athena::Client.new
+      end
+    end
+  end
+end

+ 1 - 1
lib/blazer/data_source.rb

@@ -12,7 +12,7 @@ module Blazer
       @id = id
       @settings = settings
 
-      unless settings["url"] || Rails.env.development? || settings["adapter"] == "bigquery"
+      unless settings["url"] || Rails.env.development? || ["bigquery", "athena"].include?(settings["adapter"])
         raise Blazer::Error, "Empty url for data source: #{id}"
       end