Generating a Data Dictionary in Google Sheet

google
api

(romain.ducarrouge) #1

Why

Looker enables more people to access data and run ad-hoc queries, but sometimes the meaning of the field or its source is unknown from the end user and leads to incorrect queries.
One way to prevent this is to define a data dictionary using the Looker API to populate this information in Google Spreadsheets.
This script is particularly helpful when a description parameter is defined for the fields in the views.

The script

This script is defined in a mix of Apps Script and Javascript using the functions listed on this repo.

Some key elements to keep in mind when using the script below are:
1. the model_name and explore_name are case sensitive
2. the set up is to populate each view’s information in the Google Sheet with the tab name matching the view name (to not use this, please check out the comments below to see the lines enforcing this behavior)

In this script we are using a Google Sheet mapping to each view in the Looker Explore. Each Google sheet has the name of the Looker view, and only data relevant to that view is displayed in the Google Sheet.

This also puts the output of the API call in cache so that the call does not get run on each opening of the sheets but on the first run of the sheet and then gets cached for 6 hours.

// Replace this with your base domain 
var BASE_URL = 'https://your_company.com:19999/api/3.0';
// Replace this with your API credentials
var CLIENT_ID = 'your_api_ID'; 
var CLIENT_SECRET = 'your_api_key'; 

function LOOKER_GET_DATA_DICTIONARY(model_name) {
   //checks for previous cached entry 
   var cache = CacheService.getScriptCache();
   var cached = cache.get("api_results");
   if (cached != null) {
     for (elem in cached) { 
       results.push(elem)
     return results;
     }
  }
  // if nothing in cache, run the call
  try {

  var options = {
        'method': 'get',
        'headers': {
            'Authorization': 'token ' + login()
        }
    };

    // api call to the /lookml_models/{lookml_model_name} endpoint
    var response = UrlFetchApp.fetch(BASE_URL + "/lookml_models/" + model_name, options);
    var explores = JSON.parse(response.getContentText()).explores;
    var result = [];
    
    // defining the fields to retrieve for the Google Sheets
    result.push(["View Name", "Field Type", "Name", "Label", "Type", "Description", "Hidden"]);
                 // additional details if needed:
                 //, "SQL", "Source"]);

    
    for (var i = 0; len = explores.length, i < len; i++) {
        Logger.log(explores);

        var explore = explores[i].name;
        var explore_results = UrlFetchApp.fetch(BASE_URL + "/lookml_models/" + model_name + "/explores/" + explore, options);

        var connection = JSON.parse(explore_results.getContentText()).connection_name;
        var dimensions = JSON.parse(explore_results.getContentText()).fields.dimensions;
        var measures = JSON.parse(explore_results.getContentText()).fields.measures;
        var current_sheet = SpreadsheetApp.getActiveSheet().getName();

        // using this test to retrieve only data relevant to a specific explore
        // change explore_name with your explore
        if (explore == "explore_name") {
        
        // adding the data for the dimensions
          for (var j = 0; j < dimensions.length; j++) {
            // checks that only the fields from the underlying Looker view matching the name of the Google sheet are displayed
            if (dimensions[j].view.replace("_", " ") == current_sheet.toLowerCase()|| dimensions[j].view_label.replace("_", " ").toLowerCase() == current_sheet.toLowerCase()) {     
              result.push([dimensions[j].view, 
                         "Dimension", 
                         (dimensions[j].name.substring((dimensions[j].name.indexOf(".")+1), dimensions[j].name.length)).replace(/_/g, " "), 
                         (dimensions[j].label != null ? dimensions[j].label : (dimensions[j].name.substring((dimensions[j].name.indexOf(".")+1), dimensions[j].name.length).replace(/_/g, " "))), 
                         (dimensions[j].type != null ? (dimensions[j].type).replace("_", " ") : "String"),
                         dimensions[j].description,  
                         dimensions[j].hidden, dimensions[j].view_label
                         //, (dimensions[j].sql != null ? dimensions[j].sql : ""), 
                         //dimensions[j].source_file
                         ]);
          }
          }
          
          // adding the data for the measures
          for (var k = 0; k < measures.length; k++) {
            // checks that only the fields from the view matching the name of the sheet are displayed
            if (measures[k].view.replace("_", " ") == current_sheet.toLowerCase() || measures[k].view_label.replace("_", " ").toLowerCase() == current_sheet.toLowerCase()) {
              result.push([measures[k].view, 
                         "Measure", 
                         (measures[k].name.substring((measures[k].name.indexOf(".")+1), measures[k].name.length).replace(/_/g, " ")), 
                         (measures[k].label != null ? measures[k].label : (measures[k].name.substring((measures[k].name.indexOf(".")+1), measures[k].name.length)).replace(/_/g, " ")), 
                         (measures[k].type != null ? (measures[k].type).replace("_", " ") : "String"), 
                         measures[k].description, 
                         measures[k].hidden
                         //, (measures[k].sql != null ? measures[k].sql : ""), 
                         //measures[k].source_file
                         ]);
        }
        }
        }
    }
    cache.put("api_results", result, 21600); // cache for 6 hours
    //the maximum cache time is 6 hours (21600 seconds) 
    //the default time is 10 minutes (600 seconds).
    return result
  } catch(err) {
    return "Something went wrong. " + err
}
}

function login() {
  try{
    var post = {
        'method': 'post'
    };
    var response = UrlFetchApp.fetch(BASE_URL + "/login?client_id=" + CLIENT_ID + "&client_secret=" + CLIENT_SECRET, post);
    return JSON.parse(response.getContentText()).access_token;
  } catch(err) {
    Logger.log(err);
    return "Could not login to Looker. Check your credentials.";
  }
}

With the code above we can set a cell of our Google Sheet that way to call the function on a particular model:
=LOOKER_GET_DATA_DICTIONARY("model_name")

Note :warning:️

Instead of storing the API credentials in the script directly, you can also use the following code from the repository to require the credentials to be inputed in the UI on the Google Sheet:

/ set credentials via prompt
function setCred() {
    var ui = SpreadsheetApp.getUi();
    var base_url_input = ui.prompt("Set your Looker API credentials", "Base URL (e.g. https://yourdomain.looker.com:19999/api/3.0):", ui.ButtonSet.OK_CANCEL);
    var client_id_input = ui.prompt("Set your Looker API credentials", "Client ID:", ui.ButtonSet.OK_CANCEL);
    var client_id_secret = ui.prompt("Set your Looker API credentials", "Client Secret:", ui.ButtonSet
        .OK_CANCEL);
    var scriptProperties = PropertiesService.getScriptProperties();
    // assign them to scriptProperties so the user doesn't have to enter them over and over again
    scriptProperties.setProperty("BASE_URL", base_url_input.getResponseText());
    scriptProperties.setProperty("CLIENT_ID", client_id_input.getResponseText());
    scriptProperties.setProperty("CLIENT_SECRET", client_id_secret.getResponseText());
    // test the credentials with a /user call
    testCred();
}

// testing the existing creds
function testCred() {
    var ui = SpreadsheetApp.getUi();
        var options = {
            "method": "get",
            "headers": {
                "Authorization": "token " + login()
            }
        };
    try {
        var response = UrlFetchApp.fetch(BASE_URL + "/user", options);
        var success_header = "Successfully set API credentials!";
        var success_content = "Authenticated as " + JSON.parse(response.getContentText()).first_name +
            " " + JSON.parse(response.getContentText()).last_name + " (user " + JSON.parse(response.getContentText()).id +").Keep in mind that API credentials are script/spreadsheet bound. This is needed for the custom formulas to keep on working for other users. Hit 'Test' to test your credentials or 'Delete' to remove the currently set credentials.";
        var result = ui.alert(success_header, success_content, ui.ButtonSet.OK);
    } catch (err) {
        var result = ui.alert("Invalid credentials / Credentials not set!",
            "Doublecheck your base URL and your client ID & secret.", ui.ButtonSet.OK);
    }
}

// delete credentials from scriptProperties
function deleteCred() {
    var scriptProperties = PropertiesService.getScriptProperties();
    scriptProperties.deleteAllProperties();
}

Additional resources

We also have this post using Python to create a .csv data dictionary.
A similar post to achieve this in Ruby is here.


Data Dictionary Best Practices
(Carly K) #2

This looks awesome! i just tried out the code and get the following error. Any ideas?

Something went wrong. Exception: Request failed for https://<companyname>.looker.com:19999/api/3.0/lookml_models/two_oh/explores/fact_cart_coupon returned code 404. Truncated server response: {"message":"Not found","documentation_url":"http://docs.looker.com/"} (use muteHttpExceptions option to examine full response)


(romain.ducarrouge) #3

Hey @carlyk I am glad you like the script!

Regarding the error that you are getting, this usually points toward the model_name not being accessible in the call you are running directly on the Spreadsheet via the =LOOKER_GET_DATA_DICTIONARY("model_name")
Either

  • there is a typo or
  • a case sensitive issue with the model_name (try “Two_Oh” if the model name has a capitalized first letter) or
  • perhaps the user’s credentials used there to run the script does not have access to this particular model in the model set.

Please let us know if that solved it!


(Carly K) #4

That was exactly the issue. I didn’t realize it was case sensitive. Also, just a note for others who are using this. You must make the tab name the same as the view_label. It will not work if you just put the view name itself.


(romain.ducarrouge) #5

@carlyk yes good point, the current script does this in order to populate one view per Google Sheet (ensuring the view name matches the Sheet’s name)

if you would like to not use this, for let’s say populating the entire content in just one Sheet you could remove the code that does this:

and


(romain.ducarrouge) #6

Following the approach above, we can apply the very same logic in order to get a list of the User Attributes defined in our instance along with their respective details like this:

Below is the code to get this working:

// Replace this with your base domain 
var BASE_URL = 'https://company.looker.com:19999/api/3.0';
// Replace this with your API credentials
var CLIENT_ID = '12345'; 
var CLIENT_SECRET = 'ABCD'; 

function GET_USER_ATTRIBUTES_DICTIONARY() {
  var options = {
        'method': 'get',
        'headers': {
            'Authorization': 'token ' + login()
        }
    };

    // api call to the /user_attributes endpoint
    var response = UrlFetchApp.fetch(BASE_URL + "/user_attributes", options);
    var output = JSON.parse(response.getContentText());
    var result = [];
    
    // defining the fields to retrieve for the Google Sheets
    result.push(["ID", "Name", "Label", "Type", "Default Value"]);
    result.push(output.length);
  
    for (var i = 0; len = output.length, i < len; i++) {
        Logger.log(output);

        var id = output[i].id;
        var name = output[i].name;
        var label = output[i].label;
        var type = output[i].type;
        var default_value = output[i].default_value;
        result.push([id,
                   name,
                   label,
                   type,
                   default_value]);
    }
    return result
  } 


function login() {
  try{
    var post = {
        'method': 'post'
    };
    var response = UrlFetchApp.fetch(BASE_URL + "/login?client_id=" + CLIENT_ID + "&client_secret=" + CLIENT_SECRET, post);
    return JSON.parse(response.getContentText()).access_token;
  } catch(err) {
    Logger.log(err);
    return "Could not login to Looker. Check your credentials.";
  }
}

(romain.ducarrouge) #7

:warning:️ Due to quotas on the Apps Script side of things (Script runtime, Custom function runtime, URL Fetch response size) using the code above on models with high explore count (and number of views joined into all the explores is also a factor) may fail with the following error:
#ERROR - Internal error executing the custom function

In order to circumvent this we can use the function below to call the explores in the model(s) one at a time;

function GetDictionaryForModelExplore(model_name, explore_name) {
try {
        var options = {
            "method": "get",
            "headers": {
                "Authorization": "token " + login()
            }
        };

        var result = [];
        // push header row first
        result.push(["Connection", "Explore Name", "View Name", "Field Type", "Name", "Label", "Type",
            "Description", "Hidden", "SQL", "Source"
        ]);

        var explore_results = UrlFetchApp.fetch(BASE_URL + "/lookml_models/" + model_name + "/explores/" +
                explore_name, options);
        // get connection, dimensions, measures on the explore
        var connection = JSON.parse(explore_results.getContentText()).connection_name;
        var dimensions = JSON.parse(explore_results.getContentText()).fields.dimensions;
        var measures = JSON.parse(explore_results.getContentText()).fields.measures;
  
        var explore = explore_name;
        // for dimension in explore, add dimension to results
        for (var j = 0; j < dimensions.length; j++) {
            result.push([
                connection, explore, dimensions[j].view, "Dimension",
                dimensions[j].name, dimensions[j].label, dimensions[j].type,
                dimensions[j].description, "hidden: " + dimensions[j].hidden, 
                (dimensions[j].sql != null ? dimensions[j].sql : ""), dimensions[j].source_file
            ]);
            }

        // for measure in explore, add measure to results
        for (var k = 0; k < measures.length; k++) {
            result.push([
              connection, explore, measures[k].view, "Measure", measures[k].name,
              measures[k].label, measures[k].type, measures[k].description, "hidden: " + measures[k].hidden,
              (measures[k].sql != null ? measures[k].sql : ""), measures[k].source_file
            ]);        
    }
    return result
    } catch (err) {
        Logger.log('GetDictionaryForModelExplore() yielded an error: ' + err);
        return "Uh oh! Something went wrong. Check your API credentials and if you're passing the correct parameters and that your model exists!"
    }
}