From a66d15cc32e869de5efd0d6609dbb96146f3da66 Mon Sep 17 00:00:00 2001 From: Timothy Itodo Date: Mon, 15 Aug 2022 11:13:04 -0500 Subject: [PATCH 1/3] read SA data & write to cloud logging --- .../README.md | 66 ++++++ .../main.py | 210 ++++++++++++++++++ 2 files changed, 276 insertions(+) create mode 100644 examples/python/extract-logs-write-to-cloud-logging/README.md create mode 100644 examples/python/extract-logs-write-to-cloud-logging/main.py diff --git a/examples/python/extract-logs-write-to-cloud-logging/README.md b/examples/python/extract-logs-write-to-cloud-logging/README.md new file mode 100644 index 000000000..18178eed1 --- /dev/null +++ b/examples/python/extract-logs-write-to-cloud-logging/README.md @@ -0,0 +1,66 @@ +## Overview + +A Python script that extracts Looker system/audit logs from [System Activity](https://docs.looker.com/admin-options/system-activity) and exports the Logs to Cloud Logging. This example tries to format the output logs like a [GCP Audit Log](https://cloud.google.com/logging/docs/audit/understanding-audit-logs) as best as possible. See [mapping](#gcp-audit-log-fields-to-looker-system-activity-mapping) for comparison between Looker System Activity Fields and GCP Audit Log Fields + +> **_NOTE:_** The script extracts System Activity data from the last 10 minutes. 
You can then schedule this script to run every 10 minutes using a cron job or equivalent + +## Requirements +- Looker Instance in which you have Admin or `see_system_activity` permission +- Google Cloud Project with Cloud Logging API enabled +- [pyenv](https://github.com/pyenv/pyenv#installation) installed + +## Deployment + +- Clone the repo and navigate to this directory + ``` + git clone https://github.com/looker-open-source/sdk-codegen.git + cd sdk-codegen/examples/python/extract-logs-write-to-cloud-logging + ``` + +- Setup Python Virtual environment + ``` + pyenv install 3.8.2 + pyenv local 3.8.2 + python -m venv .venv + ``` + +- Install dependencies + ``` + pip install looker-sdk + pip install --upgrade google-cloud-logging + ``` + + +- Create API credentials and set environment variables + ``` + export LOOKERSDK_BASE_URL="" + export LOOKERSDK_CLIENT_ID="" + export LOOKERSDK_CLIENT_SECRET="" + ``` + +- Configure gcloud and [setup service account](https://cloud.google.com/logging/docs/reference/libraries#setting_up_authentication) to write Logs to Cloud Logging + ``` + gcloud config set project + export GOOGLE_APPLICATION_CREDENTIALS="" + ``` + +- Run `main.py` + ``` + python main.py + ``` + + +## GCP Audit Log Fields to Looker System Activity Mapping + +| GCP Audit Log Field | Looker System Actvity Field | +| ----------- | ----------- | +| [logName](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#:~:text=Fields-,logName,-string) | `looker_system_activity_logs` | +| [timestamp](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#:~:text=reported%20the%20error.-,timestamp,-string) | [event.created](https://docs.looker.com/admin-options/tutorials/events#:~:text=for%20example%2C%20create_dashboard-,created,-Date%20and%20time) | +| [resource.type](https://cloud.google.com/logging/docs/reference/v2/rest/v2/MonitoredResource#:~:text=Fields-,type,-string) | `looker_system_activity_logs` | +| 
[resource.type](https://cloud.google.com/logging/docs/reference/v2/rest/v2/MonitoredResource#:~:text=Fields-,type,-string) | `looker_system_activity_logs` | +| [insertId](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#:~:text=is%20LogSeverity.DEFAULT.-,insertid,-string) | [event.id](https://docs.looker.com/admin-options/tutorials/events#:~:text=Description-,id,-Unique%20numeric%20identifier) | +| `protoPayload.status` | [event.attribute.status](https://docs.looker.com/admin-options/tutorials/events#:~:text=Trigger-,Attributes,-add_external_email_to_scheduled_task) | +| `protoPayload.authenticationInfo` | [event.user_id](https://docs.looker.com/admin-options/tutorials/events#:~:text=of%20the%20event-,user_id,-Unique%20numeric%20ID), [event.sudo_user_id](https://docs.looker.com/admin-options/tutorials/events#:~:text=for%20example%2C%20dashboard-,sudo_user_id,-Unique%20numeric%20ID) | +| `protoPayload.authorizationInfo` | `permission_set.permissions` | +| `protoPayload.methodName` | [event.name](https://docs.looker.com/admin-options/tutorials/events#:~:text=triggered%20the%20event-,name,-Name%20of%20the) | +| `protoPayload.response` | [event_attributes](https://docs.looker.com/admin-options/tutorials/events#:~:text=Trigger-,Attributes,-add_external_email_to_scheduled_task) | diff --git a/examples/python/extract-logs-write-to-cloud-logging/main.py b/examples/python/extract-logs-write-to-cloud-logging/main.py new file mode 100644 index 000000000..28b13ee2e --- /dev/null +++ b/examples/python/extract-logs-write-to-cloud-logging/main.py @@ -0,0 +1,210 @@ +import json +from collections import defaultdict + +import looker_sdk +from looker_sdk import models40 as models + +from google.cloud import logging + +sdk = looker_sdk.init40() + + +def create_query(): + response = sdk.create_query( + body=models.WriteQuery( + model="system__activity", + view="event_attribute", + fields=[ + "event.id", + "event.name", + "event.category", + "event.sudo_user_id", + 
"event.created_time", + "user.email", + "user.name", + "permission_set.permissions", + "permission_set.name", + "permission_set.id", + "model_set.models", + "model_set.name", + "event_attribute.name", + "event_attribute.value", + "event_attribute.id", + "group.id", + "group.name", + "group.external_group_id", + "model_set.id", + "user.dev_branch_name" + ], + filters={"event.created_time": "10 minutes"}, + sorts=["event.created_time desc"], + filter_config={"event.created_time": [{ + "type": "past", + "values": [{ + "constant": "10", + "unit": "min" + }] + }]} + )) + + return response + + +def get_looker_data(): + query_id = create_query()["id"] + response = sdk.run_query( + query_id=query_id, + result_format="json") + return json.loads(response) + + +def group_permission_by_event_id(data): + output = defaultdict(set) + for r in data: + event_id = r['event.id'] + permission_data = json.dumps({ + 'permission_set_id': r['permission_set.id'], + 'permission_set_name': r['permission_set.name'], + 'permission_set_permissions': r['permission_set.permissions'], + }) + output[event_id].add(permission_data) + return output + + +def group_event_attribute_by_event_id(data): + output = defaultdict(set) + for r in data: + event_id = r['event.id'] + event_attribute_data = json.dumps({ + 'event_attribute_id': r['event_attribute.id'], + 'event_attribute_name': r['event_attribute.name'], + 'event_attribute_value': r['event_attribute.value'], + }) + output[event_id].add(event_attribute_data) + return output + + +def group_model_set_by_event_id(data): + output = defaultdict(set) + for r in data: + event_id = r['event.id'] + model_set_data = json.dumps({ + 'model_set_id': r['model_set.id'], + 'model_set_name': r['model_set.id'], + 'model_set_models': r['model_set.id'], + }) + output[event_id].add(model_set_data) + return output + + +def group_user_by_event_id(data): + output = defaultdict(set) + for r in data: + event_id = r['event.id'] + user_data = json.dumps({ + 'user_email': 
r['user.email'], + 'user_name': r['user.name'], + 'user_dev_branch_name': r['user.dev_branch_name'], + }) + output[event_id].add(user_data) + return output + + +def group_event_by_event_id(data): + output = defaultdict(set) + for r in data: + event_id = r['event.id'] + user_data = json.dumps({ + 'event_category': r['event.category'], + 'event_name': r['event.name'], + 'event_id': r['event.id'], + 'event_created_time': r['event.created_time'], + 'event_sudo_user_id': r['event.sudo_user_id'], + }) + output[event_id].add(user_data) + return output + + +def group_all(data): + user = group_user_by_event_id(data) + model_set = group_model_set_by_event_id(data) + event_attribute = group_event_attribute_by_event_id(data) + permission = group_permission_by_event_id(data) + event = group_event_by_event_id(data) + + event_id_set = set() + + for r in data: + event_id_set.add(r['event.id']) + + output = {} + for id in event_id_set: + output[id] = { + 'event': list(event[id]), + 'permission_set': list(permission[id]), + 'event_attribute': list(event_attribute[id]), + 'user': list(user[id]), + 'model_set': list(model_set[id]), + } + return output + + +def parse_event_attribute(event_attribute): + output = {} + for data in event_attribute: + r = json.loads(data) + output[r['event_attribute_name']] = r['event_attribute_value'] + return output + + +def get_status(data): + ea = parse_event_attribute(data) + if 'status' in ea: + return ea['status'] + return '' + + +def format(aggregated_data): + data = aggregated_data + output = [] + + for id in aggregated_data: + + output.append({ + 'logName': 'looker_system_activity_logs', + 'timestamp': json.loads(data[id]['event'][0])['event_created_time'], + 'insertId': id, + 'resource': { + 'type': 'looker', + }, + 'protoPayload': { + '@type': 'looker_system_activity_logs', + 'authenticationInfo': { + 'principalEmail': json.loads(data[id]['user'][0])['user_email'] + }, + 'serviceName': 'looker.com', + 'methodName': 
json.loads(data[id]['event'][0])['event_name'], + 'details': parse_event_attribute(data[id]['event_attribute']), + 'status': get_status(data[id]['event_attribute']), + } + }) + + return output + + +def write_log_entry(formatted_data): + + logging_client = logging.Client() + logger = logging_client.logger('looker_system_activity_logs') + + for log in formatted_data: + logger.log_struct(log) + + print("Wrote logs to {}.".format(logger.name)) + + +if __name__ == "__main__": + data = get_looker_data() + agg_data = group_all(data) + formatted_data = format(agg_data) + write_log_entry(formatted_data) From 394bcb13f365ba3fccf92326187b13d76f180e42 Mon Sep 17 00:00:00 2001 From: Timothy Itodo Date: Tue, 6 Sep 2022 09:42:56 -0500 Subject: [PATCH 2/3] clean up --- .../README.md | 45 ++++++++++--------- .../main.py | 23 ++++------ 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/examples/python/extract-logs-write-to-cloud-logging/README.md b/examples/python/extract-logs-write-to-cloud-logging/README.md index 18178eed1..74cf641df 100644 --- a/examples/python/extract-logs-write-to-cloud-logging/README.md +++ b/examples/python/extract-logs-write-to-cloud-logging/README.md @@ -1,48 +1,50 @@ ## Overview -A Python script that extracts Looker system/audit logs from [System Activity](https://docs.looker.com/admin-options/system-activity) and exports the Logs to Cloud Logging. This example tries to format the output logs like a [GCP Audit Log](https://cloud.google.com/logging/docs/audit/understanding-audit-logs) as best as possible. See [mapping](#gcp-audit-log-fields-to-looker-system-activity-mapping) for comparison between Looker System Activity Fields and GCP Audit Log Fields +A Python script that extracts [System Activity](https://docs.looker.com/admin-options/system-activity) data from the last 10 minutes, formats the data as Audit Logs, and exports the logs to Cloud Logging. The data formatting/mapping is best effort. 
See [data mapping](#gcp-audit-log-fields-to-looker-system-activity-mapping) below. -> **_NOTE:_** The script extracts System Activity data from the last 10 minutes. You can then schedule this script to run every 10 minutes using a cron job or equivalent +**_NOTE:_** You can schedule this script to run every 10 minutes using a cron job or equivalent to continually create and export logs. ## Requirements - Looker Instance in which you have Admin or `see_system_activity` permission - Google Cloud Project with Cloud Logging API enabled +- python 3.6+ installed - [pyenv](https://github.com/pyenv/pyenv#installation) installed +- [gcloud](https://cloud.google.com/sdk/docs/install) installed ## Deployment -- Clone the repo and navigate to this directory +- Create [Looker API credentials](https://docs.looker.com/reference/api-and-integration/api-auth) and set the below environment variables ``` - git clone https://github.com/looker-open-source/sdk-codegen.git - cd sdk-codegen/examples/python/extract-logs-write-to-cloud-logging + export LOOKERSDK_BASE_URL="" + export LOOKERSDK_CLIENT_ID="" + export LOOKERSDK_CLIENT_SECRET="" + ``` + +- Create and configure a [service account](https://cloud.google.com/logging/docs/reference/libraries#setting_up_authentication) to write log entries to Cloud Logging and download the keys + ``` + export GOOGLE_APPLICATION_CREDENTIALS="" ``` -- Setup Python Virtual environment +- Clone the repo + ``` + git clone https://github.com/itodotimothy6/extract-looker-logs.git + cd extract-looker-logs/ + ``` + +- Setup python virtual environment ``` pyenv install 3.8.2 pyenv local 3.8.2 python -m venv .venv + source .venv/bin/activate ``` - Install dependencies ``` pip install looker-sdk - pip install --upgrade google-cloud-logging - ``` - - -- Create API credentials and set environment variables - ``` - export LOOKERSDK_BASE_URL="" - export LOOKERSDK_CLIENT_ID="" - export LOOKERSDK_CLIENT_SECRET="" + pip install google-cloud-logging ``` -- Configure 
gcloud and [setup service account](https://cloud.google.com/logging/docs/reference/libraries#setting_up_authentication) to write Logs to Cloud Logging - ``` - gcloud config set project - export GOOGLE_APPLICATION_CREDENTIALS="" - ``` - Run `main.py` ``` @@ -52,12 +54,11 @@ A Python script that extracts Looker system/audit logs from [System Activity](ht ## GCP Audit Log Fields to Looker System Activity Mapping -| GCP Audit Log Field | Looker System Actvity Field | +| GCP Audit Log Field | Looker System Activity Field or Value | | ----------- | ----------- | | [logName](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#:~:text=Fields-,logName,-string) | `looker_system_activity_logs` | | [timestamp](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#:~:text=reported%20the%20error.-,timestamp,-string) | [event.created](https://docs.looker.com/admin-options/tutorials/events#:~:text=for%20example%2C%20create_dashboard-,created,-Date%20and%20time) | | [resource.type](https://cloud.google.com/logging/docs/reference/v2/rest/v2/MonitoredResource#:~:text=Fields-,type,-string) | `looker_system_activity_logs` | -| [resource.type](https://cloud.google.com/logging/docs/reference/v2/rest/v2/MonitoredResource#:~:text=Fields-,type,-string) | `looker_system_activity_logs` | | [insertId](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#:~:text=is%20LogSeverity.DEFAULT.-,insertid,-string) | [event.id](https://docs.looker.com/admin-options/tutorials/events#:~:text=Description-,id,-Unique%20numeric%20identifier) | | `protoPayload.status` | [event.attribute.status](https://docs.looker.com/admin-options/tutorials/events#:~:text=Trigger-,Attributes,-add_external_email_to_scheduled_task) | | `protoPayload.authenticationInfo` | [event.user_id](https://docs.looker.com/admin-options/tutorials/events#:~:text=of%20the%20event-,user_id,-Unique%20numeric%20ID), 
[event.sudo_user_id](https://docs.looker.com/admin-options/tutorials/events#:~:text=for%20example%2C%20dashboard-,sudo_user_id,-Unique%20numeric%20ID) | diff --git a/examples/python/extract-logs-write-to-cloud-logging/main.py b/examples/python/extract-logs-write-to-cloud-logging/main.py index 28b13ee2e..e82fe907d 100644 --- a/examples/python/extract-logs-write-to-cloud-logging/main.py +++ b/examples/python/extract-logs-write-to-cloud-logging/main.py @@ -20,13 +20,6 @@ def create_query(): "event.category", "event.sudo_user_id", "event.created_time", - "user.email", - "user.name", - "permission_set.permissions", - "permission_set.name", - "permission_set.id", - "model_set.models", - "model_set.name", "event_attribute.name", "event_attribute.value", "event_attribute.id", @@ -34,17 +27,17 @@ def create_query(): "group.name", "group.external_group_id", "model_set.id", + "model_set.models", + "model_set.name", + "permission_set.permissions", + "permission_set.name", + "permission_set.id", + "user.email", + "user.name", "user.dev_branch_name" ], filters={"event.created_time": "10 minutes"}, - sorts=["event.created_time desc"], - filter_config={"event.created_time": [{ - "type": "past", - "values": [{ - "constant": "10", - "unit": "min" - }] - }]} + sorts=["event.created_time desc"] )) return response From ac848bb5bb49a6b9f44b0e247cb9a66bfacd8c73 Mon Sep 17 00:00:00 2001 From: Timothy Itodo Date: Mon, 19 Sep 2022 09:37:46 -0500 Subject: [PATCH 3/3] corrections --- .../README.md | 8 ++------ .../extract-logs-write-to-cloud-logging/main.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/examples/python/extract-logs-write-to-cloud-logging/README.md b/examples/python/extract-logs-write-to-cloud-logging/README.md index 74cf641df..05d41a7d4 100644 --- a/examples/python/extract-logs-write-to-cloud-logging/README.md +++ b/examples/python/extract-logs-write-to-cloud-logging/README.md @@ -8,8 +8,6 @@ A Python script that extracts [System 
Activity](https://docs.looker.com/admin-op - Looker Instance in which you have Admin or `see_system_activity` permission - Google Cloud Project with Cloud Logging API enabled - python 3.6+ installed -- [pyenv](https://github.com/pyenv/pyenv#installation) installed -- [gcloud](https://cloud.google.com/sdk/docs/install) installed ## Deployment @@ -27,14 +25,12 @@ A Python script that extracts [System Activity](https://docs.looker.com/admin-op - Clone the repo ``` - git clone https://github.com/itodotimothy6/extract-looker-logs.git - cd extract-looker-logs/ + git clone https://github.com/looker-open-source/sdk-codegen.git + cd sdk-codegen/examples/python/extract-logs-write-to-cloud-logging ``` - Setup python virtual environment ``` - pyenv install 3.8.2 - pyenv local 3.8.2 python -m venv .venv source .venv/bin/activate ``` diff --git a/examples/python/extract-logs-write-to-cloud-logging/main.py b/examples/python/extract-logs-write-to-cloud-logging/main.py index e82fe907d..eb3f7ef66 100644 --- a/examples/python/extract-logs-write-to-cloud-logging/main.py +++ b/examples/python/extract-logs-write-to-cloud-logging/main.py @@ -15,26 +15,26 @@ def create_query(): model="system__activity", view="event_attribute", fields=[ + "event.category", + "event.created_time", "event.id", "event.name", - "event.category", "event.sudo_user_id", - "event.created_time", + "event_attribute.id", "event_attribute.name", "event_attribute.value", - "event_attribute.id", + "group.external_group_id", "group.id", "group.name", - "group.external_group_id", "model_set.id", "model_set.models", "model_set.name", - "permission_set.permissions", - "permission_set.name", "permission_set.id", + "permission_set.name", + "permission_set.permissions", + "user.dev_branch_name", "user.email", - "user.name", - "user.dev_branch_name" + "user.name" ], filters={"event.created_time": "10 minutes"}, sorts=["event.created_time desc"]