|
6 | 6 | # </a>
|
7 | 7 |
|
8 | 8 | # # Overview
|
9 |
| -# |
| 9 | +# |
10 | 10 | # This example demonstrates how to use Mat3ra RESTful API to build a machine learning (ML) model for a set of materials called "train materials" and use the model to predict properties of another set called "target materials". The general approach can work for multiple properties; we use the Electronic Band Gap in this example.
|
11 |
| -# |
12 |
| -# |
13 |
| -# |
| 11 | +# |
| 12 | +# |
| 13 | +# |
14 | 14 | # ## Steps
|
15 |
| -# |
| 15 | +# |
16 | 16 | # We follow the below steps:
|
17 |
| -# |
| 17 | +# |
18 | 18 | # - Import materials from [materials project](https://materialsproject.org/)
|
19 | 19 | # - Calculate band gap for the "train materials"
|
20 | 20 | # - Build ML Train model based on the "train materials"
|
21 | 21 | # - Create and submit a job to predict band gap for the "target materials"
|
22 | 22 | # - Extract band gap for "target materials"
|
23 | 23 | # - Output the results as Pandas dataFrame
|
24 |
| -# |
| 24 | +# |
25 | 25 | # ## Pre-requisites
|
26 |
| -# |
| 26 | +# |
27 | 27 | # The explanation below assumes that the reader is familiar with the concepts used in Mat3ra platform and RESTful API. We outline these below and direct the reader to the original sources of information:
|
28 |
| -# |
| 28 | +# |
29 | 29 | # - [Generating RESTful API authentication parameters](../system/get_authentication_params.ipynb)
|
30 | 30 | # - [Importing materials from materials project](../material/import_materials_from_materialsproject.ipynb)
|
31 | 31 | # - [Creating and submitting jobs](./create_and_submit_job.ipynb)
|
32 | 32 | # - [Running DFT calculations](./run-simulations-and-extract-properties.ipynb)
|
33 | 33 |
|
34 | 34 | # # Complete Authorization Form and Initialize Settings
|
35 |
| -# |
| 35 | +# |
36 | 36 | # This will also determine environment and set all environment variables. We determine if we are using Jupyter Notebooks or Google Colab to run this tutorial.
|
37 |
| -# |
| 37 | +# |
38 | 38 | # If you are running this notebook from Google Colab, Colab takes ~1 min to execute the following cell.
|
39 |
| -# |
| 39 | +# |
40 | 40 | # ACCOUNT_ID and AUTH_TOKEN - Authentication parameters needed when making requests to [Mat3ra.com's API Endpoints](https://docs.mat3ra.com/rest-api/endpoints/).
|
41 |
| -# |
| 41 | +# |
42 | 42 | # MATERIALS_PROJECT_API_KEY - Authentication parameter needed when making requests to [Material Project's API](https://materialsproject.org/open)
|
43 |
| -# |
| 43 | +# |
44 | 44 | # ORGANIZATION_ID - Authentication parameter needed when working with collaborative accounts https://docs.mat3ra.com/collaboration/organizations/overview/
|
45 |
| -# |
| 45 | +# |
46 | 46 | # > <span style="color: orange">**NOTE**</span>: If you are running this notebook from Jupyter, the variables ACCOUNT_ID, AUTH_TOKEN, MATERIALS_PROJECT_API_KEY, and ORGANIZATION_ID should be set in the file [settings.json](../../utils/settings.json) if you need to use these variables. To obtain API token parameters, please see the following link to the documentation explaining how to get them: https://docs.mat3ra.com/accounts/ui/preferences/api/
|
47 | 47 |
|
48 | 48 | # In[ ]:
|
|
96 | 96 | from exabyte_api_client.endpoints.materials import MaterialEndpoints
|
97 | 97 | from exabyte_api_client.endpoints.workflows import WorkflowEndpoints
|
98 | 98 | from exabyte_api_client.endpoints.bank_workflows import BankWorkflowEndpoints
|
99 |
| -from exabyte_api_client.endpoints.raw_properties import RawPropertiesEndpoints |
| 99 | +from exabyte_api_client.endpoints.properties import PropertiesEndpoints |
100 | 100 |
|
101 | 101 |
|
102 | 102 | # #### Materials
|
103 |
| -# |
| 103 | +# |
104 | 104 | # Set parameters for the materials to be imported:
|
105 |
| -# |
| 105 | +# |
106 | 106 | # - **TRAIN_MATERIALS_PROJECT_IDS**: a list of material IDs to train ML model based on
|
107 | 107 | # - **TARGET_MATERIALS_PROJECT_IDS**: a list of material IDs to predict the property for
|
108 | 108 |
|
|
114 | 114 |
|
115 | 115 |
|
116 | 116 | # #### Jobs
|
117 |
| -# |
| 117 | +# |
118 | 118 | # Set parameters for the jobs to be run for the imported materials:
|
119 |
| -# |
| 119 | +# |
120 | 120 | # - **JOB_NAME_PREFIX**: prefix to be used for the job name with "{JOB_NAME_PREFIX} {FORMULA}" convention (e.g. "Job Name Prefix - SiGe")
|
121 | 121 |
|
122 | 122 | # In[ ]:
|
|
126 | 126 |
|
127 | 127 |
|
128 | 128 | # #### Compute
|
129 |
| -# |
| 129 | +# |
130 | 130 | # Setup compute parameters. See [this](https://docs.mat3ra.com/infrastructure/compute-settings/ui) for more information about compute parameters.
|
131 |
| -# |
| 131 | +# |
132 | 132 | # - **NODES**: Number of nodes. Defaults to 1.
|
133 | 133 | # - **PPN**: Number of MPI processes per node. Defaults to 1.
|
134 | 134 | # - **QUEUE**: The name of queue to submit the jobs into. Defaults to D.
|
|
155 | 155 | material_endpoints = MaterialEndpoints(*ENDPOINT_ARGS)
|
156 | 156 | workflow_endpoints = WorkflowEndpoints(*ENDPOINT_ARGS)
|
157 | 157 | bank_workflow_endpoints = BankWorkflowEndpoints(*ENDPOINT_ARGS)
|
158 |
| -raw_property_endpoints = RawPropertiesEndpoints(*ENDPOINT_ARGS) |
| 158 | +property_endpoints = PropertiesEndpoints(*ENDPOINT_ARGS) |
159 | 159 |
|
160 | 160 |
|
161 | 161 | # Retrieve the owner and project IDs as they are needed by the endpoints. The default material is used to extract the owner ID. One can extract the owner ID from any other account's [entities](https://docs.mat3ra.com/entities-general/overview/).
|
|
168 | 168 |
|
169 | 169 |
|
170 | 170 | # ### Create workflows
|
171 |
| -# |
| 171 | +# |
172 | 172 | # Copy "ML: Train Model" and "Band Gap" bank workflows to the account's workflows. We use exabyte bank workflows which are identified by "systemName" field. The below can be adjusted to get the bank workflows by ID.
|
173 | 173 |
|
174 | 174 | # In[ ]:
|
|
179 | 179 |
|
180 | 180 |
|
181 | 181 | # ### Import materials
|
182 |
| -# |
| 182 | +# |
183 | 183 | # Import materials from materials project.
|
184 | 184 |
|
185 | 185 | # In[ ]:
|
|
194 | 194 |
|
195 | 195 |
|
196 | 196 | # ### Calculate Properties for "train materials"
|
197 |
| -# |
| 197 | +# |
198 | 198 | # Create jobs for the "train materials".
|
199 | 199 |
|
200 | 200 | # In[ ]:
|
|
225 | 225 |
|
226 | 226 |
|
227 | 227 | # ### Build ML Train model
|
228 |
| -# |
| 228 | +# |
229 | 229 | # Create ML Train job for the train materials.
|
230 | 230 |
|
231 | 231 | # In[ ]:
|
|
254 | 254 |
|
255 | 255 |
|
256 | 256 | # ### Extract ML model as workflow
|
257 |
| -# |
| 257 | +# |
258 | 258 | # The resulting trained model is extracted from the last unit (train with index 4) of the first job's subworkflow (ML: Train Model with index 0) and is further referred to as "ML predict workflow".
|
259 | 259 |
|
260 | 260 | # In[ ]:
|
261 | 261 |
|
262 | 262 |
|
263 | 263 | ml_predict_workflow = get_property_by_subworkflow_and_unit_indicies(
|
264 |
| - raw_property_endpoints, "workflow:ml_predict", job, 0, 4 |
| 264 | + property_endpoints, "workflow:ml_predict", job, 0, 4 |
265 | 265 | )["data"]
|
266 | 266 | ml_predict_workflow_id = ml_predict_workflow["_id"]
|
267 | 267 |
|
|
275 | 275 |
|
276 | 276 |
|
277 | 277 | # ### Predict property using the model
|
278 |
| -# |
| 278 | +# |
279 | 279 | # Create ML Predict job for the target materials.
|
280 | 280 |
|
281 | 281 | # In[ ]:
|
|
304 | 304 |
|
305 | 305 |
|
306 | 306 | # ### Extract predicted properties
|
307 |
| -# |
| 307 | +# |
308 | 308 | # Predicted properties are extracted from the last unit (score with index 3) of the first job's subworkflow (ml_predict_subworkflow with index 0).
|
309 | 309 |
|
310 | 310 | # In[ ]:
|
311 | 311 |
|
312 | 312 |
|
313 | 313 | predicted_properties = get_property_by_subworkflow_and_unit_indicies(
|
314 |
| - raw_property_endpoints, "predicted_properties", job, 0, 3 |
| 314 | + property_endpoints, "predicted_properties", job, 0, 3 |
315 | 315 | )["data"]["values"]
|
316 | 316 |
|
317 | 317 |
|
318 | 318 | # ### Flatten results
|
319 |
| -# |
| 319 | +# |
320 | 320 | # The for-loop below iterates over the results and flattens them to form the final Pandas DataFrame.
|
321 | 321 |
|
322 | 322 | # In[ ]:
|
|
334 | 334 |
|
335 | 335 |
|
336 | 336 | # ### Output results
|
337 |
| -# |
| 337 | +# |
338 | 338 | # Create and print the final table as Pandas dataFrame.
|
339 | 339 |
|
340 | 340 | # In[ ]:
|
|
0 commit comments