@@ -185,19 +185,28 @@ dna_seq = np.array([0, 1, 2, 3, 0, 0, 1, 2, 3, 3, 2, 1, 0, 0, 2, 3]) # "ACGTAAC
185
185
186
186
``` python
187
187
# 1. First 5 nucleotides
188
- print (" First 5 nucleotides:" , dna_seq[:5 ]) # array([0, 1, 2, 3, 0]) = "ACGTA"
188
+ print (" First 5 nucleotides:" , dna_seq[:5 ])
189
189
190
190
# 2. Last 4 nucleotides
191
- print (" Last 4 nucleotides:" , dna_seq[- 4 :]) # array([0, 0, 2, 3]) = "AAGT"
191
+ print (" Last 4 nucleotides:" , dna_seq[- 4 :])
192
192
193
193
# 3. Every third nucleotide
194
- print (" Every third nucleotide:" , dna_seq[::3 ]) # array([0, 0, 3, 0, 3]) = "AATA"
194
+ print (" Every third nucleotide:" , dna_seq[::3 ])
195
195
196
196
# 4. Subsequence from position 6 to 10
197
- print (" Subsequence pos 6-10:" , dna_seq[6 :11 ]) # array([1, 2, 3, 3, 2]) = "CGTTG"
197
+ print (" Subsequence pos 6-10:" , dna_seq[6 :11 ])
198
198
# Note: Upper bound is exclusive in slicing, so we use 11 to include position 10
199
199
```
200
200
201
+ Output
202
+
203
+ ``` none
204
+ First 5 nucleotides: [0 1 2 3 0]
205
+ Last 4 nucleotides: [0 0 2 3]
206
+ Every third nucleotide: [0 3 1 3 0 3]
207
+ Subsequence pos 6-10: [1 2 3 3 2]
208
+ ```
209
+
201
210
:::
202
211
203
212
:::{exercise}
@@ -220,31 +229,43 @@ gene_expr = np.array([
220
229
** Tasks:**
221
230
222
231
1 . Extract the expression values for Gene 3
223
- 2 . Extract the expression values for all genes under condition 4 ( fifth column)
224
- 3 . Extract a sub-matrix containing Genes 2-4 under conditions 2-3
225
- 4 . Find the expression value for Gene 5 under condition 2
232
+ 2 . Extract the expression values for all genes in the fifth column
233
+ 3 . Extract a sub-matrix containing Genes 2-4 under columns 2-3
234
+ 4 . Find the expression value for Gene 5 under column 2
226
235
227
236
:::
228
237
229
238
:::{solution}
230
239
231
240
``` python
232
241
# 1. Expression values for Gene 3
233
- print (" Gene 3 expression:" , gene_expr[2 ]) # array([8.4, 7.5, 9.2, 8.1, 10.5])
242
+ print (" Gene 3 expression:" , gene_expr[2 ])
234
243
# Alternative: gene_expr[2, :]
235
244
236
- # 2. Expression values for all genes under condition 4
237
- print (" Condition 4 expression:" , gene_expr[:, 4 ]) # array([25.3, 19.7, 10.5, 36.2, 18.2])
245
+ # 2. Expression values for all genes under column 5
246
+ print (" Condition 4 expression:" , gene_expr[:, 4 ])
238
247
239
- # 3. Sub-matrix of Genes 2-4 under conditions 2-3
240
- print (" Sub-matrix (Genes 2-4, Conditions 2-3):" )
248
+ # 3. Sub-matrix of Genes 2-4 under columns 2-3
249
+ print (" Sub-matrix (Genes 2-4, columns 2-3):" )
241
250
print (gene_expr[1 :4 , 1 :3 ])
242
251
# array([[38.1, 29.6],
243
252
# [7.5, 9.2],
244
253
# [29.8, 27.5]])
245
254
246
- # 4. Expression value for Gene 5 under condition 2
247
- print (" Gene 5, Condition 2:" , gene_expr[4 , 1 ]) # 19.8
255
+ # 4. Expression value for Gene 5 under column 2
256
+ print (" Gene 5, columns 2:" , gene_expr[4 , 1 ])
257
+ ```
258
+
259
+ Output
260
+
261
+ ``` none
262
+ Gene 3 expression: [ 8.4 7.5 9.2 8.1 10.5]
263
+ Condition 4 expression: [25.3 19.7 10.5 36.2 18.2]
264
+ Sub-matrix (Genes 2-4, columns 2-3):
265
+ [[38.1 29.6]
266
+ [ 7.5 9.2]
267
+ [29.8 27.5]]
268
+ Gene 5, columns 2: 19.8
248
269
```
249
270
250
271
:::
@@ -253,7 +274,7 @@ print("Gene 5, Condition 2:", gene_expr[4, 1]) # 19.8
253
274
254
275
## Exercise 3: Multi-sequence Alignment Analysis (2-3 minutes)
255
276
256
- Consider a simplified alignment scoring matrix where each row represents a protein sequence and each column represents a position in the alignment:
277
+ Consider a simplified alignment scoring matrix where each row represents a sequence, each column represents a position in the alignment, and each entry is a match (1) or mismatch (0):
257
278
258
279
``` python
259
280
import numpy as np
@@ -280,79 +301,33 @@ alignment_scores = np.array([
280
301
# 1. Positions where all sequences match
281
302
all_match = np.all(alignment_scores == 1 , axis = 0 )
282
303
print (" Positions where all sequences match:" , np.where(all_match)[0 ])
283
- # array([3]) - only position 3 has all matches
284
304
285
305
# 2. Scores for positions 3-7 for all sequences
286
306
print (" Positions 3-7 scores:" )
287
307
print (alignment_scores[:, 3 :8 ])
288
- # array([[1, 0, 1, 0, 0],
289
- # [1, 0, 0, 1, 1],
290
- # [1, 1, 0, 0, 1],
291
- # [1, 1, 1, 0, 0]])
292
308
293
309
# 3. Matching pattern for Sequence 3
294
310
seq3_matches = alignment_scores[2 ] == 1
295
311
print (" Sequence 3 match positions:" , np.where(seq3_matches)[0 ])
296
- # array([1, 2, 3, 4, 7])
297
312
298
313
# 4. Sub-alignment of first two sequences for last five positions
299
314
print (" Sub-alignment (Seq 1-2, last 5 positions):" )
300
315
print (alignment_scores[0 :2 , 5 :])
301
- # array([[1, 0, 0, 1, 1],
302
- # [0, 1, 1, 0, 1]])
303
316
```
304
317
305
- :::
306
-
307
- :::{exercise}
308
-
309
- ## Exercise 4: Combining Indexing and Boolean Operations (2-3 minutes)
310
-
311
- Using the gene expression matrix from Exercise 2:
312
-
313
- ``` python
314
- import numpy as np
315
- gene_expr = np.array([
316
- [15.2 , 21.5 , 18.9 , 11.8 , 25.3 ], # Gene 1
317
- [42.3 , 38.1 , 29.6 , 33.2 , 19.7 ], # Gene 2
318
- [8.4 , 7.5 , 9.2 , 8.1 , 10.5 ], # Gene 3
319
- [31.6 , 29.8 , 27.5 , 34.9 , 36.2 ], # Gene 4
320
- [17.3 , 19.8 , 22.5 , 21.3 , 18.2 ] # Gene 5
321
- ])
322
- ```
323
-
324
- ** Tasks:**
325
-
326
- 1 . Find all expression values greater than 30
327
- 2 . Identify which genes have at least one expression value greater than 30
328
- 3 . Create a boolean mask showing positions where expression is between 15 and 25
329
- 4 . Extract all expression values from condition 2 that are less than 20
330
-
331
- :::
332
-
333
- :::{solution}
334
-
335
- ``` python
336
- # 1. Expression values greater than 30
337
- high_expr = gene_expr > 30
338
- print (" Values > 30:" , gene_expr[high_expr])
339
- # array([42.3, 38.1, 31.6, 33.2, 34.9, 36.2])
340
-
341
- # 2. Genes with at least one expression value > 30
342
- genes_with_high_expr = np.any(gene_expr > 30 , axis = 1 )
343
- print (" Genes with expression > 30:" , np.where(genes_with_high_expr)[0 ])
344
- # array([1, 3]) - Gene 2 and Gene 4 (indices 1 and 3)
345
-
346
- # 3. Boolean mask for expression between 15 and 25
347
- mid_range_expr = (gene_expr >= 15 ) & (gene_expr <= 25 )
348
- print (" Expression between 15-25:" )
349
- print (mid_range_expr)
350
- # Boolean matrix where True indicates values between 15-25
351
-
352
- # 4. Expression values from condition 2 that are less than 20
353
- condition2_low = gene_expr[:, 1 ] < 20
354
- print (" Condition 2 values < 20:" , gene_expr[:, 1 ][condition2_low])
355
- # array([7.5, 19.8])
318
+ Output
319
+
320
+ ``` none
321
+ Positions where all sequences match: [3]
322
+ Positions 3-7 scores:
323
+ [[1 0 1 0 0]
324
+ [1 0 0 1 1]
325
+ [1 1 0 0 1]
326
+ [1 1 1 0 0]]
327
+ Sequence 3 match positions: [1 2 3 4 7]
328
+ Sub-alignment (Seq 1-2, last 5 positions):
329
+ [[1 0 0 1 1]
330
+ [0 1 1 0 1]]
356
331
```
357
332
358
333
:::
0 commit comments