9 Modules and Packages¶
9.1 Exercises¶
9.1.1 Exercise¶
Implement a matrix and functions to handle it. Choose the data structure you prefer. The API (Application Programming Interface) to implement is the following:
We propose 2 implementations. Both implementations model the matrix as a list of lists, but it is also possible to implement it with a single list, a dict of lists, …
The first implementation follows the API, uses explicit names for inner variables and is well documented.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 | """
Implementation of simple matrix
"""
def create(row_num, col_num, val=None):
    """
    Build a new matrix stored as a list of columns.

    :param row_num: the number of rows
    :type row_num: int
    :param col_num: the number of columns
    :type col_num: int
    :param val: the default value to fill the matrix
    :type val: any (None by default)
    :return: matrix of row_num x col_num
    :rtype: matrix
    """
    # One fresh list per column, so that writing into a cell of one column
    # never shows up in another column.
    return [[val] * row_num for _ in range(col_num)]
def _check_index(matrix, row_no, col_no):
    """
    Verify that (row_no, col_no) designates a cell inside matrix.

    :param matrix: the matrix whose bounds are used
    :type matrix: matrix
    :param row_no: the row index to check
    :type row_no: int
    :param col_no: the column index to check
    :type col_no: int
    :raise: IndexError if row_no or col_no are out of matrix bounds
    """
    row_count, col_count = size(matrix)
    # Valid indexes go from 0 up to, but not including, the size.
    if not (0 <= row_no < row_count and 0 <= col_no < col_count):
        raise IndexError("matrix index out of range")
def size(matrix):
    """
    :param matrix: the matrix to compute the size
    :type matrix: matrix
    :return: the size of matrix (number of rows, number of cols),
             (0, 0) when the matrix holds no column
    :rtype: tuple of 2 int
    """
    if not matrix:
        # No column at all: there is no first column to measure.
        return 0, 0
    # The matrix is a list of columns, so the length of a column
    # is the number of rows.
    return len(matrix[0]), len(matrix)
def get_cell(matrix, row_no, col_no):
    """
    Read one cell of the matrix.

    :param matrix: the matrix
    :type matrix: matrix
    :param row_no: the row number
    :type row_no: int
    :param col_no: the column number
    :type col_no: int
    :return: the content of cell corresponding to row_no x col_no
    :rtype: any
    """
    _check_index(matrix, row_no, col_no)
    # The matrix is indexed by column first, then by row.
    column = matrix[col_no]
    return column[row_no]
def set_cell(matrix, row_no, col_no, val):
    """
    Store val in the cell located at row_no x col_no.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param row_no: the row number of cell to set
    :type row_no: int
    :param col_no: the column number of cell to set
    :type col_no: int
    :param val: the value to set in cell
    :type val: any
    """
    _check_index(matrix, row_no, col_no)
    # The matrix is indexed by column first, then by row.
    column = matrix[col_no]
    column[row_no] = val
def to_str(matrix):
    """
    Format the matrix with one text line per row, cells separated by
    a space and every line ended by a newline.

    :param matrix: the matrix to represent
    :type matrix: matrix
    :return: a string representation of the matrix
    :rtype: str
    """
    # The matrix is a list of columns: zip(*matrix) yields its rows.
    # By design all matrix cols have same size.
    lines = [" ".join(str(cell) for cell in row) for row in zip(*matrix)]
    # Join once instead of growing a string inside a loop.
    return "".join(line + "\n" for line in lines)
def mult(matrix, val):
    """
    Multiply every cell of the matrix by val; the matrix given as
    argument is left untouched.

    :param matrix: the matrix to multiply
    :type matrix: matrix
    :param val: the value to mult the matrix with
    :type val: int
    :return: a new matrix corresponding the scalar product of matrix * val
    :rtype: matrix
    """
    return [[cell * val for cell in col] for col in matrix]
def get_row(matrix, row_no):
    """
    Collect the cells of one row.

    :param matrix: the matrix to read
    :type matrix: matrix
    :param row_no: row number
    :type row_no: int
    :return: the row of matrix corresponding to row_no
             a shallow copy of the row
    :rtype: list
    """
    _check_index(matrix, row_no, 0)
    _, col_count = size(matrix)
    # A row is spread over all the columns: pick its cell in each of them.
    return [get_cell(matrix, row_no, col_n) for col_n in range(col_count)]
def set_row(matrix, row_no, val):
    """
    Fill every cell of row row_no with val.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param row_no: the row number
    :type row_no: int
    :param val: the value to put in cells
    :type val: any
    """
    _check_index(matrix, row_no, 0)
    col_count = size(matrix)[1]
    for col_n in range(col_count):
        set_cell(matrix, row_no, col_n, val)
def get_col(matrix, col_no):
    """
    Copy one column of the matrix.

    :param matrix: the matrix to read
    :type matrix: matrix
    :param col_no: the column number
    :type col_no: int
    :return: the column corresponding to col_no of matrix
             a shallow copy of the col
    :rtype: list
    """
    _check_index(matrix, 0, col_no)
    # Columns are stored as such, so copying the inner list is enough.
    return list(matrix[col_no])
def set_col(matrix, col_no, val):
    """
    Fill every cell of column col_no with val.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param col_no: the column number
    :type col_no: int
    :param val: the value to put in cells
    :type val: any
    """
    _check_index(matrix, 0, col_no)
    row_count = size(matrix)[0]
    for row_n in range(row_count):
        set_cell(matrix, row_n, col_no, val)
def replace_col(matrix, col_no, col):
    """
    replace column col_no with col

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param col_no: the column number to replace
    :type col_no: int
    :param col: the list of values to use as replacement of column,
                one value per row of the matrix
    :type col: list
    :raise: RuntimeError if col does not hold exactly one value per row
    :raise: IndexError if col_no is out of matrix bounds
    """
    row_max, col_max = size(matrix)
    # A column holds one cell per row, so its length must be compared to
    # the number of rows (not to the number of columns).
    if len(col) != row_max:
        raise RuntimeError("the size of col {0} does not fit to matrix size {1}x{2}".format(len(col),
                                                                                           row_max,
                                                                                           col_max))
    _check_index(matrix, 0, col_no)
    # Store a copy, so that later changes made by the caller to its own
    # list do not silently modify the matrix.
    matrix[col_no] = list(col)
def replace_row(matrix, row_no, row):
    """
    replace row row_no with row

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param row_no: the row number to replace
    :type row_no: int
    :param row: the list of value to use as replacement of row,
                one value per column of the matrix
    :type row: list
    :raise: RuntimeError if row does not hold exactly one value per column
    :raise: IndexError if row_no is out of matrix bounds
    """
    row_max, col_max = size(matrix)
    # A row holds one cell per column, so its length must be compared to
    # the number of columns (not to the number of rows).
    if len(row) != col_max:
        raise RuntimeError("the size of row {0} does not fit to matrix size {1}x{2}".format(len(row),
                                                                                           row_max,
                                                                                           col_max))
    _check_index(matrix, row_no, 0)
    # The row is spread over all the columns: write its cells one by one.
    for col_no, value in enumerate(row):
        set_cell(matrix, row_no, col_no, value)
if __name__ == '__main__':
    # Small demonstration: a matrix of 5 rows and 3 columns filled with None.
    m = create(5, 3)
    print(m)
    # Put a value in each of the four corners: (row, column, value).
    for row_no, col_no, value in ((0, 0, 1), (0, 2, 2), (4, 0, 12), (4, 2, 15)):
        set_cell(m, row_no, col_no, value)
    print(to_str(m))
    print("get row 0", get_row(m, 0))
    print("get col 0", get_col(m, 0))
|
But the problem with this implementation is that if we decide to change the inner model, for a dict of lists for instance, we must reimplement most of the functions.
In the following implementation we have only 4 functions that handle the lists directly. All other functions manipulate the matrix through these 4 functions. So if we change the inner model we will have to modify only these functions. This implementation will be more maintainable.
But this implementation uses one-letter names for inner variables and is poorly documented, which does not help to maintain it or to develop with it.
The best solution would be the second implementation, but with the variable names and the documentation of the first implementation.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | # matrix is implemented by list of list
def matrix_maker(ligne, col, val=None):
    """
    Build a new matrix stored as a list of rows (lines).

    :param ligne: the number of rows (lines)
    :type ligne: int
    :param col: the number of columns
    :type col: int
    :param val: the default value to fill the matrix
    :type val: any (None by default)
    :return: matrix of ligne x col
    :rtype: list of lists
    """
    # One fresh list per row, so that writing into a cell of one row
    # never shows up in another row.
    return [[val] * col for _ in range(ligne)]
# ---- functions that depend on the matrix structure
def matrix_size(m):
    """
    :param m: the matrix to compute the size
    :type m: list of lists
    :return: the size of m (number of rows, number of columns),
             (0, 0) when m holds no row
    :rtype: tuple of 2 int
    """
    if not m:
        # No row at all: there is no first row to measure.
        return 0, 0
    return len(m), len(m[0])
def matrix_get(matrix, i, j):
    """
    Read one cell of the matrix.

    :param matrix: the matrix
    :type matrix: list of lists
    :param i: the row number
    :type i: int
    :param j: the column number
    :type j: int
    :return: the content of the cell at row i, column j
    :rtype: any
    """
    _check_matindex(matrix, i, j)
    # The matrix is indexed by row first, then by column.
    row = matrix[i]
    return row[j]
def matrix_set(matrix, i, j, val):
    """
    Store val in the cell located at row i, column j.

    :param matrix: the matrix to modify
    :type matrix: list of lists
    :param i: the row number
    :type i: int
    :param j: the column number
    :type j: int
    :param val: the value to set in cell
    :type val: any
    """
    _check_matindex(matrix, i, j)
    # The matrix is indexed by row first, then by column.
    row = matrix[i]
    row[j] = val
def matrix_print(m):
    """
    Print the matrix on standard output, one row (a list) per line.
    Nothing is printed when m holds no row.

    :param m: the matrix to print
    :type m: list of lists
    """
    # The matrix is a list of rows: iterate over them directly instead of
    # computing the size and indexing.
    for row in m:
        print(row)
# ---- functions independent of the matrix structure
def _check_matindex(matrix, i, j):
    """
    Verify that (i, j) designates a cell inside matrix.

    :param matrix: the matrix whose bounds are used
    :type matrix: list of lists
    :param i: the row index to check
    :type i: int
    :param j: the column index to check
    :type j: int
    :raise: IndexError if i or j are out of matrix bounds
    """
    row_count, col_count = matrix_size(matrix)
    # Valid indexes go from 0 up to, but not including, the size.
    if not (0 <= i < row_count and 0 <= j < col_count):
        raise IndexError("matrix index out of range")
def matrix_get_line(matrix, i):
    """
    Collect the cells of one row (line).

    :param matrix: the matrix to read
    :type matrix: list of lists
    :param i: the row number
    :type i: int
    :return: a new list holding the cells of row i
    :rtype: list
    """
    _check_matindex(matrix, i, 0)
    _, col_count = matrix_size(matrix)
    # Go through matrix_get so that this function does not depend on the
    # way the matrix is stored.
    return [matrix_get(matrix, i, col_no) for col_no in range(col_count)]
def matrix_set_line(matrix, i, val):
    """
    Fill every cell of row (line) i with val.

    :param matrix: the matrix to modify
    :type matrix: list of lists
    :param i: the row number
    :type i: int
    :param val: the value to put in cells
    :type val: any
    """
    _check_matindex(matrix, i, 0)
    col_count = matrix_size(matrix)[1]
    for col_no in range(col_count):
        matrix_set(matrix, i, col_no, val)
def matrix_get_col(matrix, j):
    """
    Collect the cells of one column.

    :param matrix: the matrix to read
    :type matrix: list of lists
    :param j: the column number
    :type j: int
    :return: a new list holding the cells of column j
    :rtype: list
    """
    _check_matindex(matrix, 0, j)
    row_count, _ = matrix_size(matrix)
    # Go through matrix_get so that this function does not depend on the
    # way the matrix is stored.
    return [matrix_get(matrix, row_no, j) for row_no in range(row_count)]
def matrix_set_col(matrix, j, val):
    """
    Fill every cell of column j with val.

    :param matrix: the matrix to modify
    :type matrix: list of lists
    :param j: the column number
    :type j: int
    :param val: the value to put in cells
    :type val: any
    """
    _check_matindex(matrix, 0, j)
    row_count = matrix_size(matrix)[0]
    for row_no in range(row_count):
        matrix_set(matrix, row_no, j, val)
if __name__ == '__main__':
    # Small demonstration: a matrix of 5 rows and 3 columns filled with None.
    m = matrix_maker(5, 3)
    # Put a value in each of the four corners: (row, column, value).
    for i, j, val in ((0, 0, 1), (0, 2, 2), (4, 0, 12), (4, 2, 15)):
        matrix_set(m, i, j, val)
    matrix_print(m)
    print("get line 0", matrix_get_line(m, 0))
    print("get col 0", matrix_get_col(m, 0))
|
9.1.2 Exercise¶
Write a program that calculates the similarity of 2 RNA sequences.
- To compute the similarity you need to parse a file containing the similarity matrix. Hint: use the module containing the functions that handle a matrix from the previous chapter. Put this matrix.py file in a directory named “my_python_lib” in your home or Desktop and import it in your current program (the similarity script must be placed elsewhere).
- The similarity of the 2 sequences is the sum of the base similarities. So you have to compare the first base of the two sequences and use the matrix to get the similarity from the similarity table, and so on for all bases, then sum these similarities.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | import sys
import os.path
# Make the modules stored in ~/my_python_lib importable. expanduser lives in
# os.path and is not imported on its own, so it must be called through os.path.
sys.path.insert(0, os.path.join(os.path.expanduser('~'), "my_python_lib"))
import matrix
def parse_similarity_file(path):
    """
    parse file containing RNA similarity matrix and return a matrix

    The first line of the file is skipped. For every following non blank
    line, the first whitespace separated field is dropped (presumably the
    label of the row -- to confirm against the file format) and the other
    fields are converted to float and stored as one row of the matrix.

    :param path: the path of the file to parse
    :type path: string
    :return: a 4 x 4 matrix holding the values read from the file
    :rtype: matrix
    """
    sim_matrix = matrix.create(4, 4)
    with open(path, 'r') as sim_file:
        # skip first line: the next() builtin works with Python 2 and 3
        # whereas the .next() method of files exists only in Python 2
        next(sim_file)
        row_no = 0
        for line in sim_file:
            fields = line.split()
            if not fields:
                # a blank line (e.g. at the end of the file) is not a row
                continue
            values = [float(val) for val in fields[1:]]
            matrix.replace_row(sim_matrix, row_no, values)
            row_no += 1
    return sim_matrix
def get_similarity(b1, b2, sim_matrix):
    """
    Look up the similarity of two bases (case insensitive) in sim_matrix.
    b1 selects the row and b2 the column, in the order A, G, C, U.

    :param b1: the first base must be in ('A', 'G', 'C', 'U')
    :type b1: string
    :param b2: the second base must be in ('A', 'G', 'C', 'U')
    :type b2: string
    :param sim_matrix: a similarity matrix
    :type sim_matrix: matrix
    :return: the similarity between b1 and b2
    :rtype: float
    :raise: KeyError if b1 or b2 is not a known base
    """
    # index of each base in the rows and in the columns of the matrix
    bases = {'A': 0, 'G': 1, 'C': 2, 'U': 3}
    b1 = b1.upper()
    b2 = b2.upper()
    if b1 not in bases:
        raise KeyError("unknown base b1: " + str(b1))
    if b2 not in bases:
        raise KeyError("unknown base b2: " + str(b2))
    return matrix.get_cell(sim_matrix, bases[b1], bases[b2])
def compute_similarity(seq1, seq2, sim_matrix):
    """
    compute a similarity score between 2 RNA sequences of same length

    The bases are compared position by position and the similarities are
    summed. If the sequences do not have the same length, the extra bases
    of the longer one are ignored.

    :param seq1: first sequence to compare
    :type seq1: string
    :param seq2: second sequence to compare
    :type seq2: string
    :param sim_matrix: the similarity matrix giving the score of each pair of bases
    :type sim_matrix: matrix
    :return: the similarity score between seq1 and seq2
             (0 if one of the sequences is empty)
    :rtype: float
    """
    # sum the scores on the fly, there is no need to store them in a list
    return sum(get_similarity(b1, b2, sim_matrix) for b1, b2 in zip(seq1, seq2))
if __name__ == '__main__':
    # Small demonstration: score two sequences of 8 bases with the matrix
    # read from the file "similarity_matrix" of the current directory.
    seq1, seq2 = 'AGCAUCUA', 'ACCGUUCU'
    sim_matrix = parse_similarity_file("similarity_matrix")
    print(matrix.to_str(sim_matrix))
    similarity = compute_similarity(seq1, seq2, sim_matrix)
    print(similarity)
|