-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysisScratch.py
More file actions
182 lines (136 loc) · 5.45 KB
/
analysisScratch.py
File metadata and controls
182 lines (136 loc) · 5.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
'''
2025 WOOD-NT contact@nwoodweb.xyz
MIT LICENSE
This script is meant to automate the quantification of changes
in gap size during a gap closure/wound healing/scratch assay.
It uses image processing provided by scikit image and
multithreading by JobLib.
RETURNS
-------
gapclosure-quantify-<date_experiment>.csv : CSV ASCII text file
CSV file containing processed data
USER DEFINED PARAMETERS
-----------------------
data_directory: string, default="./"
directory in which the output CSV file will be placed
date_experiment: string, default="11-11-1111"
date of when experiment started
recommended format: DD-MM-YYYY
disk_size: u_int, default=25
neighborhood radius for entropy filter
hole_size: u_int, default=1000
otsu segmentation may leave holes inside the "true" region
representing migrating cells; hole_size is the maximum
hole area (in pixels) that the scikit-image function
remove_small_holes() should fill
input_directory: string, default="./tiffs"
directory containing the images to be analyzed
iteration:
defines the replicate that the image file is from
This is dependent on the naming system used by the user,
so will need to be determined based on position.
example: iteration = img[2:6]
numberParallelJobs: u_int, default=4
defines the number of parallel jobs (JobLib n_jobs) requested
genuinely not sure what happens if you specify something silly
like 32768, but you can try :)
object_size: u_int, default=500
defines the minimum object size (in pixels) kept by the
scikit-image function remove_small_objects()
ph: sliced string
defines the experimental condition (in my case pH) of the image
This is dependent on the naming system used by the user,
so will need to be determined based on position.
example: ph = img[2:6]
time_hour: sliced string
defines the time point of the image
This is dependent on the naming system used by the user,
so will need to be determined based on position.
example: time_hour = img[2:6]
'''
import os
import pandas as pd
import numpy as np
from tifffile import imread
from glob import glob
from joblib import Parallel, delayed
from skimage import data
from skimage.util import img_as_ubyte
from skimage.filters.rank import entropy
from skimage.filters import threshold_otsu, gaussian
from skimage.morphology import disk, remove_small_holes, remove_small_objects
# ---------------------------------------------------------------
# user defined parameters: file io
# ---------------------------------------------------------------
# start date of the experiment; becomes part of the output file name
date_experiment = "11-11-1111"
# directory that is searched for the images to analyze
input_directory = os.path.expanduser("./tiffs/")
# directory that receives the output CSV
data_directory = os.path.expanduser("./")
# full path of the CSV written at the end of the run
output_file = os.path.join(
    data_directory,
    f"gapclosure-quantify-{date_experiment}.csv",
)

# ---------------------------------------------------------------
# user defined parameters: image analysis
# ---------------------------------------------------------------
# neighborhood radius handed to disk() for the entropy filter
disk_size = 25
# area_threshold handed to remove_small_holes()
hole_size = 1000
# min_size handed to remove_small_objects()
object_size = 500
# n_jobs handed to joblib Parallel
numberParallelJobs = 4

# ---------------------------------------------------------------
# pixel calibration
# ---------------------------------------------------------------
# for our 10X objective: um_width = um_length, 0.8850341
# for our 4X objective: um_width = um_length, 0.3540199
# NOTE(review): the listed factor grows with magnification, which looks
# like pixels-per-um rather than um-per-pixel. Pixel counts are multiplied
# by the square of this factor downstream, so confirm the unit.
pixel_um_width = 0.3540199
pixel_um_length = pixel_um_width
pixel_um_square = pixel_um_length * pixel_um_width

# NOTE: rebinds the name `data` (also imported from skimage above) to an
# empty list; nothing else in this script reads it.
data = []
def process_image(img):
    '''
    Segment the gap (scratch) in one image and quantify its area.

    MORE USER DEFINED PARAMETERS
    the three filename-derived fields (ph, iteration, time_hour) are
    sliced from fixed character positions of img; you will need to
    adjust them based on how you do your file organization and
    nomenclature. The positions index the whole string passed in, so
    any directory prefix in the path counts toward the offsets.

    PARAMETERS
    ----------
    img : string
        path of the image file to analyze

    RETURNS
    -------
    list
        [date_experiment, img, ph, iteration, time_hour,
        area_gap_um, area_cells_um, percent_closure]
    '''
    # strip pH from filename
    ph = img[18:19]
    # strip iteration from filename
    iteration = img[23:24]
    # strip timepoint from filename
    time_hour = img[32:34]

    # load image and convert to 8 bit unsigned grayscale
    image = img_as_ubyte(imread(img))

    # total element count of the image array, then scaled by the
    # calibration factor
    # NOTE(review): assumes a single-channel 2-D image; a multi-channel
    # array would count every channel here - confirm the input format
    image_size_pixel = image.size
    image_size_um = image_size_pixel * pixel_um_square

    # gaussian blur improves segmentation quality
    # maybe experiment with low pass filters in future
    image_gaussian_blur = gaussian(image, sigma=2)

    # entropy filter
    entropy_filter_image = entropy(image_gaussian_blur, disk(disk_size))

    # otsu segmentation: pixels at or below the threshold (low local
    # entropy) are marked True and counted as gap
    otsu_thresh = threshold_otsu(entropy_filter_image)
    otsu_binary = entropy_filter_image <= otsu_thresh

    # remove holes
    otsu_binary = remove_small_holes(otsu_binary, area_threshold=hole_size)
    # remove small objects, not very effective
    otsu_binary = remove_small_objects(otsu_binary, min_size=object_size)

    # pixel area of the gap = number of True pixels in the mask
    area_gap_pixel = np.count_nonzero(otsu_binary)
    # scale the gap pixel area with the same calibration factor
    area_gap_um = area_gap_pixel * pixel_um_square

    # NOTE(review): as computed this is the percentage of the image
    # area still occupied by the gap - confirm that this is the
    # intended meaning of "percent closure" downstream
    percent_closure = 100 * (area_gap_um / image_size_um)
    area_cells_um = image_size_um - area_gap_um

    # organize output (one row of the final CSV)
    return [date_experiment, img, ph, iteration, time_hour,
            area_gap_um, area_cells_um, percent_closure]
# Run process_image over every matching image through joblib:
# delayed() wraps each process_image(img) call so that Parallel can
# dispatch it to one of numberParallelJobs workers, and the per-image
# result lists come back in input order.
# Thanks to: https://stackoverflow.com/questions/42220458/what-does-the-delayed-function-do-when-used-with-joblib-in-python
image_files = sorted(glob(os.path.join(input_directory, "*.tif")))

# Fail early with a clear message: with zero images the table would have
# no columns and naming them would raise an opaque length-mismatch error.
if not image_files:
    raise FileNotFoundError(
        "no *.tif images found in input_directory: " + input_directory
    )

rows = Parallel(n_jobs=numberParallelJobs)(
    delayed(process_image)(img) for img in image_files
)

# headers of the output dataframe, in the order process_image returns
# its values
dataframe_headers = ['date', 'file', 'pH', 'iteration', 'timePoint',
                     'area_scratch_um', 'area_cells_um',
                     'percentClosure']
dataframe = pd.DataFrame(rows, columns=dataframe_headers)

# write to csv
dataframe.to_csv(output_file, sep=',', encoding='utf-8')