Games Task Scheduler (GTS)
A multi-processor scheduling framework for games engines
7_strongly_dynamic_task_graph_wavefront.h
/*******************************************************************************
 * Copyright 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 ******************************************************************************/
#pragma once

#include <inttypes.h>
#include <cstdio>    // printf
#include <cstring>   // memset
#include <iostream>  // std::cout
#include <vector>
#include <chrono>

#include <gts/micro_scheduler/WorkerPool.h>
#include <gts/micro_scheduler/MicroScheduler.h>
using namespace gts;

namespace gts_examples {

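// A cell in the memoization grid. 'sum' holds the computed prefix sum and
// 'dependencies' counts the cell's unsatisfied predecessors (ii-1, jj) and
// (ii, jj-1); the cell's task becomes ready when the count reaches zero.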
struct GridCell
{
    uint64_t sum = 0;
    Atomic<uint8_t> dependencies = { 0 };
};

//------------------------------------------------------------------------------
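// Computes the prefix sum for cell (ii, jj) from its two predecessors, then
// decrements the dependency counts of the downstream cells (ii+1, jj) and
// (ii, jj+1) and spawns whichever of them became ready. Successors hang off an
// empty continuation task rather than being waited on, so the wavefront unfolds
// without blocking or growing the stack.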
Task* calculateSum(GridCell** grid, int ii, int jj, int width, int height, TaskContext const& ctx)
{
    GridCell& myCell = grid[ii][jj];
    myCell.sum += (ii != 0 ? grid[ii-1][jj].sum : 0) + (jj != 0 ? grid[ii][jj-1].sum : 0);

    // Release the downstream neighbors. A neighbor becomes a ready successor
    // once its dependency count reaches zero.
    bool hasBottomSuccessor = (ii + 1 < width) && (grid[ii+1][jj].dependencies.fetch_sub(1, memory_order::acq_rel) - 1 == 0);
    bool hasRightSuccessor  = (jj + 1 < height) && (grid[ii][jj+1].dependencies.fetch_sub(1, memory_order::acq_rel) - 1 == 0);

    // If any successors became ready, spawn them.
    if (hasBottomSuccessor || hasRightSuccessor)
    {
        // We have to use continuations because blocking would cause a stack overflow.
        // Since there is nothing to continue to, we use an empty task.
        Task* pContinuation = ctx.pMicroScheduler->allocateTask<EmptyTask>();
        ctx.pThisTask->setContinuationTask(pContinuation);

        // Add references for the successors.
        uint32_t refCount = (hasBottomSuccessor ? 1 : 0) + (hasRightSuccessor ? 1 : 0);
        pContinuation->addRef(refCount, memory_order::relaxed);

        if (hasBottomSuccessor)
        {
            Task* pSuccessor = ctx.pMicroScheduler->allocateTask(calculateSum, grid, ii + 1, jj, width, height);
            pContinuation->addChildTaskWithoutRef(pSuccessor);
            ctx.pMicroScheduler->spawnTask(pSuccessor);
        }

        if (hasRightSuccessor)
        {
            Task* pSuccessor = ctx.pMicroScheduler->allocateTask(calculateSum, grid, ii, jj + 1, width, height);
            pContinuation->addChildTaskWithoutRef(pSuccessor);
            ctx.pMicroScheduler->spawnTask(pSuccessor);
        }
    }

    return nullptr;
}

//------------------------------------------------------------------------------
void stronglyDynamicTaskGraph_wavefront(int width, int height)
{
    printf ("================\n");
    printf ("stronglyDynamicTaskGraph_wavefront\n");
    printf ("================\n");

    /*
        The following example generates a task graph over a 2D grid, where each
        task computes a 2D prefix sum S(i,j) = S(i-1,j) + S(i,j-1).

        (0,0) --------> (1,0) --------> ... -> (width-1,0)
          |               |                         |
          V               V                         V
        (0,1) --------> (1,1) --------> ... -> (width-1,1)
          |               |                         |
          V               V                         V
          .               .                         .
          .               .                         .
          .               .                         .
          |               |                         |
          V               V                         V
        (0,height-1) -> (1,height-1) -> ... -> (width-1,height-1)

        This example generates the graph as the execution unfolds. It moves the
        data out of the Tasks and into a memoization table. The benefits are:
        1. Task creation is distributed.
        2. All computed data is available after execution.
        3. No special treatment is required for the last Task, unlike in
           weaklyDynamicTaskGraph_wavefront.
    */
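
    // Sanity check: with grid[0][0].sum = 1 and zero contributions from outside
    // the grid, the recurrence S(i,j) = S(i-1,j) + S(i,j-1) counts lattice paths,
    // so the final cell should hold the binomial coefficient
    // C((width-1) + (height-1), width-1) for grids small enough not to overflow.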

    // Init boilerplate
    WorkerPool workerPool;
    bool result = workerPool.initialize();
    GTS_ASSERT(result);
    MicroScheduler microScheduler;
    result = microScheduler.initialize(&workerPool);
    GTS_ASSERT(result);

    auto start = std::chrono::high_resolution_clock::now();

    // Build the grid and initialize each cell's dependency count: interior
    // cells have two predecessors, edge cells have one, and the root has none.
    GridCell** grid = new GridCell*[width];
    for (int ii = width - 1; ii >= 0; --ii)
    {
        grid[ii] = new GridCell[height];

        for (int jj = height - 1; jj >= 0; --jj)
        {
            grid[ii][jj].dependencies.store((ii > 0) + (jj > 0), memory_order::relaxed);
        }
    }

    // Set the initial value of the sum at the root.
    grid[0][0].sum = 1;

    Task* pTask = microScheduler.allocateTask(calculateSum, grid, 0, 0, width, height);
    microScheduler.spawnTaskAndWait(pTask);

    printf("Sum: %" PRIu64 "\n", grid[width-1][height-1].sum);

    for (int ii = width - 1; ii >= 0; --ii)
    {
        delete[] grid[ii];
    }
    delete[] grid;

    auto end = std::chrono::high_resolution_clock::now();
    std::cout << "Time (ms): " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << std::endl;
}

//------------------------------------------------------------------------------
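// Serial 2D prefix sum over the 'dimensions' x 'dimensions' block whose
// first cell is (ii, jj); used as the leaf case of the divide-and-conquer
// version below.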
void wavefront(uint64_t** grid, int ii, int jj, int dimensions)
{
    for (int x = ii; x < ii + dimensions; ++x)
    {
        for (int y = jj; y < jj + dimensions; ++y)
        {
            grid[x][y] += (x > 0 ? grid[x-1][y] : 0) + (y > 0 ? grid[x][y-1] : 0);
        }
    }
}

//------------------------------------------------------------------------------
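// Divide-and-conquer wavefront: computes the (ii, jj) quarter block first and
// waits for it, then runs the (ii+halfDim, jj) and (ii, jj+halfDim) blocks in
// parallel as children of a continuation that computes the
// (ii+halfDim, jj+halfDim) block once both finish.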
Task* wavefrontDivAndConq(uint64_t** grid, int ii, int jj, int dimensions, TaskContext const& ctx)
{
    if (dimensions <= 16)
    {
        wavefront(grid, ii, jj, dimensions);
    }
    else
    {
        int halfDim = dimensions / 2;

        // Do the first quarter block's recursive work and wait for it; the
        // other three blocks depend on its results.
        Task* pChild = ctx.pMicroScheduler->allocateTask(wavefrontDivAndConq, grid, ii, jj, halfDim);
        ctx.pThisTask->addRef(2, memory_order::relaxed);
        ctx.pThisTask->addChildTaskWithoutRef(pChild);
        ctx.pThisTask->spawnAndWaitForAll(pChild);

        // Create the continuation that computes the (ii+halfDim, jj+halfDim)
        // block once the 2 child blocks complete.
        Task* pContinuation = ctx.pMicroScheduler->allocateTask(wavefrontDivAndConq, grid, ii + halfDim, jj + halfDim, halfDim);
        ctx.pThisTask->setContinuationTask(pContinuation);
        pContinuation->addRef(2, memory_order::relaxed);

        //
        // Create and spawn the 2 child blocks.

        pChild = ctx.pMicroScheduler->allocateTask(wavefrontDivAndConq, grid, ii + halfDim, jj, halfDim);
        pContinuation->addChildTaskWithoutRef(pChild);
        ctx.pMicroScheduler->spawnTask(pChild);

        pChild = ctx.pMicroScheduler->allocateTask(wavefrontDivAndConq, grid, ii, jj + halfDim, halfDim);
        pContinuation->addChildTaskWithoutRef(pChild);
        ctx.pMicroScheduler->spawnTask(pChild);
    }

    return nullptr;
}

//------------------------------------------------------------------------------
void stronglyDynamicTaskGraph_divideAndConquerWavefront(int dim)
{
    printf ("================\n");
    printf ("stronglyDynamicTaskGraph_divideAndConquerWavefront\n");
    printf ("================\n");

    /*
        The following example generates a task graph over a 2D grid, where each
        task computes a 2D prefix sum S(i,j) = S(i-1,j) + S(i,j-1).

        (0,0) --------> (1,0) --------> ... -> (dim-1,0)
          |               |                        |
          V               V                        V
        (0,1) --------> (1,1) --------> ... -> (dim-1,1)
          |               |                        |
          V               V                        V
          .               .                        .
          .               .                        .
          .               .                        .
          |               |                        |
          V               V                        V
        (0,dim-1) ----> (1,dim-1) ----> ... -> (dim-1,dim-1)
    */
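
    // Note: the recursion in wavefrontDivAndConq assumes every block size above
    // the 16-wide leaf threshold is even (which holds, for example, when dim is
    // a power of two); otherwise the four half-size blocks do not tile the full
    // grid and a trailing row/column of cells is skipped.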

    // Init boilerplate
    WorkerPool workerPool;
    bool result = workerPool.initialize();
    GTS_ASSERT(result);
    MicroScheduler microScheduler;
    result = microScheduler.initialize(&workerPool);
    GTS_ASSERT(result);

    auto start = std::chrono::high_resolution_clock::now();

    // Build the grid as a single contiguous, zeroed buffer with per-row pointers.
    uint64_t* buffer = new uint64_t[dim*dim];
    uint64_t** grid = new uint64_t*[dim];
    for (int ii = 0; ii < dim; ++ii)
    {
        grid[ii] = buffer + dim * ii;
        memset(grid[ii], 0, sizeof(uint64_t) * dim);
    }

    // Set the initial value of the sum at the root.
    grid[0][0] = 1;

    Task* pTask = microScheduler.allocateTask(wavefrontDivAndConq, grid, 0, 0, dim);
    microScheduler.spawnTaskAndWait(pTask);

    printf("Sum: %" PRIu64 "\n", grid[dim-1][dim-1]);

    delete[] grid;
    delete[] buffer;

    auto end = std::chrono::high_resolution_clock::now();
    std::cout << "Time (ms): " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << std::endl;
}

} // namespace gts_examples
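
// Hypothetical usage sketch (not part of the original example): a small driver
// that runs both wavefront variants; the grid sizes below are arbitrary, with
// 1024 chosen as a power of two to satisfy the divide-and-conquer assumption.
//
//   #include "7_strongly_dynamic_task_graph_wavefront.h"
//
//   int main()
//   {
//       gts_examples::stronglyDynamicTaskGraph_wavefront(1000, 1000);
//       gts_examples::stronglyDynamicTaskGraph_divideAndConquerWavefront(1024);
//       return 0;
//   }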