diff --git a/loopy/options.py b/loopy/options.py index 9f12814b0..746e23dc2 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -95,6 +95,13 @@ class Options(ImmutableRecord): Whether loopy should issue an error if a dependency expression does not match any instructions in the kernel. + .. attribute:: use_dependencies_v2 + + Whether loopy should take into account the new statement-instance-level + dependencies during linearization, rather than the old, statement-level + dependencies. New dependencies are stored in the *dependencies* + attribute of each statement. + .. rubric:: Invocation-related options .. attribute:: skip_arg_checks @@ -242,6 +249,7 @@ def __init__( disable_global_barriers=kwargs.get("disable_global_barriers", False), check_dep_resolution=kwargs.get("check_dep_resolution", True), + use_dependencies_v2=kwargs.get("use_dependencies_v2", False), enforce_variable_access_ordered=kwargs.get( "enforce_variable_access_ordered", True), diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 5822f44ed..630ec0007 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -253,57 +253,100 @@ def find_loop_nest_around_map(kernel): return result -def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): +def find_loop_insn_dep_map( + kernel, loop_nest_with_map, loop_nest_around_map, + simplified_depends_on_graph): """Returns a dictionary mapping inames to other instruction ids that need to be scheduled before the iname should be eligible for scheduling. 
+ + :arg loop_nest_with_map: Dictionary mapping iname1 to a set containing + iname2 iff either iname1 nests around iname2 or iname2 nests around + iname1 + + :arg loop_nest_around_map: Dictionary mapping iname1 to a set containing + iname2 iff iname2 nests around iname1 + + :arg simplified_depends_on_graph: Dictionary mapping depender statement IDs + to sets of dependee statement IDs, as produced by + `loopy.schedule.checker.dependency.filter_deps_by_intersection_with_SAME`, + which will be used to acquire dependee statement ids if + `kernel.options.use_dependencies_v2` is 'True' (otherwise old + dependencies in insn.depends_on will be used). + """ result = {} from loopy.kernel.data import ConcurrentTag, IlpBaseTag + # For each insn, examine its inames (`iname`) and its dependees' inames + # (`dep_iname`) to determine which instructions must be scheduled before + # entering the iname loop. + # Create result dict, which maps iname to instructions that must be + # scheduled prior to entering iname.
+ + # For each insn, loop over its non-concurrent inames (`iname`) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): + + # (Ignore concurrent inames) if kernel.iname_tags_of_type(iname, ConcurrentTag): continue - + # Let iname_dep be the set of ids associated with result[iname] + # (if iname is not already in result, add iname as a key) iname_dep = result.setdefault(iname, set()) - for dep_insn_id in insn.depends_on: + # Loop over instructions on which insn depends (dep_insn) + # and determine whether dep_insn must be scheduled before + # iname, in which case add its id to iname_dep (result[iname]) + if kernel.options.use_dependencies_v2: + dependee_ids = simplified_depends_on_graph.get(insn.id, set()) + else: + dependee_ids = insn.depends_on + + for dep_insn_id in dependee_ids: if dep_insn_id in iname_dep: # already depending, nothing to check continue - dep_insn = kernel.id_to_insn[dep_insn_id] - dep_insn_inames = dep_insn.within_inames + dep_insn = kernel.id_to_insn[dep_insn_id] # Dependee + dep_insn_inames = dep_insn.within_inames # Dependee inames + # Check whether insn's iname is also in dependee inames if iname in dep_insn_inames: - # Nothing to be learned, dependency is in loop over iname + # Nothing to be learned, dependee is inside loop over iname # already. continue # To make sure dep_insn belongs outside of iname, we must prove - # that all inames that dep_insn will be executed in nest + # that all inames in which dep_insn will be executed nest # outside of the loop over *iname*. (i.e. nested around, or # before). + # Loop over each of the dependee's inames (dep_insn_iname) may_add_to_loop_dep_map = True for dep_insn_iname in dep_insn_inames: + + # If loop_nest_around_map says dep_insn_iname nests around + # iname, dep_insn_iname is guaranteed to nest outside of + # iname, we're safe, so continue if dep_insn_iname in loop_nest_around_map[iname]: - # dep_insn_iname is guaranteed to nest outside of iname - # -> safe.
continue + # If dep_insn_iname is concurrent, continue + # (parallel tags don't really nest, so disregard them here) if kernel.iname_tags_of_type(dep_insn_iname, (ConcurrentTag, IlpBaseTag)): - # Parallel tags don't really nest, so we'll disregard - # them here. continue + # If loop_nest_with_map says dep_insn_iname does not nest + # inside or around iname, it must be nested separately; + # we're safe, so continue if dep_insn_iname not in loop_nest_with_map.get(iname, []): - # dep_insn_iname does not nest with iname, so its nest - # must occur outside. continue + # If none of the three cases above succeeds for any + # dep_insn_iname in dep_insn_inames, we cannot add dep_insn + # to iname's set of insns in result dict. may_add_to_loop_dep_map = False break @@ -318,6 +361,9 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): dep_insn=dep_insn_id, insn=insn.id)) + # If at least one of the three cases above succeeds for every + # dep_insn_iname, we can add dep_insn to iname's set of insns + # in result dict. 
iname_dep.add(dep_insn_id) return result @@ -333,16 +379,24 @@ def group_insn_counts(kernel): return result -def gen_dependencies_except(kernel, insn_id, except_insn_ids): - insn = kernel.id_to_insn[insn_id] - for dep_id in insn.depends_on: +def gen_dependencies_except( + kernel, insn_id, except_insn_ids, simplified_depends_on_graph): + + # Get dependee IDs + if kernel.options.use_dependencies_v2: + dependee_ids = simplified_depends_on_graph.get(insn_id, set()) + else: + dependee_ids = kernel.id_to_insn[insn_id].depends_on + + for dep_id in dependee_ids: if dep_id in except_insn_ids: continue yield dep_id - yield from gen_dependencies_except(kernel, dep_id, except_insn_ids) + yield from gen_dependencies_except( + kernel, dep_id, except_insn_ids, simplified_depends_on_graph) def get_priority_tiers(wanted, priorities): @@ -630,6 +684,7 @@ class SchedulerState(ImmutableRecord): A list of loopy :class:`Instruction` objects in topologically sorted order with instruction priorities as tie breaker. 
""" + # TODO document simplified_depends_on_graph @property def last_entered_loop(self): @@ -641,12 +696,20 @@ def last_entered_loop(self): # }}} -def get_insns_in_topologically_sorted_order(kernel): +def get_insns_in_topologically_sorted_order( + kernel, simplified_depends_on_graph): from pytools.graph import compute_topological_order rev_dep_map = {insn.id: set() for insn in kernel.instructions} for insn in kernel.instructions: - for dep in insn.depends_on: + + if kernel.options.use_dependencies_v2: + dependee_ids = simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + for dep in dependee_ids: rev_dep_map[dep].add(insn.id) # For breaking ties, we compare the features of an intruction @@ -680,7 +743,8 @@ def key(insn_id): # {{{ schedule_as_many_run_insns_as_possible -def schedule_as_many_run_insns_as_possible(sched_state, template_insn): +def schedule_as_many_run_insns_as_possible( + sched_state, template_insn, use_dependencies_v2): """ Returns an instance of :class:`loopy.schedule.SchedulerState`, by appending all reachable instructions that are similar to *template_insn*. 
We define @@ -748,7 +812,13 @@ def is_similar_to_template(insn): if is_similar_to_template(insn): # check reachability - if not (insn.depends_on & ignored_unscheduled_insn_ids): + if use_dependencies_v2: + dependee_ids = sched_state.simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + if not (dependee_ids & ignored_unscheduled_insn_ids): if insn.id in sched_state.prescheduled_insn_ids: if next_preschedule_insn_id() == insn.id: preschedule.pop(0) @@ -937,7 +1007,14 @@ def insn_sort_key(insn_id): for insn_id in insn_ids_to_try: insn = kernel.id_to_insn[insn_id] - is_ready = insn.depends_on <= sched_state.scheduled_insn_ids + # make sure dependees have been scheduled + if kernel.options.use_dependencies_v2: + dependee_ids = sched_state.simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + is_ready = dependee_ids <= sched_state.scheduled_insn_ids if not is_ready: continue @@ -1068,8 +1145,8 @@ def insn_sort_key(insn_id): insns_in_topologically_sorted_order=new_toposorted_insns, ) - new_sched_state = schedule_as_many_run_insns_as_possible(new_sched_state, - insn) + new_sched_state = schedule_as_many_run_insns_as_possible( + new_sched_state, insn, kernel.options.use_dependencies_v2) # Don't be eager about entering/leaving loops--if progress has been # made, revert to top of scheduler and see if more progress can be @@ -1116,8 +1193,10 @@ def insn_sort_key(insn_id): # check if there's a dependency of insn that needs to be # outside of last_entered_loop. 
- for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.scheduled_insn_ids): + for subdep_id in gen_dependencies_except( + kernel, insn_id, + sched_state.scheduled_insn_ids, + sched_state.simplified_depends_on_graph): want = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( @@ -1754,10 +1833,10 @@ def _insn_ids_reaching_end(schedule, kind, reverse): return insn_ids_alive_at_scope[-1] -def append_barrier_or_raise_error(kernel_name, schedule, dep, verify_only): +def append_barrier_or_raise_error( + kernel_name, schedule, dep, verify_only, use_dependencies_v2=False): if verify_only: - from loopy.diagnostic import MissingBarrierError - raise MissingBarrierError( + err_str = ( "%s: Dependency '%s' (for variable '%s') " "requires synchronization " "by a %s barrier (add a 'no_sync_with' " @@ -1769,6 +1848,14 @@ def append_barrier_or_raise_error(kernel_name, schedule, dep, verify_only): tgt=dep.target.id, src=dep.source.id), dep.variable, dep.var_kind)) + # TODO need to update all this with v2 deps. For now, make this a warning. + # Do full fix for this later + if use_dependencies_v2: + from warnings import warn + warn(err_str) + else: + from loopy.diagnostic import MissingBarrierError + raise MissingBarrierError(err_str) else: comment = "for {} ({})".format( dep.variable, dep.dep_descr.format( @@ -1836,7 +1923,8 @@ def insert_barriers_at_outer_level(schedule, reverse=False): dep_tracker.gen_dependencies_with_target_at(insn) for insn in loop_head): append_barrier_or_raise_error( - kernel.name, result, dep, verify_only) + kernel.name, result, dep, verify_only, + kernel.options.use_dependencies_v2) # This barrier gets inserted outside the loop, hence it is # executed unconditionally and so kills all sources before # the loop. 
@@ -1869,7 +1957,8 @@ def insert_barriers_at_outer_level(schedule, reverse=False): for dep in dep_tracker.gen_dependencies_with_target_at( sched_item.insn_id): append_barrier_or_raise_error( - kernel.name, result, dep, verify_only) + kernel.name, result, dep, verify_only, + kernel.options.use_dependencies_v2) dep_tracker.discard_all_sources() break result.append(sched_item) @@ -1998,13 +2087,32 @@ def generate_loop_schedules_inner(kernel, callables_table, debug_args=None): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) + + # {{{ Create simplified dependency graph with edge from *depender* to + # *dependee* iff intersection (SAME_map & DEP_map) is not empty + + if kernel.options.use_dependencies_v2: + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + + # Get dep graph edges with edges FROM depender TO dependee + simplified_depends_on_graph = filter_deps_by_intersection_with_SAME(kernel) + else: + simplified_depends_on_graph = None + + # }}} + sched_state = SchedulerState( kernel=kernel, loop_nest_around_map=loop_nest_around_map, loop_insn_dep_map=find_loop_insn_dep_map( kernel, loop_nest_with_map=loop_nest_with_map, - loop_nest_around_map=loop_nest_around_map), + loop_nest_around_map=loop_nest_around_map, + simplified_depends_on_graph=simplified_depends_on_graph, + ), + simplified_depends_on_graph=simplified_depends_on_graph, breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, @@ -2034,7 +2142,8 @@ def generate_loop_schedules_inner(kernel, callables_table, debug_args=None): active_group_counts={}, insns_in_topologically_sorted_order=( - get_insns_in_topologically_sorted_order(kernel)), + get_insns_in_topologically_sorted_order( + kernel, simplified_depends_on_graph)), ) schedule_gen_kwargs = {} diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py new file mode 100644 index 000000000..4f6e6d70e --- 
/dev/null +++ b/loopy/schedule/checker/dependency.py @@ -0,0 +1,136 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +class DependencyType: + """Strings specifying a particular type of dependency relationship. + + .. attribute:: SAME + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff SAME({i, j})`` specifies that + ``insn0 happens before insn1 iff {i' = i and j' = j and ...}``. + Note that ``SAME({}) = True``. + + .. 
attribute:: PRIOR + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, k, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', k', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, k, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff PRIOR({i, j, k})`` specifies one of + two possibilities, depending on whether the loop nest ordering is + known. If the loop nest ordering is unknown, then + ``insn0 happens before insn1 iff {i' < i and j' < j and k' < k ...}``. + If the loop nest ordering is known, the condition becomes + ``{i', j', k', ...}`` is lexicographically less than ``{i, j, k, ...}``, + i.e., ``i' < i or (i' = i and j' < j) or (i' = i and j' = j and k' < k) ...``. + + """ + + SAME = "same" + PRIOR = "prior" + + +def filter_deps_by_intersection_with_SAME(knl): + # Determine which dep relations have a non-empty intersection with + # the SAME relation + # TODO document + + from loopy.schedule.checker.utils import ( + append_mark_to_strings, + partition_inames_by_concurrency, + create_elementwise_comparison_conjunction_set, + convert_map_to_set, + convert_set_back_to_map, + ) + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + ) + _, non_conc_inames = partition_inames_by_concurrency(knl) + + # NOTE: deps filtered will map depender->dependee + deps_filtered = {} + for stmt in knl.instructions: + + if hasattr(stmt, "dependencies") and stmt.dependencies: + + depender_id = stmt.id + + for dependee_id, dep_maps in stmt.dependencies.items(): + + # Continue if we've been told to ignore this dependee + # (non_linearizing_deps is only an attribute of stmt in one + # (unmerged) branch, and may be eliminated) + if ( + hasattr(stmt, "non_linearizing_deps") and + stmt.non_linearizing_deps is not None and + dependee_id in stmt.non_linearizing_deps): + continue + + # Continue if we already have this pair + if 
depender_id in deps_filtered.keys() and ( + dependee_id in deps_filtered[depender_id]): + continue + + for dep_map in dep_maps: + # Create isl map representing "SAME" dep for these two insns + + # Get shared nonconcurrent inames + depender_inames = knl.id_to_insn[depender_id].within_inames + dependee_inames = knl.id_to_insn[dependee_id].within_inames + shared_nc_inames = ( + depender_inames & dependee_inames & non_conc_inames) + + # Temporarily convert to set + dep_set_space, n_in_dims, n_out_dims = convert_map_to_set( + dep_map.space) + + # Create SAME relation + same_set_affs = isl.affs_from_space(dep_set_space) + same_set = create_elementwise_comparison_conjunction_set( + shared_nc_inames, + append_mark_to_strings(shared_nc_inames, BEFORE_MARK), + same_set_affs) + + # Convert back to map + same_map = convert_set_back_to_map( + same_set, n_in_dims, n_out_dims) + + # Don't need to intersect same_map with iname bounds (I think..?) + + # See whether the intersection of dep map and SAME is empty + intersect_dep_and_same = same_map & dep_map + intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) + + if intersect_not_empty: + deps_filtered.setdefault(depender_id, set()).add(dependee_id) + break # No need to check any more deps for this pair + + return deps_filtered diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 12d3480e1..1c60834de 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -197,7 +197,6 @@ def append_mark_to_isl_map_var_names(old_isl_map, dt, mark): def append_mark_to_strings(strings, mark): - assert isinstance(strings, list) return [s+mark for s in strings] @@ -340,6 +339,21 @@ def sorted_union_of_names_in_isl_sets( return sorted(inames) +def convert_map_to_set(isl_map): + # (also works for spaces) + n_in_dims = len(isl_map.get_var_names(dim_type.in_)) + n_out_dims = len(isl_map.get_var_names(dim_type.out)) + return isl_map.move_dims( + dim_type.in_, n_in_dims, dim_type.out, 
0, n_out_dims + ).domain(), n_in_dims, n_out_dims + + +def convert_set_back_to_map(isl_set, n_old_in_dims, n_old_out_dims): + return isl.Map.from_domain( + isl_set).move_dims( + dim_type.out, 0, dim_type.in_, n_old_in_dims, n_old_out_dims) + + def create_symbolic_map_from_tuples( tuple_pairs_with_domains, space, diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 4258a2c52..e95452ae9 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -151,6 +151,13 @@ def _process_and_linearize(knl, knl_name="loopy_kernel"): proc_knl[knl_name], proc_knl.callables_table) return lin_knl.linearization, proc_knl[knl_name], lin_knl + +def _get_runinstruction_ids_from_linearization(lin_items): + from loopy.schedule import RunInstruction + return [ + lin_item.insn_id for lin_item in lin_items + if isinstance(lin_item, RunInstruction)] + # }}} @@ -1586,10 +1593,7 @@ def test_sios_with_matmul(): lin_items, proc_knl, lin_knl = _process_and_linearize(knl) # Get ALL statement id pairs - from loopy.schedule import RunInstruction - all_stmt_ids = [ - lin_item.insn_id for lin_item in lin_items - if isinstance(lin_item, RunInstruction)] + all_stmt_ids = _get_runinstruction_ids_from_linearization(lin_items) from itertools import product stmt_id_pairs = [] for idx, sid in enumerate(all_stmt_ids): @@ -1807,8 +1811,7 @@ def test_add_dependency_with_new_deps(): # Add a dependency to stmt_b dep_b_on_a = make_dep_map( - "[pi] -> {{ [i'] -> [i] : i > i' " - "and {0} " + "[pi] -> {{ [i'] -> [i] : i > i' and {0} " "}}".format(assumptions_str), knl_with_domains=knl["loopy_kernel"]) knl = lp.add_dependency(knl, "id:stmt_b", ("id:stmt_a", dep_b_on_a)) @@ -1826,7 +1829,8 @@ def test_add_dependency_with_new_deps(): dep_b_on_a_test = _isl_map_with_marked_dims( "[pi] -> {{ [{3}'=0, i'] -> [{3}=1, i] : i > i' " - "and {0} and {1} and {2} }}".format( + "and {0} and {1} and {2} " + "}}".format( i_range_str, i_range_str_p, assumptions_str, 
@@ -1853,11 +1857,18 @@ def test_add_dependency_with_new_deps(): else: assert not stmt.dependencies + # Add a second dependency to stmt_b + dep_b_on_a_2 = make_dep_map( + "[pi] -> {{ [i'] -> [i] : i = i' and {0}" + "}}".format(assumptions_str), + knl_with_domains=knl["loopy_kernel"]) + # {{{ Test make_dep_map while we're here dep_b_on_a_2_test = _isl_map_with_marked_dims( "[pi] -> {{ [{3}'=0, i'] -> [{3}=1, i] : i = i' " - "and {0} and {1} and {2} }}".format( + "and {0} and {1} and {2} " + "}}".format( i_range_str, i_range_str_p, assumptions_str, @@ -1870,14 +1881,12 @@ def test_add_dependency_with_new_deps(): # Add dependencies to stmt_c dep_c_on_a = make_dep_map( - "[pi] -> {{ [i'] -> [i] : i >= i' " - "and {0} " + "[pi] -> {{ [i'] -> [i] : i >= i' and {0} " "}}".format(assumptions_str), knl_with_domains=knl["loopy_kernel"]) dep_c_on_b = make_dep_map( - "[pi] -> {{ [i'] -> [i] : i >= i' " - "and {0} " + "[pi] -> {{ [i'] -> [i] : i >= i' and {0} " "}}".format(assumptions_str), knl_with_domains=knl["loopy_kernel"]) @@ -1951,8 +1960,8 @@ def test_make_dep_map(): # Create expected dep dep_b_on_a_test = _isl_map_with_marked_dims( - "[n] -> {{ [{0}'=0, i', j'] -> [{0}=1, i, k] : i > i' and j' < k" - " and {1} }}".format( + "[n] -> {{ [{0}'=0, i', j'] -> [{0}=1, i, k] : i > i' and j' < k and {1} " + "}}".format( STATEMENT_VAR_NAME, " and ".join([ i_range_str, @@ -2090,6 +2099,206 @@ def test_new_dependencies_finite_diff(): # }}} + +# {{{ Dependency handling during linearization + +# {{{ test_filtering_deps_by_same + +def test_filtering_deps_by_same(): + + # Make a kernel (just need something that can carry deps) + knl = lp.make_kernel( + "{[i,j,k,m] : 0 <= i,j,k,m < n}", + """ + a[i,j,k,m] = 5 {id=s5} + a[i,j,k,m] = 4 {id=s4} + a[i,j,k,m] = 3 {id=s3} + a[i,j,k,m] = 2 {id=s2} + a[i,j,k,m] = 1 {id=s1} + """) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + knl = lp.tag_inames(knl, "m:l.0") + + # Make some deps + + def _dep_with_condition(stmt_before, stmt_after, 
cond): + sid_after = 0 if stmt_before == stmt_after else 1 + return _isl_map_with_marked_dims( + "[n] -> {{" + "[{0}'=0, i', j', k', m'] -> [{0}={1}, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {2}" + "}}".format( + STATEMENT_VAR_NAME, sid_after, cond)) + + dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k'=k and m't5 = 5 {id=s5} + <>t3 = 3 {id=s3} + <>t4 = 4 {id=s4} + <>t1 = 1 {id=s1} + <>t2 = 2 {id=s2} + end + """) + knl = lp.tag_inames(knl, "m:l.0") + + stmt_ids_ordered_desired = ["s1", "s2", "s3", "s4", "s5"] + + # {{{ Add some deps + + def _dep_with_condition(stmt_before, stmt_after, cond): + sid_after = 0 if stmt_before == stmt_after else 1 + return _isl_map_with_marked_dims( + "[n] -> {{" + "[{0}'=0, i', j', k', m'] -> [{0}={1}, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {2}" + "}}".format( + STATEMENT_VAR_NAME, sid_after, cond)) + + # Should NOT create an edge: + dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s2_on_s1_2 = _dep_with_condition(2, 1, "i'<=i and j'<=j and k' =k and m'=m") + # Should NOT create an edge: + dep_s2_on_s2_1 = _dep_with_condition(2, 2, "i'< i and j'<=j and k' =k and m'=m") + # Should NOT create an edge: + dep_s2_on_s2_2 = _dep_with_condition(2, 2, "i'<=i and j'<=j and k'< k and m'=m") + # Should create an edge: + dep_s3_on_s2_1 = _dep_with_condition(3, 2, "i'<=i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s4_on_s3_1 = _dep_with_condition(4, 3, "i'<=i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s5_on_s4_1 = _dep_with_condition(5, 4, "i' =i and j' =j and k' =k and m'=m") + + knl = lp.add_dependency(knl, "id:s2", ("id:s1", dep_s2_on_s1_1)) + knl = lp.add_dependency(knl, "id:s2", ("id:s1", dep_s2_on_s1_2)) + knl = lp.add_dependency(knl, "id:s2", ("id:s2", dep_s2_on_s2_1)) + knl = lp.add_dependency(knl, "id:s2", ("id:s2", dep_s2_on_s2_2)) + knl = lp.add_dependency(knl, "id:s3", ("id:s2", 
dep_s3_on_s2_1)) + knl = lp.add_dependency(knl, "id:s4", ("id:s3", dep_s4_on_s3_1)) + knl = lp.add_dependency(knl, "id:s5", ("id:s4", dep_s5_on_s4_1)) + + # }}} + + # {{{ Test filtering of deps by intersection with SAME + + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME( + knl["loopy_kernel"]) + + # Make sure filtered edges are correct + + # (m is concurrent so shouldn't matter) + depends_on_dict_expected = { + "s2": set(["s1"]), + "s3": set(["s2"]), + "s4": set(["s3"]), + "s5": set(["s4"]), + } + + assert filtered_depends_on_dict == depends_on_dict_expected + + # }}} + + # {{{ Get a linearization WITHOUT using the simplified dep graph + + knl = lp.set_options(knl, use_dependencies_v2=False) + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + + # Check stmt order (should be wrong) + stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) + assert stmt_ids_ordered != stmt_ids_ordered_desired + + # Check dep satisfaction (should not all be satisfied) + unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) + assert unsatisfied_deps + + # }}} + + # {{{ Get a linearization using the simplified dep graph + + knl = lp.set_options(knl, use_dependencies_v2=True) + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + + # Check stmt order + stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) + assert stmt_ids_ordered == stmt_ids_ordered_desired + + # Check dep satisfaction + unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) + assert not unsatisfied_deps + + # }}} + +# }}} + +# }}} + # }}}