Skip to content

Commit

Permalink
Better follow line limits
Browse files Browse the repository at this point in the history
  • Loading branch information
neil-lindquist committed Nov 1, 2023
1 parent 3b6cf71 commit 3bedce1
Show file tree
Hide file tree
Showing 21 changed files with 64 additions and 77 deletions.
6 changes: 3 additions & 3 deletions src/internal/internal_geadd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,9 @@ void add(internal::TargetType<Target::Devices>,
scalar_t** b_array_host = a_array_host + batch_size;

auto group_params = device_regions_build<false, 2, scalar_t>(
{A, B},
{a_array_host, b_array_host},
device );
{A, B},
{a_array_host, b_array_host},
device );

scalar_t** a_array_dev = B.array_device(device, queue_index);
scalar_t** b_array_dev = a_array_dev + batch_size;
Expand Down
8 changes: 4 additions & 4 deletions src/internal/internal_gecopy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,10 @@ void copy(internal::TargetType<Target::Devices>,
++batch_count;
};
auto group_params = device_regions_build<false, 1, dst_scalar_t>(
{B},
{b_array_host},
device,
setup_A );
{B},
{b_array_host},
device,
setup_A );

// Usually the output matrix (B) provides all the batch arrays.
// Here we are using A, because of the different types.
Expand Down
6 changes: 3 additions & 3 deletions src/internal/internal_gemm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,9 @@ void gemm(internal::TargetType<Target::Devices>,

// C comes first since we do computation for a local C
auto group_params = device_regions_build<false, 3, scalar_t>(
{C, A, B},
{c_array_host, a_array_host, b_array_host},
device );
{C, A, B},
{c_array_host, a_array_host, b_array_host},
device );

if (C.op() != Op::NoTrans) {
swap(opA, opB);
Expand Down
6 changes: 3 additions & 3 deletions src/internal/internal_gemmA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,9 @@ void gemmA(internal::TargetType<Target::Devices>,
auto B_j = B.sub( j, j, 0, 0 );
// A comes first since we do computation for a local A
auto group_params = device_regions_build<false, 3, scalar_t>(
{A_j, B_j, C},
{a_array_host, b_array_host, c_array_host},
device );
{A_j, B_j, C},
{a_array_host, b_array_host, c_array_host},
device );

trace::Block trace_block("blas::batch::gemm");

Expand Down
10 changes: 5 additions & 5 deletions src/internal/internal_genorm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -442,11 +442,11 @@ void norm(
scalar_t** a_array_host = A.array_host( device, queue_index );

auto group_params = device_regions_build<false, 1, scalar_t>(
{A},
{a_array_host},
device,
{},
irange, jrange );
{A},
{a_array_host},
device,
{},
irange, jrange );

scalar_t** a_array_dev = A.array_device(device, queue_index);

Expand Down
4 changes: 1 addition & 3 deletions src/internal/internal_gescale.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,7 @@ void scale(internal::TargetType<Target::Devices>,
scalar_t** a_array_host = A.array_host( device, queue_index );

auto group_params = device_regions_build<false, 1, scalar_t>(
{A},
{a_array_host},
device );
{A}, {a_array_host}, device );

blas::Queue* queue = A.compute_queue( device, queue_index );

Expand Down
5 changes: 1 addition & 4 deletions src/internal/internal_gescale_row_col.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,7 @@ void scale_row_col(
++batch_count;
};
auto group_params = device_regions_build<false, 1, scalar_t>(
{A},
{a_array_host},
device,
store_rc );
{A}, {a_array_host}, device, store_rc );


scalar_t** a_array_dev = A.array_device( device, queue_index );
Expand Down
8 changes: 2 additions & 6 deletions src/internal/internal_geset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,9 @@ void set(internal::TargetType<Target::Devices>,

auto group_params = diag_same
? device_regions_build<true, 1, scalar_t, true>(
{A},
{a_array_host},
device )
{A}, {a_array_host}, device )
: device_regions_build<true, 1, scalar_t, false>(
{A},
{a_array_host},
device );
{A}, {a_array_host}, device );
blas::Queue* queue = A.compute_queue( device, queue_index );

scalar_t** a_array_dev = A.array_device( device, queue_index );
Expand Down
10 changes: 5 additions & 5 deletions src/internal/internal_henorm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,11 +398,11 @@ void norm(
scalar_t** a_array_host = A.array_host( device, queue_index );

auto group_params = device_regions_build<true, 1, scalar_t>(
{A},
{a_array_host},
device,
{},
ijrange, ijrange );
{A},
{a_array_host},
device,
{},
ijrange, ijrange );

scalar_t** a_array_dev = A.array_device(device, queue_index);

Expand Down
6 changes: 3 additions & 3 deletions src/internal/internal_her2k.cc
Original file line number Diff line number Diff line change
Expand Up @@ -596,9 +596,9 @@ void her2k(internal::TargetType<Target::Devices>,

// C comes first since we do computation for a local C
auto group_params = device_regions_build<true, 5, scalar_t>(
{C, A, AT, BT, B},
{c_array_host, a_array_host, at_array_host, b_array_host, bt_array_host},
device );
{C, A, AT, BT, B},
{c_array_host, a_array_host, at_array_host, b_array_host, bt_array_host},
device );


if (C.op() != Op::NoTrans) {
Expand Down
6 changes: 3 additions & 3 deletions src/internal/internal_herk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -502,9 +502,9 @@ void herk(internal::TargetType<Target::Devices>,

// C comes first since we do computation for a local C
auto group_params = device_regions_build<true, 3, scalar_t>(
{C, A, AT},
{c_array_host, a_array_host, b_array_host},
device );
{C, A, AT},
{c_array_host, a_array_host, b_array_host},
device );

if (C.op() != Op::NoTrans) {
swap(opA, opB);
Expand Down
10 changes: 5 additions & 5 deletions src/internal/internal_synorm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -401,11 +401,11 @@ void norm(internal::TargetType<Target::Devices>,
scalar_t** a_array_host = A.array_host( device, queue_index );

auto group_params = device_regions_build<true, 1, scalar_t>(
{A},
{a_array_host},
device,
{},
ijrange, ijrange );
{A},
{a_array_host},
device,
{},
ijrange, ijrange );

scalar_t** a_array_dev = A.array_device(device, queue_index);

Expand Down
6 changes: 3 additions & 3 deletions src/internal/internal_syr2k.cc
Original file line number Diff line number Diff line change
Expand Up @@ -576,9 +576,9 @@ void syr2k(internal::TargetType<Target::Devices>,

// C comes first since we do computation for a local C
auto group_params = device_regions_build<true, 5, scalar_t>(
{C, A, AT, BT, B},
{c_array_host, a_array_host, at_array_host, b_array_host, bt_array_host},
device );
{C, A, AT, BT, B},
{c_array_host, a_array_host, at_array_host, b_array_host, bt_array_host},
device );


if (C.op() != Op::NoTrans) {
Expand Down
6 changes: 3 additions & 3 deletions src/internal/internal_syrk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,9 @@ void syrk(internal::TargetType<Target::Devices>,

// C comes first since we do computation for a local C
auto group_params = device_regions_build<true, 3, scalar_t>(
{C, A, AT},
{c_array_host, a_array_host, b_array_host},
device );
{C, A, AT},
{c_array_host, a_array_host, b_array_host},
device );

if (C.op() != Op::NoTrans) {
swap(opA, opB);
Expand Down
6 changes: 3 additions & 3 deletions src/internal/internal_trmm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,9 @@ void trmm(internal::TargetType<Target::Devices>,

// B comes first since we do computation for a local B
auto group_params = device_regions_build<false, 2, scalar_t>(
{B, A},
{b_array_host, a_array_host},
device );
{B, A},
{b_array_host, a_array_host},
device );

{
trace::Block trace_block("blas::batch::trmm");
Expand Down
10 changes: 5 additions & 5 deletions src/internal/internal_trnorm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -423,11 +423,11 @@ void norm(
scalar_t** a_array_host = A.array_host( device, queue_index );

auto group_params = device_regions_build<true, 1, scalar_t>(
{A},
{a_array_host},
device,
{},
irange, jrange );
{A},
{a_array_host},
device,
{},
irange, jrange );

scalar_t** a_array_dev = A.array_device(device, queue_index);

Expand Down
6 changes: 3 additions & 3 deletions src/internal/internal_trsm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,9 @@ void trsm(internal::TargetType<Target::Devices>,

// B comes first since we do computation for a local B
auto group_params = device_regions_build<false, 2, scalar_t>(
{B, A},
{b_array_host, a_array_host},
device );
{B, A},
{b_array_host, a_array_host},
device );

{
trace::Block trace_block("blas::batch::trsm");
Expand Down
6 changes: 3 additions & 3 deletions src/internal/internal_tzadd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,9 @@ void add(internal::TargetType<Target::Devices>,
scalar_t** b_array_host = a_array_host + batch_size;

auto group_params = device_regions_build<true, 2, scalar_t>(
{A, B},
{a_array_host, b_array_host},
device );
{A, B},
{a_array_host, b_array_host},
device );

scalar_t** a_array_dev = B.array_device( device, queue_index );
scalar_t** b_array_dev = a_array_dev + batch_size;
Expand Down
8 changes: 4 additions & 4 deletions src/internal/internal_tzcopy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,10 @@ void copy(internal::TargetType<Target::Devices>,
++batch_count;
};
auto group_params = device_regions_build<true, 1, dst_scalar_t>(
{B},
{b_array_host},
device,
setup_A );
{B},
{b_array_host},
device,
setup_A );

// Usually the output matrix (B) provides all the batch arrays.
// Here we are using A, because of the different types.
Expand Down
4 changes: 1 addition & 3 deletions src/internal/internal_tzscale.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,7 @@ void scale(internal::TargetType<Target::Devices>,
scalar_t** a_array_host = A.array_host( device, queue_index );

auto group_params = device_regions_build<true, 1, scalar_t>(
{A},
{a_array_host},
device );
{A}, {a_array_host}, device );

blas::Queue* queue = A.compute_queue( device, queue_index );

Expand Down
4 changes: 1 addition & 3 deletions src/internal/internal_tzset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,7 @@ void set(
scalar_t** a_array_dev = A.array_device( device );

auto group_params = device_regions_build<true, 1, scalar_t>(
{A},
{a_array_host},
device );
{A}, {a_array_host}, device );

blas::Queue* queue = A.compute_queue(device, queue_index);

Expand Down

0 comments on commit 3bedce1

Please sign in to comment.