Skip to content


This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into drop-deprecations
Browse files Browse the repository at this point in the history
steven-johnson committed Jan 2, 2025


This commit was signed with the committer’s verified signature.
2 parents a65cbb1 + 9ecda2d commit b272643
Showing 31 changed files with 730 additions and 681 deletions.
31 changes: 11 additions & 20 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -1,48 +1,39 @@
AccessModifierOffset: -4
AlignEscapedNewlines: Left
AlignTrailingComments: true
Kind: Always
OverEmptyLines: 0
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Empty
AllowShortFunctionsOnASingleLine: None
AllowShortIfStatementsOnASingleLine: Always
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: Yes
BinPackParameters: true
BreakAfterReturnType: Automatic
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: false
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakTemplateDeclarations: Yes
ColumnLimit: 0
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
ExperimentalAutoDetectBinPacking: false
IndentCaseLabels: false
IndentWrappedFunctionNames: false
IndentWidth: 4
IndentWrappedFunctionNames: false
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCSpaceBeforeProtocolList: true
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 60
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PackConstructorInitializers: BinPack
PointerAlignment: Right
SpaceBeforeParens: ControlStatements
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceInEmptyParentheses: false
SpaceBeforeParens: ControlStatements
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInAngles: Never
SpacesInParens: Never
Standard: c++17
TabWidth: 8
UseTab: Never
13 changes: 7 additions & 6 deletions .github/workflows/presubmit.yml
Original file line number Diff line number Diff line change
@@ -16,14 +16,15 @@ permissions:
name: Check clang-format
runs-on: ubuntu-20.04
runs-on: macos-14
- uses: actions/checkout@v3
- uses: DoozyX/clang-format-lint-action@v0.17
source: '.'
extensions: 'h,c,cpp'
clangFormatVersion: 17
- name: Install clang-format
run: brew install llvm@19
- name: Check clang-format
run: ./run-clang-format.sh -c
CLANG_FORMAT_LLVM_INSTALL_DIR: /opt/homebrew/opt/llvm@19
name: Check clang-tidy
runs-on: macos-14
2 changes: 1 addition & 1 deletion apps/HelloAndroidCamera2/jni/LockedSurface.cpp
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@

// Round x up to a multiple of mask.
// E.g., ALIGN(x, 16) means round x up to the nearest multiple of 16.
#define ALIGN(x, mask) (((x) + (mask)-1) & ~((mask)-1))
#define ALIGN(x, mask) (((x) + (mask) - 1) & ~((mask) - 1))

LockedSurface *LockedSurface::lock(JNIEnv *env, jobject surface) {
LockedSurface *output = new LockedSurface;
5 changes: 1 addition & 4 deletions apps/HelloWasm/core.cpp
Original file line number Diff line number Diff line change
@@ -76,10 +76,7 @@ void mainloop(void *arg) {
"Frame rate: %2.0f fps",
ctx->smoothed_runtime, ctx->smoothed_blit_time, ctx->smoothed_fps);
// Run some javascript inline to update the web-page
document.getElementById(UTF8ToString($0)).innerHTML = UTF8ToString($1);
"runtime", buf);
EM_ASM({ document.getElementById(UTF8ToString($0)).innerHTML = UTF8ToString($1); }, "runtime", buf);

// Read the threads slider from the UI
int threads = EM_ASM_INT({
96 changes: 48 additions & 48 deletions apps/blur/test.cpp
Original file line number Diff line number Diff line change
@@ -70,61 +70,61 @@ Buffer<uint16_t, 2> blur_fast(Buffer<uint16_t, 2> in) {
#elif __ARM_NEON
uint16x4_t one_third = vdup_n_u16(21846);
uint16x4_t one_third = vdup_n_u16(21846);
#pragma omp parallel for
for (int yTile = 0; yTile < out.height(); yTile += 32) {
uint16x8_t tmp[(128 / 8) * (32 + 2)];
for (int xTile = 0; xTile < out.width(); xTile += 128) {
uint16_t *tmpPtr = (uint16_t *)tmp;
for (int y = 0; y < 32 + 2; y++) {
const uint16_t *inPtr = &(in(xTile, yTile + y));
for (int x = 0; x < 128; x += 8) {
uint16x8_t a = vld1q_u16(inPtr);
uint16x8_t b = vld1q_u16(inPtr + 1);
uint16x8_t c = vld1q_u16(inPtr + 2);
uint16x8_t sum = vaddq_u16(vaddq_u16(a, b), c);
uint16x4_t sumlo = vget_low_u16(sum);
uint16x4_t sumhi = vget_high_u16(sum);
uint16x4_t avglo = vshrn_n_u32(vmull_u16(sumlo, one_third), 16);
uint16x4_t avghi = vshrn_n_u32(vmull_u16(sumhi, one_third), 16);
uint16x8_t avg = vcombine_u16(avglo, avghi);
vst1q_u16(tmpPtr, avg);
tmpPtr += 8;
inPtr += 8;
for (int yTile = 0; yTile < out.height(); yTile += 32) {
uint16x8_t tmp[(128 / 8) * (32 + 2)];
for (int xTile = 0; xTile < out.width(); xTile += 128) {
uint16_t *tmpPtr = (uint16_t *)tmp;
for (int y = 0; y < 32 + 2; y++) {
const uint16_t *inPtr = &(in(xTile, yTile + y));
for (int x = 0; x < 128; x += 8) {
uint16x8_t a = vld1q_u16(inPtr);
uint16x8_t b = vld1q_u16(inPtr + 1);
uint16x8_t c = vld1q_u16(inPtr + 2);
uint16x8_t sum = vaddq_u16(vaddq_u16(a, b), c);
uint16x4_t sumlo = vget_low_u16(sum);
uint16x4_t sumhi = vget_high_u16(sum);
uint16x4_t avglo = vshrn_n_u32(vmull_u16(sumlo, one_third), 16);
uint16x4_t avghi = vshrn_n_u32(vmull_u16(sumhi, one_third), 16);
uint16x8_t avg = vcombine_u16(avglo, avghi);
vst1q_u16(tmpPtr, avg);
tmpPtr += 8;
inPtr += 8;
tmpPtr = (uint16_t *)tmp;
for (int y = 0; y < 32; y++) {
uint16_t *outPtr = &(out(xTile, yTile + y));
for (int x = 0; x < 128; x += 8) {
uint16x8_t a = vld1q_u16(tmpPtr + (2 * 128));
uint16x8_t b = vld1q_u16(tmpPtr + 128);
uint16x8_t c = vld1q_u16(tmpPtr);
uint16x8_t sum = vaddq_u16(vaddq_u16(a, b), c);
uint16x4_t sumlo = vget_low_u16(sum);
uint16x4_t sumhi = vget_high_u16(sum);
uint16x4_t avglo = vshrn_n_u32(vmull_u16(sumlo, one_third), 16);
uint16x4_t avghi = vshrn_n_u32(vmull_u16(sumhi, one_third), 16);
uint16x8_t avg = vcombine_u16(avglo, avghi);
vst1q_u16(outPtr, avg);
tmpPtr += 8;
outPtr += 8;
tmpPtr = (uint16_t *)tmp;
for (int y = 0; y < 32; y++) {
uint16_t *outPtr = &(out(xTile, yTile + y));
for (int x = 0; x < 128; x += 8) {
uint16x8_t a = vld1q_u16(tmpPtr + (2 * 128));
uint16x8_t b = vld1q_u16(tmpPtr + 128);
uint16x8_t c = vld1q_u16(tmpPtr);
uint16x8_t sum = vaddq_u16(vaddq_u16(a, b), c);
uint16x4_t sumlo = vget_low_u16(sum);
uint16x4_t sumhi = vget_high_u16(sum);
uint16x4_t avglo = vshrn_n_u32(vmull_u16(sumlo, one_third), 16);
uint16x4_t avghi = vshrn_n_u32(vmull_u16(sumhi, one_third), 16);
uint16x8_t avg = vcombine_u16(avglo, avghi);
vst1q_u16(outPtr, avg);
tmpPtr += 8;
outPtr += 8;
// No intrinsics enabled, do a naive thing.
for (int y = 0; y < out.height(); y++) {
for (int x = 0; x < out.width(); x++) {
int tmp[3] = {
(in(x, y) + in(x + 1, y) + in(x + 2, y)) / 3,
(in(x, y + 1) + in(x + 1, y + 1) + in(x + 2, y + 1)) / 3,
(in(x, y + 2) + in(x + 1, y + 2) + in(x + 2, y + 2)) / 3,
out(x, y) = (tmp[0] + tmp[1] + tmp[2]) / 3;
// No intrinsics enabled, do a naive thing.
for (int y = 0; y < out.height(); y++) {
for (int x = 0; x < out.width(); x++) {
int tmp[3] = {
(in(x, y) + in(x + 1, y) + in(x + 2, y)) / 3,
(in(x, y + 1) + in(x + 1, y + 1) + in(x + 2, y + 1)) / 3,
(in(x, y + 2) + in(x + 1, y + 2) + in(x + 2, y + 2)) / 3,
out(x, y) = (tmp[0] + tmp[1] + tmp[2]) / 3;

4 changes: 2 additions & 2 deletions apps/hexagon_dma/process_raw_linear_interleaved_basic.cpp
Original file line number Diff line number Diff line change
@@ -42,9 +42,9 @@ typedef struct {
#define _SCHEDULE_STR(s) #s
#define _SCHEDULE_NAME(data, direction, schedule) pipeline_##data##_##direction##_##schedule
#define _SCHEDULE_PAIR(data, direction, schedule) \
{ _SCHEDULE_STR(scheduled - pipeline(data, direction, schedule)), _SCHEDULE_NAME(data, direction, schedule) }
{_SCHEDULE_STR(scheduled - pipeline(data, direction, schedule)), _SCHEDULE_NAME(data, direction, schedule)}
#define SCHEDULE_FUNCTION_RW(schedule) _SCHEDULE_PAIR(raw_linear_interleaved, rw, schedule)

4 changes: 2 additions & 2 deletions apps/hexagon_dma/process_yuv_linear_basic.cpp
Original file line number Diff line number Diff line change
@@ -55,9 +55,9 @@ typedef struct {
#define _SCHEDULE_STR(s) #s
#define _SCHEDULE_NAME(data, direction, schedule) pipeline_##data##_##direction##_##schedule
#define _SCHEDULE_PAIR(data, direction, schedule) \
{ _SCHEDULE_STR(scheduled - pipeline(data, direction, schedule)), _SCHEDULE_NAME(data, direction, schedule) }
{_SCHEDULE_STR(scheduled - pipeline(data, direction, schedule)), _SCHEDULE_NAME(data, direction, schedule)}
#define SCHEDULE_FUNCTION_RW(type, schedule) _SCHEDULE_PAIR(type##_linear, rw, schedule)

40 changes: 20 additions & 20 deletions apps/resnet_50/Resnet50Generator.cpp
Original file line number Diff line number Diff line change
@@ -31,35 +31,35 @@ class Resnet50Generator : public Halide::Generator<Resnet50Generator> {
Input<Buffer<float, 3>> input{"input"};
/** parameter values for scaling layers **/
Input<Buffer<float, 1>> conv1_gamma{"conv1_gamma"};
Input<Buffer<float, 1>[4]> br1_gamma { "br1_gamma" };
Input<Buffer<float, 1>[16]> br2a_gamma { "br2a_gamma" };
Input<Buffer<float, 1>[16]> br2b_gamma { "br2b_gamma" };
Input<Buffer<float, 1>[16]> br2c_gamma { "br2c_gamma" };
Input<Buffer<float, 1>[4]> br1_gamma{"br1_gamma"};
Input<Buffer<float, 1>[16]> br2a_gamma{"br2a_gamma"};
Input<Buffer<float, 1>[16]> br2b_gamma{"br2b_gamma"};
Input<Buffer<float, 1>[16]> br2c_gamma{"br2c_gamma"};

Input<Buffer<float, 1>> conv1_beta{"conv1_beta"};
Input<Buffer<float, 1>[4]> br1_beta { "br1_beta" };
Input<Buffer<float, 1>[16]> br2a_beta { "br2a_beta" };
Input<Buffer<float, 1>[16]> br2b_beta { "br2b_beta" };
Input<Buffer<float, 1>[16]> br2c_beta { "br2c_beta" };
Input<Buffer<float, 1>[4]> br1_beta{"br1_beta"};
Input<Buffer<float, 1>[16]> br2a_beta{"br2a_beta"};
Input<Buffer<float, 1>[16]> br2b_beta{"br2b_beta"};
Input<Buffer<float, 1>[16]> br2c_beta{"br2c_beta"};

Input<Buffer<float, 1>> conv1_mu{"conv1_mu"};
Input<Buffer<float, 1>[4]> br1_mu { "br1_mu" };
Input<Buffer<float, 1>[16]> br2a_mu { "br2a_mu" };
Input<Buffer<float, 1>[16]> br2b_mu { "br2b_mu" };
Input<Buffer<float, 1>[16]> br2c_mu { "br2c_mu" };
Input<Buffer<float, 1>[4]> br1_mu{"br1_mu"};
Input<Buffer<float, 1>[16]> br2a_mu{"br2a_mu"};
Input<Buffer<float, 1>[16]> br2b_mu{"br2b_mu"};
Input<Buffer<float, 1>[16]> br2c_mu{"br2c_mu"};

Input<Buffer<float, 1>> conv1_sig{"conv1_sig"};
Input<Buffer<float, 1>[4]> br1_sig { "br1_sig" };
Input<Buffer<float, 1>[16]> br2a_sig { "br2a_sig" };
Input<Buffer<float, 1>[16]> br2b_sig { "br2b_sig" };
Input<Buffer<float, 1>[16]> br2c_sig { "br2c_sig" };
Input<Buffer<float, 1>[4]> br1_sig{"br1_sig"};
Input<Buffer<float, 1>[16]> br2a_sig{"br2a_sig"};
Input<Buffer<float, 1>[16]> br2b_sig{"br2b_sig"};
Input<Buffer<float, 1>[16]> br2c_sig{"br2c_sig"};

/** weights and biases for convolutions **/
Input<Buffer<float, 4>> conv1_weights{"conv1_weights"};
Input<Buffer<float, 4>[4]> br1_conv_weights { "br1_conv_weights" };
Input<Buffer<float, 4>[16]> br2a_conv_weights { "br2a_conv_weights" };
Input<Buffer<float, 4>[16]> br2b_conv_weights { "br2b_conv_weights" };
Input<Buffer<float, 4>[16]> br2c_conv_weights { "br2c_conv_weights" };
Input<Buffer<float, 4>[4]> br1_conv_weights{"br1_conv_weights"};
Input<Buffer<float, 4>[16]> br2a_conv_weights{"br2a_conv_weights"};
Input<Buffer<float, 4>[16]> br2b_conv_weights{"br2b_conv_weights"};
Input<Buffer<float, 4>[16]> br2c_conv_weights{"br2c_conv_weights"};

Input<Buffer<float, 2>> fc1000_weights{"fc1000_weights"};
Input<Buffer<float, 1>> fc1000_bias{"fc1000_bias"};

0 comments on commit b272643

Please sign in to comment.