View Issue Details
ID | Project | Category | View Status | Date Submitted | Last Update |
---|---|---|---|---|---|
0002005 | ardour | features | public | 2008-01-02 11:57 | 2008-01-02 11:57 |
Reporter | timblech | Assigned To | |||
Priority | normal | Severity | feature | Reproducibility | always |
Status | new | Resolution | open | ||
Summary | 0002005: helping gcc's autovectorizer | ||||
Description | The attached patch helps gcc's autovectorizer vectorize some functions: - use __restrict__ pointers to avoid aliasing problems - use the STL min/max functions instead of the (double-precision) fmin/fmax. Using gcc-4.2 with the CXXFLAGS -mfpmath=sse and -ftree-vectorize, some of the code can then be translated to vectorized SSE code ... | ||||
Tags | No tags attached. | ||||
2008-01-02 11:57
|
0001-helping-the-gcc-autovectorizer.patch (4,397 bytes)
From 320d9f649a6f5630c6c2387d02e6902fc13ca4a1 Mon Sep 17 00:00:00 2001 From: Tim Blechmann <tim@klingt.org> Date: Wed, 2 Jan 2008 12:31:31 +0100 Subject: [PATCH] helping the gcc autovectorizer only enable __restrict__ attribute with gcc Signed-off-by: Tim Blechmann <tim@klingt.org> --- libs/ardour/ardour/utils.h | 4 ++++ libs/ardour/audio_track.cc | 2 +- libs/ardour/io.cc | 8 +++++--- libs/ardour/mix.cc | 10 +++++----- libs/ardour/utils.cc | 2 +- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/libs/ardour/ardour/utils.h b/libs/ardour/ardour/utils.h index 0d4ce08..b950fea 100644 --- a/libs/ardour/ardour/utils.h +++ b/libs/ardour/ardour/utils.h @@ -73,5 +73,9 @@ float meter_hold_to_float (ARDOUR::MeterHold); std::string CFStringRefToStdString(CFStringRef stringRef); #endif // HAVE_COREAUDIO +#ifndef __GNUC__ +#define __restrict__ /* __restrict__ */ +#endif + #endif /* __ardour_utils_h__ */ diff --git a/libs/ardour/audio_track.cc b/libs/ardour/audio_track.cc index c2f76f3..65ddf07 100644 --- a/libs/ardour/audio_track.cc +++ b/libs/ardour/audio_track.cc @@ -722,7 +722,7 @@ AudioTrack::export_stuff (vector<Sample*>& buffers, uint32_t nbufs, nframes_t st _gain_automation_curve.get_vector (start, start + nframes, gain_automation, nframes); for (bi = buffers.begin(); bi != buffers.end(); ++bi) { - Sample *b = *bi; + Sample * __restrict__ b = *bi; for (nframes_t n = 0; n < nframes; ++n) { b[n] *= gain_automation[n]; } diff --git a/libs/ardour/io.cc b/libs/ardour/io.cc index 4f20717..1c416cf 100644 --- a/libs/ardour/io.cc +++ b/libs/ardour/io.cc @@ -310,8 +310,8 @@ IO::pan_automated (vector<Sample*>& bufs, uint32_t nbufs, nframes_t start, nfram void IO::pan (vector<Sample*>& bufs, uint32_t nbufs, nframes_t nframes, nframes_t offset, gain_t gain_coeff) { - Sample* dst; - Sample* src; + Sample* __restrict__ dst; + Sample* __restrict__ src; /* io_lock, not taken: function must be called from Session::process() calltree */ @@ -513,8 +513,10 @@ 
IO::deliver_output_no_pan (vector<Sample *>& bufs, uint32_t nbufs, nframes_t nfr } else if (actual_gain == 0.0f) { memset (dst, 0, sizeof (Sample) * nframes); } else { + Sample * __restrict__ dest = dst; + Sample * __restrict__ source = src; for (nframes_t x = 0; x < nframes; ++x) { - dst[x] = src[x] * actual_gain; + dest[x] = source[x] * actual_gain; } } diff --git a/libs/ardour/mix.cc b/libs/ardour/mix.cc index 2d31c8c..7f8039b 100644 --- a/libs/ardour/mix.cc +++ b/libs/ardour/mix.cc @@ -83,7 +83,7 @@ float compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current) { for (nframes_t i = 0; i < nsamples; ++i) { - current = f_max (current, fabsf (buf[i])); + current = std::max (current, std::abs(buf[i])); } return current; @@ -100,8 +100,8 @@ find_peaks (ARDOUR::Sample *buf, nframes_t nframes, float *min, float *max) for (i = 0; i < nframes; i++) { - a = fmax (buf[i], a); - b = fmin (buf[i], b); + a = std::max (buf[i], a); + b = std::min (buf[i], b); } *max = a; @@ -116,7 +116,7 @@ apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain) } void -mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain) +mix_buffers_with_gain (ARDOUR::Sample * __restrict__ dst, ARDOUR::Sample * __restrict__ src, nframes_t nframes, float gain) { for (nframes_t i = 0; i < nframes; i++) { dst[i] += src[i] * gain; @@ -124,7 +124,7 @@ mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nfram } void -mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes) +mix_buffers_no_gain (ARDOUR::Sample * __restrict__ dst, ARDOUR::Sample * __restrict__ src, nframes_t nframes) { for (nframes_t i=0; i < nframes; i++) { dst[i] += src[i]; diff --git a/libs/ardour/utils.cc b/libs/ardour/utils.cc index e34fdd7..85c1276 100644 --- a/libs/ardour/utils.cc +++ b/libs/ardour/utils.cc @@ -316,7 +316,7 @@ CFStringRefToStdString(CFStringRef stringRef) #endif // HAVE_COREAUDIO void 
-compute_equal_power_fades (nframes_t nframes, float* in, float* out) +compute_equal_power_fades (nframes_t nframes, float* __restrict__ in, float* __restrict__ out) { double step; -- 1.5.3.7 |