From: Good Guy Date: Sat, 2 Mar 2019 04:39:41 +0000 (-0700) Subject: rework overlayframe for parallel build, use -Ofast on frame xfers and plugins X-Git-Tag: 2019-08~91 X-Git-Url: https://cinelerra-gg.org/git/?a=commitdiff_plain;h=15411d7912d8aa1b9bea6ba8862517b058861771;p=goodguy%2Fcinelerra.git rework overlayframe for parallel build, use -Ofast on frame xfers and plugins --- diff --git a/cinelerra-5.1/cinelerra/Makefile b/cinelerra-5.1/cinelerra/Makefile index dfdfd230..be86c3ce 100644 --- a/cinelerra-5.1/cinelerra/Makefile +++ b/cinelerra-5.1/cinelerra/Makefile @@ -1,7 +1,39 @@ export TOPDIR ?= $(CURDIR)/.. include $(TOPDIR)/global_config -OBJS := \ +OVERLAYS := \ + $(OBJDIR)/overlay_direct_rgb161616.o \ + $(OBJDIR)/overlay_direct_rgb888.o \ + $(OBJDIR)/overlay_direct_rgba16161616.o \ + $(OBJDIR)/overlay_direct_rgba8888.o \ + $(OBJDIR)/overlay_direct_rgba_float.o \ + $(OBJDIR)/overlay_direct_rgb_float.o \ + $(OBJDIR)/overlay_direct_yuv161616.o \ + $(OBJDIR)/overlay_direct_yuv888.o \ + $(OBJDIR)/overlay_direct_yuva16161616.o \ + $(OBJDIR)/overlay_direct_yuva8888.o \ + $(OBJDIR)/overlay_nearest_rgb161616.o \ + $(OBJDIR)/overlay_nearest_rgb888.o \ + $(OBJDIR)/overlay_nearest_rgba16161616.o \ + $(OBJDIR)/overlay_nearest_rgba8888.o \ + $(OBJDIR)/overlay_nearest_rgba_float.o \ + $(OBJDIR)/overlay_nearest_rgb_float.o \ + $(OBJDIR)/overlay_nearest_yuv161616.o \ + $(OBJDIR)/overlay_nearest_yuv888.o \ + $(OBJDIR)/overlay_nearest_yuva16161616.o \ + $(OBJDIR)/overlay_nearest_yuva8888.o \ + $(OBJDIR)/overlay_sample_rgb161616.o \ + $(OBJDIR)/overlay_sample_rgb888.o \ + $(OBJDIR)/overlay_sample_rgba16161616.o \ + $(OBJDIR)/overlay_sample_rgba8888.o \ + $(OBJDIR)/overlay_sample_rgba_float.o \ + $(OBJDIR)/overlay_sample_rgb_float.o \ + $(OBJDIR)/overlay_sample_yuv161616.o \ + $(OBJDIR)/overlay_sample_yuv888.o \ + $(OBJDIR)/overlay_sample_yuva16161616.o \ + $(OBJDIR)/overlay_sample_yuva8888.o \ + +OBJS := $(OVERLAYS) \ $(OBJDIR)/aattachmentpoint.o \ $(OBJDIR)/aautomation.o \ 
$(OBJDIR)/aboutprefs.o \ @@ -187,8 +219,8 @@ OBJS := \ $(OBJDIR)/mwindowmove.o \ $(OBJDIR)/mwindow.o \ $(OBJDIR)/new.o \ - $(OBJDIR)/overlaydirect.o \ $(OBJDIR)/overlayframe.o \ + $(OBJDIR)/overlaydirect.o \ $(OBJDIR)/overlaynearest.o \ $(OBJDIR)/overlaysample.o \ $(OBJDIR)/packagedispatcher.o \ @@ -564,18 +596,14 @@ clean: tags: - ctags -R -h default --langmap=c:+.inc . ../guicast/ ../libzmpeg3 ../plugins ../thirdparty/ffmpeg-* ../thirdparty/giflib-* + ctags -R -h default --langmap=c:+.inc . ../guicast/ ../libzmpeg3 ../plugins ../thirdparty/ffmpeg-* +$(OBJDIR)/fileexr.o: BFLAGS:= -Wno-deprecated +$(OBJDIR)/sha1.o: BFLAGS:= -O3 +$(OVERLAYS): BFLAGS:= -Ofast -g0 $(OBJDIR)/%.o: %.C - $(CXX) `cat $(OBJDIR)/c_flags` -DMSGQUAL=$* -c $< -o $@ - - -$(OBJDIR)/fileexr.o: fileexr.C - $(CXX) `cat $(OBJDIR)/c_flags` -Wno-deprecated -DMSGQUAL=$* -c $< -o $@ - -$(OBJDIR)/sha1.o: sha1.C sha1.h - $(CXX) `cat $(OBJDIR)/c_flags` -O3 -c $< -o $@ + $(CXX) `cat $(OBJDIR)/c_flags` $(BFLAGS) -DMSGQUAL=$* -c $< -o $@ #lv2 ifneq ($(WANT_LV2),no) @@ -588,7 +616,6 @@ $(OBJDIR)/pluginlv2ui.o $(OBJDIR)/lv2ui.o: $(CXX) `cat $(OBJDIR)/c_flags` $(GTK2_INCS) -DMSGQUAL=$* -c $< -o $@ $(OBJDIR)/shuttle.o: shuttle.C shuttle_keys.h - $(CXX) `cat $(OBJDIR)/c_flags` -DMSGQUAL=$* -c $< -o $@ shuttle_keys.h: /usr/include/X11/keysymdef.h sed < /usr/include/X11/keysymdef.h > shuttle_keys.h -f shuttle.sed diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgb161616.C b/cinelerra-5.1/cinelerra/overlay_direct_rgb161616.C new file mode 100644 index 00000000..7d2d97e2 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_direct_rgb161616.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); +void DirectUnit::rgb161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgb888.C b/cinelerra-5.1/cinelerra/overlay_direct_rgb888.C new file mode 100644 index 00000000..bce36c07 --- /dev/null +++ 
b/cinelerra-5.1/cinelerra/overlay_direct_rgb888.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); +void DirectUnit::rgb888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgb_float.C b/cinelerra-5.1/cinelerra/overlay_direct_rgb_float.C new file mode 100644 index 00000000..9b13df3e --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_direct_rgb_float.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_float, z_float, 1.f, 3, 0, 0.f); +void DirectUnit::rgb_float() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgba16161616.C b/cinelerra-5.1/cinelerra/overlay_direct_rgba16161616.C new file mode 100644 index 00000000..3ee04e64 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_direct_rgba16161616.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); +void DirectUnit::rgba16161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgba8888.C b/cinelerra-5.1/cinelerra/overlay_direct_rgba8888.C new file mode 100644 index 00000000..cb984df1 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_direct_rgba8888.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); +void DirectUnit::rgba8888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgba_float.C b/cinelerra-5.1/cinelerra/overlay_direct_rgba_float.C new file mode 100644 index 00000000..f0c8afcf --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_direct_rgba_float.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_float, z_float, 1.f, 4, 0, 0.f); +void DirectUnit::rgba_float() { BLEND_SWITCH(BLEND); } + diff --git 
a/cinelerra-5.1/cinelerra/overlay_direct_yuv161616.C b/cinelerra-5.1/cinelerra/overlay_direct_yuv161616.C new file mode 100644 index 00000000..7d1673e9 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_direct_yuv161616.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); +void DirectUnit::yuv161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_direct_yuv888.C b/cinelerra-5.1/cinelerra/overlay_direct_yuv888.C new file mode 100644 index 00000000..19b98b00 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_direct_yuv888.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); +void DirectUnit::yuv888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_direct_yuva16161616.C b/cinelerra-5.1/cinelerra/overlay_direct_yuva16161616.C new file mode 100644 index 00000000..0c0c2e4e --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_direct_yuva16161616.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); +void DirectUnit::yuva16161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_direct_yuva8888.C b/cinelerra-5.1/cinelerra/overlay_direct_yuva8888.C new file mode 100644 index 00000000..b1e67c6e --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_direct_yuva8888.C @@ -0,0 +1,5 @@ +#include "overlaydirect.h" +// parallel build +#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); +void DirectUnit::yuva8888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgb161616.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgb161616.C new file mode 100644 index 00000000..27d4e045 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_rgb161616.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel 
build +#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); +void NNUnit::rgb161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgb888.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgb888.C new file mode 100644 index 00000000..5e651c75 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_rgb888.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel build +#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); +void NNUnit::rgb888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgb_float.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgb_float.C new file mode 100644 index 00000000..61eb333c --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_rgb_float.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel build +#define BLEND(FN) XBLEND_3NN(FN, z_float, z_float, 1.f, 3, 0, 0.f); +void NNUnit::rgb_float() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgba16161616.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgba16161616.C new file mode 100644 index 00000000..fe202aa4 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_rgba16161616.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel build +#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); +void NNUnit::rgba16161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgba8888.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgba8888.C new file mode 100644 index 00000000..2f87451d --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_rgba8888.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel build +#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); +void NNUnit::rgba8888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgba_float.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgba_float.C new file mode 100644 index 
00000000..0e120691 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_rgba_float.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel build +#define BLEND(FN) XBLEND_3NN(FN, z_float, z_float, 1.f, 4, 0, 0.f); +void NNUnit::rgba_float() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_yuv161616.C b/cinelerra-5.1/cinelerra/overlay_nearest_yuv161616.C new file mode 100644 index 00000000..6e5eee9d --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_yuv161616.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel build +#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); +void NNUnit::yuv161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_yuv888.C b/cinelerra-5.1/cinelerra/overlay_nearest_yuv888.C new file mode 100644 index 00000000..d5b1c961 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_yuv888.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel build +#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); +void NNUnit::yuv888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_yuva16161616.C b/cinelerra-5.1/cinelerra/overlay_nearest_yuva16161616.C new file mode 100644 index 00000000..0ce0d8e3 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_yuva16161616.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel build +#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); +void NNUnit::yuva16161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_yuva8888.C b/cinelerra-5.1/cinelerra/overlay_nearest_yuva8888.C new file mode 100644 index 00000000..755137a3 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_nearest_yuva8888.C @@ -0,0 +1,5 @@ +#include "overlaynearest.h" +// parallel build +#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); +void NNUnit::yuva8888() { 
BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgb161616.C b/cinelerra-5.1/cinelerra/overlay_sample_rgb161616.C new file mode 100644 index 00000000..ebec4586 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_rgb161616.C @@ -0,0 +1,5 @@ +#include "overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); +void SampleUnit::rgb161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgb888.C b/cinelerra-5.1/cinelerra/overlay_sample_rgb888.C new file mode 100644 index 00000000..a58ae5d3 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_rgb888.C @@ -0,0 +1,5 @@ +#include "overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); +void SampleUnit::rgb888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgb_float.C b/cinelerra-5.1/cinelerra/overlay_sample_rgb_float.C new file mode 100644 index 00000000..5fd17134 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_rgb_float.C @@ -0,0 +1,5 @@ +#include "overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_float, z_float, 1.f, 3, 0, 0.f); +void SampleUnit::rgb_float() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgba16161616.C b/cinelerra-5.1/cinelerra/overlay_sample_rgba16161616.C new file mode 100644 index 00000000..8956c327 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_rgba16161616.C @@ -0,0 +1,5 @@ +#include "overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); +void SampleUnit::rgba16161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgba8888.C b/cinelerra-5.1/cinelerra/overlay_sample_rgba8888.C new file mode 100644 index 00000000..c3e625ca --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_rgba8888.C @@ -0,0 +1,5 @@ +#include 
"overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); +void SampleUnit::rgba8888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgba_float.C b/cinelerra-5.1/cinelerra/overlay_sample_rgba_float.C new file mode 100644 index 00000000..84d517e0 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_rgba_float.C @@ -0,0 +1,5 @@ +#include "overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_float, z_float, 1.f, 4, 0, 0.f); +void SampleUnit::rgba_float() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_yuv161616.C b/cinelerra-5.1/cinelerra/overlay_sample_yuv161616.C new file mode 100644 index 00000000..be50f711 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_yuv161616.C @@ -0,0 +1,5 @@ +#include "overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); +void SampleUnit::yuv161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_yuv888.C b/cinelerra-5.1/cinelerra/overlay_sample_yuv888.C new file mode 100644 index 00000000..3692bd2b --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_yuv888.C @@ -0,0 +1,5 @@ +#include "overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); +void SampleUnit::yuv888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_yuva16161616.C b/cinelerra-5.1/cinelerra/overlay_sample_yuva16161616.C new file mode 100644 index 00000000..2e181204 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_yuva16161616.C @@ -0,0 +1,5 @@ +#include "overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); +void SampleUnit::yuva16161616() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlay_sample_yuva8888.C b/cinelerra-5.1/cinelerra/overlay_sample_yuva8888.C 
new file mode 100644 index 00000000..093b379c --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlay_sample_yuva8888.C @@ -0,0 +1,5 @@ +#include "overlaysample.h" +// parallel build +#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); +void SampleUnit::yuva8888() { BLEND_SWITCH(BLEND); } + diff --git a/cinelerra-5.1/cinelerra/overlaydirect.C b/cinelerra-5.1/cinelerra/overlaydirect.C index 3297fda3..5324401f 100644 --- a/cinelerra-5.1/cinelerra/overlaydirect.C +++ b/cinelerra-5.1/cinelerra/overlaydirect.C @@ -1,49 +1,8 @@ #include "overlayframe.h" +#include "overlaydirect.h" /* Direct translate / blend **********************************************/ -#define XBLEND(FN, temp_type, type, max, components, ofs, round) { \ - temp_type opcty = fade * max + round, trnsp = max - opcty; \ - type** output_rows = (type**)output->get_rows(); \ - type** input_rows = (type**)input->get_rows(); \ - ix *= components; ox *= components; \ - \ - for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \ - type* in_row = input_rows[i + iy] + ix; \ - type* output = output_rows[i] + ox; \ - for(int j = 0; j < ow; j++) { \ - if( components == 4 ) { \ - temp_type r, g, b, a; \ - ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \ - ALPHA4_STORE(output, ofs, max); \ - } \ - else { \ - temp_type r, g, b; \ - ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \ - ALPHA3_STORE(output, ofs, max); \ - } \ - in_row += components; output += components; \ - } \ - } \ - break; \ -} - -#define XBLEND_ONLY(FN) { \ - switch(input->get_color_model()) { \ - case BC_RGB_FLOAT: XBLEND(FN, z_float, z_float, 1.f, 3, 0, 0.f); \ - case BC_RGBA_FLOAT: XBLEND(FN, z_float, z_float, 1.f, 4, 0, 0.f); \ - case BC_RGB888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \ - case BC_YUV888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \ - case BC_RGBA8888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \ - case BC_YUVA8888: XBLEND(FN, z_int32_t, z_uint8_t, 
0xff, 4, 0x80, .5f); \ - case BC_RGB161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \ - case BC_YUV161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \ - case BC_RGBA16161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \ - case BC_YUVA16161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \ - } \ - break; \ -} - DirectPackage::DirectPackage() { } @@ -60,21 +19,29 @@ DirectUnit::~DirectUnit() void DirectUnit::process_package(LoadPackage *package) { - DirectPackage *pkg = (DirectPackage*)package; - - VFrame *output = engine->output; - VFrame *input = engine->input; - int mode = engine->mode; - float fade = - BC_CModels::has_alpha(input->get_color_model()) && + pkg = (DirectPackage*)package; + output = engine->output; + input = engine->input; + mode = engine->mode; + fade = BC_CModels::has_alpha(input->get_color_model()) && mode == TRANSFER_REPLACE ? 1.f : engine->alpha; - - int ix = engine->in_x1; - int ox = engine->out_x1; - int ow = engine->out_x2 - ox; - int iy = engine->in_y1 - engine->out_y1; - - BLEND_SWITCH(XBLEND_ONLY); + ix = engine->in_x1; + ox = engine->out_x1; + ow = engine->out_x2 - ox; + iy = engine->in_y1 - engine->out_y1; + + switch(input->get_color_model()) { + case BC_RGB_FLOAT: rgb_float(); break; + case BC_RGBA_FLOAT: rgba_float(); break; + case BC_RGB888: rgb888(); break; + case BC_YUV888: yuv888(); break; + case BC_RGBA8888: rgba8888(); break; + case BC_YUVA8888: yuva8888(); break; + case BC_RGB161616: rgb161616(); break; + case BC_YUV161616: yuv161616(); break; + case BC_RGBA16161616: rgba16161616(); break; + case BC_YUVA16161616: yuva16161616(); break; + } } DirectEngine::DirectEngine(int cpus) diff --git a/cinelerra-5.1/cinelerra/overlaydirect.h b/cinelerra-5.1/cinelerra/overlaydirect.h new file mode 100644 index 00000000..ebdc30f8 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlaydirect.h @@ -0,0 +1,30 @@ +#ifndef __OVERLAYDIRECT_H__ +#define __OVERLAYDIRECT_H__ +#include 
"overlayframe.h" + +#define XBLEND(FN, temp_type, type, max, components, ofs, round) { \ + temp_type opcty = fade * max + round, trnsp = max - opcty; \ + type** output_rows = (type**)output->get_rows(); \ + type** input_rows = (type**)input->get_rows(); \ + ix *= components; ox *= components; \ + \ + for( int i=pkg->out_row1; i<pkg->out_row2; ++i ) { \ + type* in_row = input_rows[i + iy] + ix; \ + type* output = output_rows[i] + ox; \ + for( int j=ow; --j>=0; ) { \ + if( components == 4 ) { \ + temp_type r, g, b, a; \ + ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \ + ALPHA4_STORE(output, ofs, max); \ + } \ + else { \ + temp_type r, g, b; \ + ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \ + ALPHA3_STORE(output, ofs, max); \ + } \ + in_row += components; output += components; \ + } \ + } \ +} break + +#endif diff --git a/cinelerra-5.1/cinelerra/overlayframe.h b/cinelerra-5.1/cinelerra/overlayframe.h index 620f8e18..13c2e4bd 100644 --- a/cinelerra-5.1/cinelerra/overlayframe.h +++ b/cinelerra-5.1/cinelerra/overlayframe.h @@ -334,7 +334,6 @@ ZTYP(float); ZTYP(double); ALPHA_STORE(out, ofs, mx); \ out[3] = aclip(a, mx) - #define BLEND_SWITCH(FN) \ switch( mode ) { \ case TRANSFER_NORMAL: FN(NORMAL); \ @@ -420,6 +419,23 @@ public: void process_package(LoadPackage *package); DirectEngine *engine; + + DirectPackage *pkg; + int ix, iy, ox, ow; + VFrame *output, *input; + int mode; + float fade; + + void rgb_float(); + void rgba_float(); + void rgb888(); + void yuv888(); + void rgba8888(); + void yuva8888(); + void rgb161616(); + void yuv161616(); + void rgba16161616(); + void yuva16161616(); }; class NNUnit : public LoadClient @@ -429,8 +445,25 @@ public: ~NNUnit(); void process_package(LoadPackage *package); - NNEngine *engine; + + NNPackage *pkg; + int ix, iy, ox, ow; + VFrame *output, *input; + int mode; + float fade; + int *ly; + + void rgb_float(); + void rgba_float(); + void rgb888(); + void yuv888(); + void rgba8888(); + void 
yuva8888(); + void rgb161616(); + void yuv161616(); + void rgba16161616(); + void yuva16161616(); }; class SampleUnit : public LoadClient @@ -440,8 +473,28 @@ public: ~SampleUnit(); void process_package(LoadPackage *package); - SampleEngine *engine; + + SamplePackage *pkg; + VFrame *voutput, *vinput; + int mode; + float fade; + + int i1i, i2i, o1i, o2i, oh, kd; + float i1f, i2f, o1f, o2f, *k; + int *lookup_sx0, *lookup_sx1, *lookup_sk; + float *lookup_wacc; + + void rgb_float(); + void rgba_float(); + void rgb888(); + void yuv888(); + void rgba8888(); + void yuva8888(); + void rgb161616(); + void yuv161616(); + void rgba16161616(); + void yuva16161616(); }; diff --git a/cinelerra-5.1/cinelerra/overlaynearest.C b/cinelerra-5.1/cinelerra/overlaynearest.C index e4a842d7..faa7a65d 100644 --- a/cinelerra-5.1/cinelerra/overlaynearest.C +++ b/cinelerra-5.1/cinelerra/overlaynearest.C @@ -1,51 +1,8 @@ #include "overlayframe.h" +#include "overlaynearest.h" /* Nearest Neighbor scale / translate / blend ********************/ -#define XBLEND_3NN(FN, temp_type, type, max, components, ofs, round) { \ - temp_type opcty = fade * max + round, trnsp = max - opcty; \ - type** output_rows = (type**)output->get_rows(); \ - type** input_rows = (type**)input->get_rows(); \ - ox *= components; \ - \ - for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \ - int *lx = engine->in_lookup_x; \ - type* in_row = input_rows[*ly++]; \ - type* output = output_rows[i] + ox; \ - for(int j = 0; j < ow; j++) { \ - in_row += *lx++; \ - if( components == 4 ) { \ - temp_type r, g, b, a; \ - ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \ - ALPHA4_STORE(output, ofs, max); \ - } \ - else { \ - temp_type r, g, b; \ - ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \ - ALPHA3_STORE(output, ofs, max); \ - } \ - output += components; \ - } \ - } \ - break; \ -} - -#define XBLEND_NN(FN) { \ - switch(input->get_color_model()) { \ - case BC_RGB_FLOAT: XBLEND_3NN(FN, z_float, 
z_float, 1.f, 3, 0, 0.f); \ - case BC_RGBA_FLOAT: XBLEND_3NN(FN, z_float, z_float, 1.f, 4, 0, 0.f); \ - case BC_RGB888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \ - case BC_YUV888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \ - case BC_RGBA8888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \ - case BC_YUVA8888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \ - case BC_RGB161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \ - case BC_YUV161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \ - case BC_RGBA16161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \ - case BC_YUVA16161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \ - } \ - break; \ -} - NNPackage::NNPackage() { } @@ -62,19 +19,29 @@ NNUnit::~NNUnit() void NNUnit::process_package(LoadPackage *package) { - NNPackage *pkg = (NNPackage*)package; - VFrame *output = engine->output; - VFrame *input = engine->input; - int mode = engine->mode; - float fade = - BC_CModels::has_alpha(input->get_color_model()) && + pkg = (NNPackage*)package; + output = engine->output; + input = engine->input; + mode = engine->mode; + fade = BC_CModels::has_alpha(input->get_color_model()) && mode == TRANSFER_REPLACE ? 
1.f : engine->alpha; - int ox = engine->out_x1i; - int ow = engine->out_x2i - ox; - int *ly = engine->in_lookup_y + pkg->out_row1; + ox = engine->out_x1i; + ow = engine->out_x2i - ox; + ly = engine->in_lookup_y + pkg->out_row1; - BLEND_SWITCH(XBLEND_NN); + switch(input->get_color_model()) { + case BC_RGB_FLOAT: rgb_float(); break; + case BC_RGBA_FLOAT: rgba_float(); break; + case BC_RGB888: rgb888(); break; + case BC_YUV888: yuv888(); break; + case BC_RGBA8888: rgba8888(); break; + case BC_YUVA8888: yuva8888(); break; + case BC_RGB161616: rgb161616(); break; + case BC_YUV161616: yuv161616(); break; + case BC_RGBA16161616: rgba16161616(); break; + case BC_YUVA16161616: yuva16161616(); break; + } } NNEngine::NNEngine(int cpus) diff --git a/cinelerra-5.1/cinelerra/overlaynearest.h b/cinelerra-5.1/cinelerra/overlaynearest.h new file mode 100644 index 00000000..0f7c62db --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlaynearest.h @@ -0,0 +1,32 @@ +#ifndef __OVERLAYNEAREST_H__ +#define __OVERLAYNEAREST_H__ +#include "overlayframe.h" + +#define XBLEND_3NN(FN, temp_type, type, max, components, ofs, round) { \ + temp_type opcty = fade * max + round, trnsp = max - opcty; \ + type** output_rows = (type**)output->get_rows(); \ + type** input_rows = (type**)input->get_rows(); \ + ox *= components; \ + \ + for( int i=pkg->out_row1; i<pkg->out_row2; ++i ) { \ + int *lx = engine->in_lookup_x; \ + type* in_row = input_rows[*ly++]; \ + type* output = output_rows[i] + ox; \ + for( int j=ow; --j>=0; ) { \ + in_row += *lx++; \ + if( components == 4 ) { \ + temp_type r, g, b, a; \ + ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \ + ALPHA4_STORE(output, ofs, max); \ + } \ + else { \ + temp_type r, g, b; \ + ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \ + ALPHA3_STORE(output, ofs, max); \ + } \ + output += components; \ + } \ + } \ +} break + +#endif diff --git a/cinelerra-5.1/cinelerra/overlaysample.C b/cinelerra-5.1/cinelerra/overlaysample.C index 
a4b485d7..09d3d3ae 100644 --- a/cinelerra-5.1/cinelerra/overlaysample.C +++ b/cinelerra-5.1/cinelerra/overlaysample.C @@ -1,93 +1,9 @@ #include "overlayframe.h" +#include "overlaysample.h" /* Fully resampled scale / translate / blend ******************************/ /* resample into a temporary row vector, then blend */ -#define XSAMPLE(FN, temp_type, type, max, components, ofs, round) { \ - float temp[oh*components]; \ - temp_type opcty = fade * max + round, trnsp = max - opcty; \ - type **output_rows = (type**)voutput->get_rows() + o1i; \ - type **input_rows = (type**)vinput->get_rows(); \ - \ - for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \ - type *input = input_rows[i - engine->col_out1 + engine->row_in]; \ - float *tempp = temp; \ - if( !k ) { /* direct copy case */ \ - type *ip = input + i1i * components; \ - for(int j = 0; j < oh; j++) { \ - *tempp++ = *ip++; \ - *tempp++ = *ip++ - ofs; \ - *tempp++ = *ip++ - ofs; \ - if( components == 4 ) *tempp++ = *ip++; \ - } \ - } \ - else { /* resample */ \ - for(int j = 0; j < oh; j++) { \ - float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \ - int ki = lookup_sk[j], x = lookup_sx0[j]; \ - type *ip = input + x * components; \ - while(x < lookup_sx1[j]) { \ - float kv = k[abs(ki >> INDEX_FRACTION)]; \ - /* handle fractional pixels on edges of input */ \ - if(x == i1i) kv *= i1f; \ - if(++x == i2i) kv *= i2f; \ - racc += kv * *ip++; \ - gacc += kv * (*ip++ - ofs); \ - bacc += kv * (*ip++ - ofs); \ - if( components == 4 ) { aacc += kv * *ip++; } \ - ki += kd; \ - } \ - float wacc = lookup_wacc[j]; \ - *tempp++ = racc * wacc; \ - *tempp++ = gacc * wacc; \ - *tempp++ = bacc * wacc; \ - if( components == 4 ) { *tempp++ = aacc * wacc; } \ - } \ - } \ - \ - /* handle fractional pixels on edges of output */ \ - temp[0] *= o1f; temp[1] *= o1f; temp[2] *= o1f; \ - if( components == 4 ) temp[3] *= o1f; \ - tempp = temp + (oh-1)*components; \ - tempp[0] *= o2f; tempp[1] *= o2f; tempp[2] *= o2f; \ - if( components == 4 ) 
tempp[3] *= o2f; \ - tempp = temp; \ - /* blend output */ \ - for(int j = 0; j < oh; j++) { \ - type *output = output_rows[j] + i * components; \ - if( components == 4 ) { \ - temp_type r, g, b, a; \ - ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \ - ALPHA4_STORE(output, ofs, max); \ - } \ - else { \ - temp_type r, g, b; \ - ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \ - ALPHA3_STORE(output, ofs, max); \ - } \ - tempp += components; \ - } \ - } \ - break; \ -} - -#define XBLEND_SAMPLE(FN) { \ - switch(vinput->get_color_model()) { \ - case BC_RGB_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 3, 0.f, 0.f); \ - case BC_RGBA_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 4, 0.f, 0.f); \ - case BC_RGB888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \ - case BC_YUV888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \ - case BC_RGBA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \ - case BC_YUVA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \ - case BC_RGB161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \ - case BC_YUV161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \ - case BC_RGBA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \ - case BC_YUVA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \ - } \ - break; \ -} - - SamplePackage::SamplePackage() { } @@ -104,7 +20,7 @@ SampleUnit::~SampleUnit() void SampleUnit::process_package(LoadPackage *package) { - SamplePackage *pkg = (SamplePackage*)package; + pkg = (SamplePackage*)package; float i1 = engine->in1; float i2 = engine->in2; @@ -114,36 +30,46 @@ void SampleUnit::process_package(LoadPackage *package) if(i2 - i1 <= 0 || o2 - o1 <= 0) return; - VFrame *voutput = engine->output; - VFrame *vinput = engine->input; - int mode = engine->mode; - float fade = - BC_CModels::has_alpha(vinput->get_color_model()) && + voutput = engine->output; + vinput = engine->input; + mode = engine->mode; 
+ fade = BC_CModels::has_alpha(vinput->get_color_model()) && mode == TRANSFER_REPLACE ? 1.f : engine->alpha; - //int iw = vinput->get_w(); - int i1i = floor(i1); - int i2i = ceil(i2); - float i1f = 1.f - i1 + i1i; - float i2f = 1.f - i2i + i2; - - int o1i = floor(o1); - int o2i = ceil(o2); - float o1f = 1.f - o1 + o1i; - float o2f = 1.f - o2i + o2; - int oh = o2i - o1i; - - float *k = engine->kernel->lookup; - //float kw = engine->kernel->width; - //int kn = engine->kernel->n; - int kd = engine->kd; - - int *lookup_sx0 = engine->lookup_sx0; - int *lookup_sx1 = engine->lookup_sx1; - int *lookup_sk = engine->lookup_sk; - float *lookup_wacc = engine->lookup_wacc; - - BLEND_SWITCH(XBLEND_SAMPLE); + //iw = vinput->get_w(); + i1i = floor(i1); + i2i = ceil(i2); + i1f = 1.f - i1 + i1i; + i2f = 1.f - i2i + i2; + + o1i = floor(o1); + o2i = ceil(o2); + o1f = 1.f - o1 + o1i; + o2f = 1.f - o2i + o2; + oh = o2i - o1i; + + k = engine->kernel->lookup; + //kw = engine->kernel->width; + //kn = engine->kernel->n; + kd = engine->kd; + + lookup_sx0 = engine->lookup_sx0; + lookup_sx1 = engine->lookup_sx1; + lookup_sk = engine->lookup_sk; + lookup_wacc = engine->lookup_wacc; + + switch( vinput->get_color_model() ) { + case BC_RGB_FLOAT: rgb_float(); break; + case BC_RGBA_FLOAT: rgba_float(); break; + case BC_RGB888: rgb888(); break; + case BC_YUV888: yuv888(); break; + case BC_RGBA8888: rgba8888(); break; + case BC_YUVA8888: yuva8888(); break; + case BC_RGB161616: rgb161616(); break; + case BC_YUV161616: yuv161616(); break; + case BC_RGBA16161616: rgba16161616(); break; + case BC_YUVA16161616: yuva16161616(); break; + } } diff --git a/cinelerra-5.1/cinelerra/overlaysample.h b/cinelerra-5.1/cinelerra/overlaysample.h new file mode 100644 index 00000000..1d7d44a1 --- /dev/null +++ b/cinelerra-5.1/cinelerra/overlaysample.h @@ -0,0 +1,72 @@ +#ifndef __OVERLAYSAMPLE_H__ +#define __OVERLAYSAMPLE_H__ +#include "overlayframe.h" + +#define XSAMPLE(FN, temp_type, type, max, components, ofs, round) 
{ \ + float temp[oh*components]; \ + temp_type opcty = fade * max + round, trnsp = max - opcty; \ + type **output_rows = (type**)voutput->get_rows() + o1i; \ + type **input_rows = (type**)vinput->get_rows(); \ + \ + for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \ + type *input = input_rows[i - engine->col_out1 + engine->row_in]; \ + float *tempp = temp; \ + if( !k ) { /* direct copy case */ \ + type *ip = input + i1i * components; \ + for( int j=oh; --j>=0; ) { \ + *tempp++ = *ip++; \ + *tempp++ = *ip++ - ofs; \ + *tempp++ = *ip++ - ofs; \ + if( components == 4 ) *tempp++ = *ip++; \ + } \ + } \ + else { /* resample */ \ + for( int j=0; j<oh; ++j ) { \ + float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \ + int ki = lookup_sk[j], x = lookup_sx0[j]; \ + type *ip = input + x * components; \ + while(x < lookup_sx1[j]) { \ + float kv = k[abs(ki >> INDEX_FRACTION)]; \ + /* handle fractional pixels on edges of input */ \ + if(x == i1i) kv *= i1f; \ + if(++x == i2i) kv *= i2f; \ + racc += kv * *ip++; \ + gacc += kv * (*ip++ - ofs); \ + bacc += kv * (*ip++ - ofs); \ + if( components == 4 ) { aacc += kv * *ip++; } \ + ki += kd; \ + } \ + float wacc = lookup_wacc[j]; \ + *tempp++ = racc * wacc; \ + *tempp++ = gacc * wacc; \ + *tempp++ = bacc * wacc; \ + if( components == 4 ) { *tempp++ = aacc * wacc; } \ + } \ + } \ + \ + /* handle fractional pixels on edges of output */ \ + temp[0] *= o1f; temp[1] *= o1f; temp[2] *= o1f; \ + if( components == 4 ) temp[3] *= o1f; \ + tempp = temp + (oh-1)*components; \ + tempp[0] *= o2f; tempp[1] *= o2f; tempp[2] *= o2f; \ + if( components == 4 ) tempp[3] *= o2f; \ + tempp = temp; \ + /* blend output */ \ + for( int j=0; j