admin管理员组文章数量:1291178
The wikipedia article about flood fill describes the following recursive algorithm:
Flood-fill (node):
- If node is not Inside return.
- Set the node
- Perform Flood-fill one step to the south of node.
- Perform Flood-fill one step to the north of node
- Perform Flood-fill one step to the west of node
- Perform Flood-fill one step to the east of node
- Return.
Because I couldn't find too much examples of flood-filling using the assembly language, I implemented this myself (using FASM) and made - I think - a rather decent version. There's much worse you can find here and here!
For those that are inquisitive, performance-wise the order that the neighboring pixels are visited does not change much. So it does not need to be South->North->West->East. Any of the 24 permutations will do, but better not pick an ordering where you don't immediately follow a direction by its opposite direction, because those will double the stack usage! The good-ones are SNWE, NSWE, SNEW, NSEW, WESN, EWSN, WENS, and EWNS.
The program works correctly but also produces flicker on screen. That's probably due to using a recursive solution for flood-filling... But is it?
; ------------------------------
; BL is OldColor
; BH is NewColor
; CX is X
; DX is Y
; IN (bx,cx,dx)
FloodWhile:
pusha
mov al, bl ; NewColor
mov ah, 0Ch
mov si, ax ; CONST
shr bx, 8 ; -> BL is OldColor, BH is DisplayPage
cmp al, bl ; NewColor must be different from OldColor
je .z
mov ah, 0Dh ; BIOS.ReadPixel
int 10h ; -> AL
cmp al, bl ; First pixel must have OldColor
jne .z
call .Flood ; -> (AX)
.z: popa
ret
; - - - - - - - - - - - - - - -
; IN (bx,cx,dx,si) OUT () MOD (ax)
.Flood:
push di
mov di, 12
mov ax, si ; -> AL is NewColor, AH is FunctionID
int 10h ; BIOS.WritePixel
.a: add cx, [.z+di] ; Try a neighboring pixel (SNWE)
cmp cx, 320
jnb .b
add dx, [.z+di+2]
cmp dx, 200
jnb .b
mov ah, 0Dh ; BIOS.ReadPixel
int 10h ; -> AL
cmp al, bl
jne .b
call .Flood
.b: sub di, 4
jns .a
dec cx ; Return from East
pop di
ret
; East West North South <-- SNWE
.z: dw +2,0, -1,+1, 0,-2, 0,+1
; ------------------------------
The wikipedia article about flood fill describes the following recursive algorithm:
Flood-fill (node):
- If node is not Inside return.
- Set the node
- Perform Flood-fill one step to the south of node.
- Perform Flood-fill one step to the north of node
- Perform Flood-fill one step to the west of node
- Perform Flood-fill one step to the east of node
- Return.
Because I couldn't find too much examples of flood-filling using the assembly language, I implemented this myself (using FASM) and made - I think - a rather decent version. There's much worse you can find here and here!
For those that are inquisitive, performance-wise the order that the neighboring pixels are visited does not change much. So it does not need to be South->North->West->East. Any of the 24 permutations will do, but better not pick an ordering where you don't immediately follow a direction by its opposite direction, because those will double the stack usage! The good-ones are SNWE, NSWE, SNEW, NSEW, WESN, EWSN, WENS, and EWNS.
The program works correctly but also produces flicker on screen. That's probably due to using a recursive solution for flood-filling... But is it?
; ------------------------------
; BL is OldColor
; BH is NewColor
; CX is X
; DX is Y
; IN (bx,cx,dx)
FloodWhile:
pusha
mov al, bl ; NewColor
mov ah, 0Ch
mov si, ax ; CONST
shr bx, 8 ; -> BL is OldColor, BH is DisplayPage
cmp al, bl ; NewColor must be different from OldColor
je .z
mov ah, 0Dh ; BIOS.ReadPixel
int 10h ; -> AL
cmp al, bl ; First pixel must have OldColor
jne .z
call .Flood ; -> (AX)
.z: popa
ret
; - - - - - - - - - - - - - - -
; IN (bx,cx,dx,si) OUT () MOD (ax)
.Flood:
push di
mov di, 12
mov ax, si ; -> AL is NewColor, AH is FunctionID
int 10h ; BIOS.WritePixel
.a: add cx, [.z+di] ; Try a neighboring pixel (SNWE)
cmp cx, 320
jnb .b
add dx, [.z+di+2]
cmp dx, 200
jnb .b
mov ah, 0Dh ; BIOS.ReadPixel
int 10h ; -> AL
cmp al, bl
jne .b
call .Flood
.b: sub di, 4
jns .a
dec cx ; Return from East
pop di
ret
; East West North South <-- SNWE
.z: dw +2,0, -1,+1, 0,-2, 0,+1
; ------------------------------
Share
Improve this question
asked Feb 13 at 19:49
Sep RolandSep Roland
39.7k9 gold badges48 silver badges88 bronze badges
1 Answer
Reset to default 1That's probably due to using a recursive solution for flood-filling... But is it?
It is true that recursion is infamous for its potential for overflowing the stack, but recursion need not be slow per se. The first code snippet that I'll show below will use a re-worked recursive algorithm and it will run about 62 times faster than yours. Its iterative friend comes in at just 10% faster.
The program works correctly but also produces flicker on screen.
Flicker can go away by relying on very fast graphics routines, or by applying graphics routines of any speed to an offscreen memory buffer followed by updating the real screen in one very fast copy operation. The three code snippets that I show below will be using a hybrid approach: simultaneously writing to an offscreen buffer and to the real screen.
The major changes include:
- Not using BIOS for pixel input/output for a 5x speed increase. BIOS is a generalist and so there's simply too much overhead involved in addressing the screen. If we address the screen ourselves, we can handpick the best way to do it and cut corners however we like.
- Not reading from the video memory for a 12x speed increase. Accessing video memory and especially reading from it, is a very costly operation. I maintain a so-called double buffer which mirrors what is on the real screen, and whenever I need to read a pixel I read from that buffer. Conversely, whenever I need to write a pixel I write it to both that buffer and the real screen.
- Not exclusively dealing with single pixels for a 2x speed increase. The optimal solution gathers as many as possible complying pixels from the same scanline, and then uses an efficient line-drawing code speeding up the output.
The tests ran on the legacy 320x200 256-color screen.
Flood-filling a circle (R=75) producing a plain disc:
asm | stack | heap | reads | writes | pixels/second | ||
---|---|---|---|---|---|---|---|
Question | 90 | 35300 | 0 | 69925 | 17481 | 86865 | |
Good | 103 | 17650 | 0 | 69925 | 17481 | 5323081 | 61x |
Better | 150 | 0 | 1192 | 69925 | 17481 | 5943896 | 68x |
Best | 206 | 0 | 12 | 35557 | 17481 | 9578630 | 110x |
Flood-filling a labyrinth of concentric circles:
asm | stack | heap | reads | writes | pixels/second | ||
---|---|---|---|---|---|---|---|
Question | 90 | 38156 | 0 | 58273 | 14568 | 86791 | |
Good | 103 | 19078 | 0 | 58273 | 14568 | 5524459 | 63x |
Better | 150 | 0 | 156 | 58273 | 14568 | 5924359 | 68x |
Best | 206 | 0 | 168 | 38092 | 14568 | 9410852 | 108x |
62x faster than the original
This is the recursive solution but with optimizations applied, eg. the code does but a single address calculation based on X and Y, and then applies suitable increments to the scanline address and to the offset within the scanline.
; ------------------------------
; AL is OldColor
; AH is NewColor
; BX is X
; CX is Y
; GS is DoubleBuffer (64KB)
; IN (ax,bx,cx,GS)
GoodFloodWhile:
push es
pusha
mov di, 0A000h
mov es, di
mov di, bx ; X
imul bp, cx, 320 ; Y * 320
cmp al, ah ; NewColor must be different from OldColor
je .z
cmp [GS:bp+di], ah ; First pixel must have OldColor
jne .z
call .Flood
.z: popa
pop es
ret
; - - - - - - - - - - - - - - -
ALIGN 16
; IN (ax,di,bp,es,GS) OUT ()
.Flood:
mov [es:bp+di], al ; Center pixel on Screen
mov [GS:bp+di], al ; Center pixel in Double Buffer
inc di ; Try pixel to the right
cmp di, 320
jnb .NotR
cmp [GS:bp+di], ah ; OldColor
jne .NotR
call .Flood
.NotR:
dec di
dec di ; Try pixel to the left
js .NotL
cmp [GS:bp+di], ah ; OldColor
jne .NotL
call .Flood
.NotL:
inc di
add bp, 320 ; Try pixel downward
cmp bp, 200*320
jnb .NotD
cmp [GS:bp+di], ah ; OldColor
jne .NotD
call .Flood
.NotD:
sub bp, 640 ; Try pixel upward
jb .NotU
cmp [GS:bp+di], ah ; OldColor
jne .NotU
call .Flood
.NotU:
add bp, 320
ret
; ------------------------------
68x faster than the original
This iterative solution uses a small (2KB) ringbuffer where it doesn't just record X and Y coordinates, but rather stores scanline addresses and offsets within the scanline. The motivation here was to avoid the many address calculations that require multiplication with the bytes-per-scanline value.
Always choose a power-of-two for the size of the ringbuffer. That way, the wraparound for the read- and write pointers can be just one simple and
operation. The ringbuffer works FirstInFirstOut (FIFO) as opposed to the stack that works LastInFirstOut (LIFO). This is an important fact because if you were to use a LIFO buffer for your iterative solution that buffer would also consume a lot of bytes (just like it was with the recursion's stack).
Tip: provided that you insert a suitable delay, this can be a good code to animate a liquid running through pipes...
; ------------------------------
; AL is OldColor
; AH is NewColor
; BX is X
; CX is Y
; GS is DoubleBuffer (64KB)
; IN (ax,bx,cx,GS)
BetterFloodWhile:
push es
pusha
mov di, 0A000h
mov es, di
mov di, bx ; X
imul bp, cx, 320 ; Y * 320
cmp al, ah ; NewColor must be different from OldColor
je .z
cmp [GS:bp+di], ah ; First pixel must have OldColor
jne .z
xor si, si ; SI is OffsetReadPointer in FIFO buffer
xor bx, bx ; BX is OffsetWritePointer in FIFO buffer
call .Store ; -> BX
call .Flood ; -> (BX SI DI BP)
.z: popa
pop es
ret
; - - - - - - - - - - - - - - -
ALIGN 16
; IN (ax,bx,si,di,bp,es,GS) OUT () MOD (bx,si,di,bp)
.Flood:
mov di, [Buffer+si] ; Offset in scanline
mov bp, [Buffer+si+2] ; Address of scanline
add si, 4
and si, 2047 ; 2KB Ringbuffer (FIFO)
inc di ; Try pixel to the right
cmp di, 320
jnb .NotR
cmp [GS:bp+di], ah ; OldColor
jne .NotR
call .Store ; -> BX
.NotR:
dec di
dec di ; Try pixel to the left
js .NotL
cmp [GS:bp+di], ah ; OldColor
jne .NotL
call .Store ; -> BX
.NotL:
inc di
add bp, 320 ; Try pixel downward
cmp bp, 200*320
jnb .NotD
cmp [GS:bp+di], ah ; OldColor
jne .NotD
call .Store ; -> BX
.NotD:
sub bp, 640 ; Try pixel upward
jb .NotU
cmp [GS:bp+di], ah ; OldColor
jne .NotU
call .Store ; -> BX
.NotU:
cmp si, bx
jne .Flood
ret
; - - - - - - - - - - - - - - -
ALIGN 16
; IN (al,bx,di,bp,es,GS) OUT (bx)
.Store:
mov [es:bp+di], al ; Center pixel on Screen
mov [GS:bp+di], al ; Center pixel in Double Buffer
mov [Buffer+bx], di
mov [Buffer+bx+2], bp
add bx, 4
and bx, 2047 ; 2KB Ringbuffer (FIFO)
ret
; ------------------------------
109x faster than the original
The focus here is to find long runs of pixels (aka spans) that can be drawn with an efficient line-drawing method.
Code alignment proved to be important in this code.
It was the excellent work of Lode Vandevenne that inspired me to write this optimal solution.
; ------------------------------
; AL is OldColor
; AH is NewColor
; BX is X
; CX is Y
; GS is DoubleBuffer (64KB)
ALIGN 16
DB 4 dup 0
; IN (ax,bx,cx,GS)
BestFloodWhile:
push es
pusha
mov dx, cx ; Y
cmp al, ah ; NewColor must be different from OldColor
je .Done
xor si, si
xor cx, cx
; - - - - - - - - - - - - - - -
.Again:
imul di, dx, 320
cmp [GS:di+bx+0], ah ; OldColor
jne .z ; Got colored in the mean time
lea bp, [word bx+0]
add bx, cx ; Avoids retesting CX pixels
.a: add bx, 1 ; X++ Extend to the right
cmp bx, 320
jnb .b
cmp [GS:di+bx], ah ; OldColor
je .a
.b: xchg bp, bx
.c: dec bx ; X-- Extend to the left
js .d
cmp [GS:di+bx], ah ; OldColor
je .c
.d: inc bx ; X++
sub bp, bx ; -> BP is length of this line
mov cx, 0A000h ; NewColor line on screen
call .Line ; -> CX=0 (ES)
mov cx, GS ; NewColor line in Double Buffer
call .Line ; -> CX=0 (ES)
sub di, 320
dec dx ; Y-- Check above
js .e
push bp ; (1)
call .Scan ; -> BX SI (BP=0)
pop bp ; (1)
sub bx, bp
.e: inc dx ; Y++
add di, 640
inc dx ; Y++ Check below
cmp dx, 200
jnb .f
call .Scan ; -> BX SI (BP=0)
.f: dec dx ; Y--
; - - - - - - - - - - - - - - -
.z: sub si, 6
jb .Done
mov bx, [Buffer+si] ; X
mov dx, [Buffer+si+2] ; Y
mov cx, [Buffer+si+4] ; L-1
jmp .Again
; - - - - - - - - - - - - - - -
.Done:
popa
pop es
ret
; - - - - - - - - - - - - - - -
ALIGN 16
; IN (al,bx,cx,di,bp) OUT (cx=0) MOD (es)
.Line: ; Draw BP long line at [cx:di+bx]
mov es, cx
add di, bx
mov cx, bp ; BP is 1+
test di, 1 ; Use aligned words
jz .y
stosb
dec cx ; -> CX is 0+
.y: shr cx, 1 ; -> CF
push ax
mov ah, al
rep stosw
pop ax
jnc .x
stosb
.x: sub di, bp
sub di, bx
ret
; - - - - - - - - - - - - - - -
ALIGN 16
; IN (ah,bx,cx=0,dx,si,di,bp,GS) OUT (bx,si) MOD (bp=0)
.Scan: ; Scan BP long line at [GS:di+bx]
cmp [GS:di+bx], ah ; OldColor
je .v ; Part of a span
.w: add bx, 1 ; X++
dec bp
jnz .Scan
ret
.v: add si, word 6 ; Begin a new span
mov [Buffer+si-6], bx ; X
mov [Buffer+si-4], dx ; Y
mov [Buffer+si-2], cx ; L-1 Counts the additional pixels on the right
inc bx ; X++
dec bp
jz .t
.u: cmp [GS:di+bx], ah ; OldColor
jne .w ; End current span
inc word [Buffer+si-2] ; Cont current span, add to 'additional pixels'
inc bx ; X++
dec bp
jnz .u
.t: ret
; ------------------------------
本文标签: recursionAvoiding flicker while using a recursive flood fill algorithmStack Overflow
版权声明:本文标题:recursion - Avoiding flicker while using a recursive flood fill algorithm - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1741508218a2382447.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论