KCF跟踪算法在Python中的应用
最编程
2024-08-15 13:19:20
...
1.代码链接:https://github.com/uoip/KCFpy
2. 代码修改:原代码直接运行会报各种小错误,主要是 Python 版本不同造成的(例如 Python 3 中 `/` 变为浮点除法、`map` 返回迭代器)。下面给出修改后的代码。
2.1 fhog 代码(保存为 fhog.py,供后面的 `import fhog` 使用)
import numpy as np
import cv2
from numba import jit
# Module-level constants shared by the HOG feature routines below.
# Number of orientation sectors: func1 loops over this many boundary
# directions, and the per-cell feature lengths are multiples of it.
NUM_SECTOR = 9
# Small offset added to each block norm in func3 before it is used as a divisor.
FLT_EPSILON = 1e-07
@jit
def func1(dx, dy, boundary_x, boundary_y, height, width, numChannels):
    """Compute the per-pixel gradient magnitude and orientation sector.

    For every interior pixel the channel with the largest gradient
    magnitude is selected; its gradient is then projected onto the first
    NUM_SECTOR boundary directions to pick the best matching sector.

    Args:
        dx, dy: gradient arrays indexed as [row, col, channel].
        boundary_x, boundary_y: x / y components of the sector directions.
        height, width: number of rows / columns to process.
        numChannels: number of channels read from dx and dy.

    Returns:
        (r, alfa): r is a float32 (height, width) array of magnitudes;
        alfa is an integer (height, width, 2) array where [..., 0] is the
        sector modulo NUM_SECTOR and [..., 1] is the sector in
        [0, 2*NUM_SECTOR). The one-pixel border is left at zero.
    """
    r = np.zeros((height, width), np.float32)
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; use an
    # explicit integer dtype instead.
    alfa = np.zeros((height, width, 2), np.int64)

    for j in range(1, height - 1):
        for i in range(1, width - 1):
            # Start from channel 0, then keep the strongest channel.
            x = dx[j, i, 0]
            y = dy[j, i, 0]
            r[j, i] = np.sqrt(x*x + y*y)

            for ch in range(1, numChannels):
                tx = dx[j, i, ch]
                ty = dy[j, i, ch]
                magnitude = np.sqrt(tx*tx + ty*ty)
                if magnitude > r[j, i]:
                    r[j, i] = magnitude
                    x = tx
                    y = ty

            # Pick the sector whose direction (or its opposite) has the
            # largest dot product with the gradient.
            mmax = boundary_x[0]*x + boundary_y[0]*y
            maxi = 0
            for kk in range(0, NUM_SECTOR):
                dotProd = boundary_x[kk]*x + boundary_y[kk]*y
                if dotProd > mmax:
                    mmax = dotProd
                    maxi = kk
                elif -dotProd > mmax:
                    mmax = -dotProd
                    maxi = kk + NUM_SECTOR

            alfa[j, i, 0] = maxi % NUM_SECTOR
            alfa[j, i, 1] = maxi
    return r, alfa
@jit
def func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize):
    """Accumulate weighted gradient magnitudes into per-cell histograms.

    Each interior pixel adds its magnitude r to two bins (alfa[..., 0] and
    alfa[..., 1] + NUM_SECTOR) of its own cell and, when they exist, of the
    neighbouring cells selected by `nearest`, weighted by the columns of w.
    dx, dy, boundary_x and boundary_y are accepted but not read here.

    Returns:
        A flat float32 array of length sizeX*sizeY*p; cell (i, j) starts
        at offset i*stringSize + j*p.
    """
    mapp = np.zeros((sizeX*sizeY*p), np.float32)
    for i in range(sizeY):
        for j in range(sizeX):
            for ii in range(k):
                for jj in range(k):
                    row = i * k + ii
                    col = j * k + jj
                    # Border pixels carry no gradient information.
                    if not (row > 0 and row < height - 1 and col > 0 and col < width - 1):
                        continue

                    magnitude = r[row, col]
                    binLow = alfa[row, col, 0]
                    binHigh = alfa[row, col, 1] + NUM_SECTOR

                    nbrRow = i + nearest[ii]
                    nbrCol = j + nearest[jj]
                    rowInside = (nbrRow >= 0) and (nbrRow <= sizeY - 1)
                    colInside = (nbrCol >= 0) and (nbrCol <= sizeX - 1)

                    # Own cell.
                    here = i*stringSize + j*p
                    mapp[here + binLow] += magnitude * w[ii, 0] * w[jj, 0]
                    mapp[here + binHigh] += magnitude * w[ii, 0] * w[jj, 0]

                    # Vertical neighbour.
                    if rowInside:
                        vert = nbrRow*stringSize + j*p
                        mapp[vert + binLow] += magnitude * w[ii, 1] * w[jj, 0]
                        mapp[vert + binHigh] += magnitude * w[ii, 1] * w[jj, 0]

                    # Horizontal neighbour.
                    if colInside:
                        horiz = i*stringSize + nbrCol*p
                        mapp[horiz + binLow] += magnitude * w[ii, 0] * w[jj, 1]
                        mapp[horiz + binHigh] += magnitude * w[ii, 0] * w[jj, 1]

                    # Diagonal neighbour.
                    if rowInside and colInside:
                        diag = nbrRow*stringSize + nbrCol*p
                        mapp[diag + binLow] += magnitude * w[ii, 1] * w[jj, 1]
                        mapp[diag + binHigh] += magnitude * w[ii, 1] * w[jj, 1]
    return mapp
@jit
def func3(partOfNorm, mappmap, sizeX, sizeY, p, xp, pp):
    """Normalise every interior cell by the norms of its four 2x2 blocks.

    partOfNorm and mappmap are laid out on a (sizeY+2) x (sizeX+2) cell
    grid; the output covers the inner sizeY x sizeX cells with pp values
    each. For each of the four diagonal neighbourhoods the first p input
    values and the following 2*p input values are divided by that
    neighbourhood's norm and written to separate output slots.

    Returns:
        A flat float32 array of length sizeY*sizeX*pp.
    """
    newData = np.zeros((sizeY*sizeX*pp), np.float32)
    stride = sizeX + 2
    for i in range(1, sizeY + 1):
        for j in range(1, sizeX + 1):
            src = i * stride * xp + j * xp
            dst = (i - 1) * sizeX * pp + (j - 1) * pp
            low = mappmap[src:src + p]
            high = mappmap[src + p:src + 3*p]

            # n = 0..3 visits the neighbourhoods (+1,+1), (-1,+1),
            # (+1,-1), (-1,-1) as (row offset, column offset).
            for n in range(4):
                di = 1 - 2 * (n % 2)
                dj = 1 - 2 * (n // 2)
                valOfNorm = np.sqrt(partOfNorm[i*stride + j] +
                                    partOfNorm[i*stride + (j + dj)] +
                                    partOfNorm[(i + di)*stride + j] +
                                    partOfNorm[(i + di)*stride + (j + dj)]) + FLT_EPSILON
                newData[dst + n*p:dst + (n + 1)*p] = low / valOfNorm
                newData[dst + (4 + 2*n)*p:dst + (6 + 2*n)*p] = high / valOfNorm
    return newData
@jit
def func4(mappmap, p, sizeX, sizeY, pp, yp, xp, nx, ny):
    """Reduce each cell's p features to pp features by strided sums.

    Per cell the output holds 2*xp sums over the last 2*yp*xp inputs
    (stride 2*xp, scaled by ny), xp sums over the first yp*xp inputs
    (stride xp, scaled by ny) and yp sums of consecutive 2*xp-long runs
    (scaled by nx).

    Returns:
        A flat float32 array of length sizeX*sizeY*pp.
    """
    newData = np.zeros((sizeX*sizeY*pp), np.float32)
    block = yp * xp
    # Cells are stored row-major, so one flat index covers the whole grid.
    for cell in range(sizeY * sizeX):
        src = cell * p
        dst = cell * pp

        for jj in range(2 * xp):
            first = src + block + jj
            newData[dst + jj] = np.sum(mappmap[first:src + 3*block + jj:2*xp]) * ny

        for jj in range(xp):
            first = src + jj
            newData[dst + 2*xp + jj] = np.sum(mappmap[first:first + block:xp]) * ny

        for ii in range(yp):
            first = src + block + ii*xp*2
            newData[dst + 3*xp + ii] = np.sum(mappmap[first:first + 2*xp]) * nx
    return newData
def getFeatureMaps(image, k, mapp):
    """Build the raw per-cell gradient histogram map for an image.

    Args:
        image: 3-D array indexed as [row, col, channel]; three channels
            are read.
        k: cell size in pixels (integer).
        mapp: dict that receives 'sizeX', 'sizeY', 'numFeatures' and 'map'.

    Returns:
        The same `mapp` dict. 'sizeX' / 'sizeY' are the integer cell counts
        (width // k, height // k), 'numFeatures' is 3*NUM_SECTOR and 'map'
        is the flat float32 array produced by func2.
    """
    kernel = np.array([[-1., 0., 1.]], np.float32)

    height = image.shape[0]
    width = image.shape[1]
    assert(image.ndim == 3 and image.shape[2])
    numChannels = 3  # (1 if image.ndim==2 else image.shape[2])

    # Integer division: under Python 3 `/` yields floats, which made
    # stringSize wrong whenever the image size was not a multiple of k.
    sizeX = width // k
    sizeY = height // k
    p = 3 * NUM_SECTOR
    stringSize = sizeX * p

    mapp['sizeX'] = sizeX
    mapp['sizeY'] = sizeY
    mapp['numFeatures'] = p

    imageF = np.float32(image)  # np.float32(...) is necessary for filter2D
    dx = cv2.filter2D(imageF, -1, kernel)
    dy = cv2.filter2D(imageF, -1, kernel.T)

    arg_vector = np.arange(NUM_SECTOR + 1).astype(np.float32) * np.pi / NUM_SECTOR
    boundary_x = np.cos(arg_vector)
    boundary_y = np.sin(arg_vector)

    # A pure NumPy version of this step exists upstream; the jitted loop
    # is the fastest variant.
    r, alfa = func1(dx, dy, boundary_x, boundary_y, height, width, numChannels)

    # Pixels in the first half of a cell share with the previous cell (-1),
    # the rest with the next one (+1).
    half = k // 2
    nearest = np.ones((k), np.int64)  # np.int no longer exists in NumPy >= 1.24
    nearest[0:half] = -1

    # Interpolation weights between a cell and its neighbour.
    w = np.zeros((k, 2), np.float32)
    a_x = np.concatenate((half - np.arange(half) - 0.5,
                          np.arange(half, k) - half + 0.5)).astype(np.float32)
    b_x = np.concatenate((half + np.arange(half) + 0.5,
                          -np.arange(half, k) + half - 0.5 + k)).astype(np.float32)
    w[:, 0] = 1.0 / a_x * ((a_x*b_x) / (a_x+b_x))
    w[:, 1] = 1.0 / b_x * ((a_x*b_x) / (a_x+b_x))

    mapp['map'] = func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k,
                        height, width, sizeX, sizeY, p, stringSize)
    return mapp
def normalizeAndTruncate(mapp, alfa):
    """Normalise the feature map by local block energy and clip it.

    Args:
        mapp: dict with 'sizeX', 'sizeY', 'numFeatures' and a flat 'map'.
        alfa: upper bound; every normalised value above it is set to it.

    Returns:
        The same dict, updated in place: 'sizeX' and 'sizeY' shrink by 2,
        'numFeatures' becomes 12*NUM_SECTOR and 'map' holds the new data.
    """
    cellsX = mapp['sizeX']
    cellsY = mapp['sizeY']
    featLen = mapp['numFeatures']

    p = NUM_SECTOR
    xp = NUM_SECTOR * 3
    pp = NUM_SECTOR * 12

    # Squared norm of the first p features of every cell, gathered with
    # one fancy-index lookup instead of a Python loop.
    totalLen = int(cellsX*cellsY*featLen)
    cellStarts = np.arange(0, totalLen, featLen).reshape((int(cellsX*cellsY), 1))
    gather = cellStarts + np.arange(p)
    partOfNorm = np.sum(mapp['map'][gather] ** 2, axis=1)

    # The outermost ring of cells has no complete neighbourhood.
    cellsX = cellsX - 2
    cellsY = cellsY - 2

    # Jitted normalisation (a vectorised NumPy variant exists upstream).
    newData = func3(partOfNorm, mapp['map'], int(cellsX), int(cellsY), p, xp, pp)

    # truncation
    tooLarge = newData > alfa
    newData[tooLarge] = alfa

    mapp.update(numFeatures=pp, sizeX=cellsX, sizeY=cellsY, map=newData)
    return mapp
def PCAFeatureMaps(mapp):
    """Compress each cell's features to 3*NUM_SECTOR + 4 values via func4.

    Args:
        mapp: dict with 'sizeX', 'sizeY', 'numFeatures' and a flat 'map'.

    Returns:
        The same dict with 'numFeatures' and 'map' replaced; 'sizeX' and
        'sizeY' are left untouched.
    """
    xp = NUM_SECTOR
    yp = 4
    pp = NUM_SECTOR * 3 + 4

    # Scale factors applied to the two kinds of sums inside func4.
    nx = 1.0 / np.sqrt(xp*2)
    ny = 1.0 / np.sqrt(yp)

    # Jitted reduction (slower NumPy variants exist upstream).
    reduced = func4(mapp['map'], mapp['numFeatures'],
                    int(mapp['sizeX']), int(mapp['sizeY']),
                    pp, yp, xp, nx, ny)

    mapp['numFeatures'] = pp
    mapp['map'] = reduced
    return mapp
2.2 KCFtracker 代码(保存为 kcftracker.py,供后面的 `import kcftracker` 使用)
import numpy as np
import cv2
import fhog
# ffttools
def fftd(img, backwards=False):
    """Run cv2.dft on `img` after converting it to float32.

    The forward transform asks for complex output; with `backwards=True`
    the inverse, scaled transform is computed instead.
    """
    # shape of img can be (m,n), (m,n,1) or (m,n,2)
    # cv2.dft was measured faster than the numpy / scipy FFTs here.
    if backwards:
        dftFlags = cv2.DFT_INVERSE | cv2.DFT_SCALE
    else:
        dftFlags = cv2.DFT_COMPLEX_OUTPUT
    # The keyword 'flags=' is required by cv2.dft.
    return cv2.dft(np.float32(img), flags=dftFlags)
def real(img):
    """Return plane 0 along the third axis (the real part of a two-channel spectrum)."""
    firstPlane = (slice(None), slice(None), 0)
    return img[firstPlane]
def imag(img):
    """Return plane 1 along the third axis (the imaginary part of a two-channel spectrum)."""
    secondPlane = (slice(None), slice(None), 1)
    return img[secondPlane]
def complexMultiplication(a, b):
    """Multiply two complex arrays stored as [..., 0] = real, [..., 1] = imaginary.

    Returns a new array with the shape and dtype of `a`.
    """
    aRe, aIm = a[:, :, 0], a[:, :, 1]
    bRe, bIm = b[:, :, 0], b[:, :, 1]

    product = np.zeros(a.shape, a.dtype)
    product[:, :, 0] = aRe*bRe - aIm*bIm
    product[:, :, 1] = aRe*bIm + aIm*bRe
    return product
def complexDivision(a, b):
    """Divide two complex arrays stored as [..., 0] = real, [..., 1] = imaginary.

    Computes a / b = a * conj(b) / |b|^2 element-wise and returns a new
    array with the shape and dtype of `a`.
    """
    aRe, aIm = a[:, :, 0], a[:, :, 1]
    bRe, bIm = b[:, :, 0], b[:, :, 1]

    res = np.zeros(a.shape, a.dtype)
    divisor = 1. / (bRe**2 + bIm**2)
    res[:, :, 0] = (aRe*bRe + aIm*bIm) * divisor
    # The imaginary part of a*conj(b) is aIm*bRe - aRe*bIm; the previous
    # '+' gave a wrong quotient whenever b had a non-zero imaginary part.
    res[:, :, 1] = (aIm*bRe - aRe*bIm) * divisor
    return res
def rearrange(img):
    """Swap the diagonal quadrants of a 2-D array (an FFT shift).

    Moves the zero-shift element from the top-left corner to the centre.
    For even-sized inputs the result equals the former hand-written
    quadrant swap; odd-sized inputs, which used to fail with a shape
    mismatch, are now handled as well.

    Args:
        img: 2-D array.

    Returns:
        A new array with the same shape and dtype as `img`.
    """
    assert(img.ndim == 2)
    return np.fft.fftshift(img, axes=(0, 1))
# recttools
def x2(rect):
    """Return the right edge (x + width) of an [x, y, width, height] rect."""
    left = rect[0]
    width = rect[2]
    return left + width
def y2(rect):
    """Return the bottom edge (y + height) of an [x, y, width, height] rect."""
    top = rect[1]
    height = rect[3]
    return top + height
def limit(rect, limit):
    """Clip `rect` in place so that it lies inside the rect `limit`.

    Both arguments are [x, y, width, height]. The far edges are clipped
    first, then the near edges, and negative sizes collapse to 0.

    Returns:
        The same (mutated) `rect` object.
    """
    minX, minY = limit[0], limit[1]
    maxX = limit[0] + limit[2]
    maxY = limit[1] + limit[3]

    # Trim anything sticking out past the right / bottom edge.
    if rect[0] + rect[2] > maxX:
        rect[2] = maxX - rect[0]
    if rect[1] + rect[3] > maxY:
        rect[3] = maxY - rect[1]

    # Move the origin inside and shrink the size by the same amount.
    if rect[0] < minX:
        rect[2] -= (minX - rect[0])
        rect[0] = minX
    if rect[1] < minY:
        rect[3] -= (minY - rect[1])
        rect[1] = minY

    # A rect entirely outside the limit ends up with zero size.
    for sizeIdx in (2, 3):
        if rect[sizeIdx] < 0:
            rect[sizeIdx] = 0
    return rect
def getBorder(original, limited):
    """Return how far `original` extends beyond `limited` on each side.

    Both rects are [x, y, width, height]. The result is
    [left, top, right, bottom]; every entry is asserted to be >= 0.
    """
    res = [
        limited[0] - original[0],
        limited[1] - original[1],
        (original[0] + original[2]) - (limited[0] + limited[2]),
        (original[1] + original[3]) - (limited[1] + limited[3]),
    ]
    assert(np.all(np.array(res) >= 0))
    return res
def subwindow(img, window, borderType=cv2.BORDER_CONSTANT):
    """Cut `window` ([x, y, width, height]) out of `img`, padding as needed.

    The window is clipped to the image; whatever part of it lies outside
    is restored with cv2.copyMakeBorder using `borderType`. `window`
    itself is not modified.
    """
    clipped = list(window)
    limit(clipped, [0, 0, img.shape[1], img.shape[0]])  # clips `clipped` in place
    assert(clipped[2] > 0 and clipped[3] > 0)

    border = getBorder(window, clipped)
    x0, y0 = clipped[0], clipped[1]
    patch = img[y0:y0 + clipped[3], x0:x0 + clipped[2]]

    if border == [0, 0, 0, 0]:
        return patch
    # copyMakeBorder takes top, bottom, left, right.
    return cv2.copyMakeBorder(patch, border[1], border[3], border[0], border[2], borderType)
# KCF tracker
class KCFTracker:
    """Kernelized Correlation Filter tracker on raw-pixel or HOG features.

    Usage: call `init(roi, image)` once with the initial box, then
    `update(image)` for every following frame; `update` returns the
    current [x, y, width, height] box.
    """

    def __init__(self, hog=False, fixed_window=True, multiscale=False):
        """Configure the tracker.

        Args:
            hog: use HOG features (cell size 4) instead of raw gray pixels.
            fixed_window: resize the search window to a fixed template size.
            multiscale: additionally test one smaller and one larger scale
                on every update.
        """
        self.lambdar = 0.0001   # regularization
        self.padding = 2.5   # extra area surrounding the target
        self.output_sigma_factor = 0.125   # bandwidth of gaussian target

        if hog:   # HOG feature
            # VOT
            self.interp_factor = 0.012   # linear interpolation factor for adaptation
            self.sigma = 0.6   # gaussian kernel bandwidth
            # TPAMI   #interp_factor = 0.02   #sigma = 0.5
            self.cell_size = 4   # HOG cell size
            self._hogfeatures = True
        else:   # raw gray-scale image # aka CSK tracker
            self.interp_factor = 0.075
            self.sigma = 0.2
            self.cell_size = 1
            self._hogfeatures = False

        if multiscale:
            self.template_size = 96   # template size
            self.scale_step = 1.05   # scale step for multi-scale estimation
            self.scale_weight = 0.96   # downweights detection scores of other scales for stability
        elif fixed_window:
            self.template_size = 96
            self.scale_step = 1
        else:
            self.template_size = 1
            self.scale_step = 1

        self._tmpl_sz = [0, 0]   # [width, height], ints
        self._roi = [0., 0., 0., 0.]   # [x, y, width, height], floats
        self.size_patch = [0, 0, 0]   # [rows, cols, channels], ints
        self._scale = 1.   # float
        self._alphaf = None   # numpy.ndarray (size_patch[0], size_patch[1], 2)
        self._prob = None   # numpy.ndarray (size_patch[0], size_patch[1], 2)
        self._tmpl = None   # raw: (size_patch[0], size_patch[1])  hog: (size_patch[2], size_patch[0]*size_patch[1])
        self.hann = None   # raw: (size_patch[0], size_patch[1])  hog: (size_patch[2], size_patch[0]*size_patch[1])

    def subPixelPeak(self, left, center, right):
        """Return the sub-sample offset of a peak from its two neighbours.

        Returns 0 when the three values are (almost) collinear.
        """
        divisor = 2*center - right - left
        return (0 if abs(divisor) < 1e-3 else 0.5*(right-left)/divisor)

    def createHanningMats(self):
        """Build the Hann window matching `size_patch` and store it in `self.hann`."""
        hann2t, hann1t = np.ogrid[0:self.size_patch[0], 0:self.size_patch[1]]

        hann1t = 0.5 * (1 - np.cos(2*np.pi*hann1t/(self.size_patch[1]-1)))
        hann2t = 0.5 * (1 - np.cos(2*np.pi*hann2t/(self.size_patch[0]-1)))
        hann2d = hann2t * hann1t

        if self._hogfeatures:
            # One flattened copy of the 2-D window per feature channel.
            hann1d = hann2d.reshape(self.size_patch[0]*self.size_patch[1])
            self.hann = np.zeros((self.size_patch[2], 1), np.float32) + hann1d
        else:
            self.hann = hann2d
        self.hann = self.hann.astype(np.float32)

    def createGaussianPeak(self, sizey, sizex):
        """Return the DFT of a Gaussian centred in a sizey x sizex grid."""
        # Integer centre, as in the Python 2 original; `/` would put the
        # peak between samples for odd sizes.
        syh, sxh = sizey // 2, sizex // 2
        output_sigma = np.sqrt(sizex*sizey) / self.padding * self.output_sigma_factor
        mult = -0.5 / (output_sigma*output_sigma)
        y, x = np.ogrid[0:sizey, 0:sizex]
        y, x = (y-syh)**2, (x-sxh)**2
        res = np.exp(mult * (y+x))
        return fftd(res)

    def gaussianCorrelation(self, x1, x2):
        """Evaluate the Gaussian kernel between x1 and all cyclic shifts of x2.

        Returns:
            A (size_patch[0], size_patch[1]) array of kernel values.
        """
        rows, cols = self.size_patch[0], self.size_patch[1]
        if self._hogfeatures:
            # Sum the cross-correlation over all feature channels.
            c = np.zeros((rows, cols), np.float32)
            for i in range(self.size_patch[2]):
                x1aux = x1[i, :].reshape((rows, cols))
                x2aux = x2[i, :].reshape((rows, cols))
                caux = cv2.mulSpectrums(fftd(x1aux), fftd(x2aux), 0, conjB=True)
                c += real(fftd(caux, True))
            c = rearrange(c)
        else:
            c = cv2.mulSpectrums(fftd(x1), fftd(x2), 0, conjB=True)   # 'conjB=' is necessary!
            c = fftd(c, True)
            c = real(c)
            c = rearrange(c)

        numel = rows * cols * self.size_patch[2]
        if x1.ndim == 3 and x2.ndim == 3:
            d = (np.sum(x1[:, :, 0]*x1[:, :, 0]) + np.sum(x2[:, :, 0]*x2[:, :, 0]) - 2.0*c) / numel
        elif x1.ndim == 2 and x2.ndim == 2:
            d = (np.sum(x1*x1) + np.sum(x2*x2) - 2.0*c) / numel
        else:
            raise ValueError("x1 and x2 must both be 2-D or both be 3-D")

        d = d * (d >= 0)   # clamp negative distances caused by rounding
        d = np.exp(-d / (self.sigma*self.sigma))
        return d

    def getFeatures(self, image, inithann, scale_adjust=1.0):
        """Extract the windowed feature map around the current ROI.

        Args:
            image: current frame.
            inithann: truthy on the first call; (re)computes the template
                size, the scale and the Hann window.
            scale_adjust: extra factor applied to the extracted window size.

        Returns:
            The feature map multiplied by the Hann window.
        """
        extracted_roi = [0, 0, 0, 0]   # [x, y, width, height], ints
        cx = self._roi[0] + self._roi[2] / 2
        cy = self._roi[1] + self._roi[3] / 2

        if inithann:
            padded_w = self._roi[2] * self.padding
            padded_h = self._roi[3] * self.padding

            if self.template_size > 1:
                # Fit the larger padded side to template_size.
                self._scale = max(padded_w, padded_h) / float(self.template_size)
                self._tmpl_sz[0] = int(padded_w / self._scale)
                self._tmpl_sz[1] = int(padded_h / self._scale)
            else:
                self._tmpl_sz[0] = int(padded_w)
                self._tmpl_sz[1] = int(padded_h)
                self._scale = 1.

            if self._hogfeatures:
                # Round to a multiple of 2*cell_size and add one cell pair.
                cell2 = 2 * self.cell_size
                self._tmpl_sz[0] = self._tmpl_sz[0] // cell2 * cell2 + cell2
                self._tmpl_sz[1] = self._tmpl_sz[1] // cell2 * cell2 + cell2
            else:
                # Force even sizes. `/ 2 * 2` is a no-op under Python 3 and
                # left odd, float sizes that rearrange() cannot handle.
                self._tmpl_sz[0] = self._tmpl_sz[0] // 2 * 2
                self._tmpl_sz[1] = self._tmpl_sz[1] // 2 * 2

        extracted_roi[2] = int(scale_adjust * self._scale * self._tmpl_sz[0])
        extracted_roi[3] = int(scale_adjust * self._scale * self._tmpl_sz[1])
        # Integer half-size, as in the Python 2 original.
        extracted_roi[0] = int(cx - extracted_roi[2] // 2)
        extracted_roi[1] = int(cy - extracted_roi[3] // 2)

        z = subwindow(image, extracted_roi, cv2.BORDER_REPLICATE)
        if z.shape[1] != self._tmpl_sz[0] or z.shape[0] != self._tmpl_sz[1]:
            z = cv2.resize(z, (int(self._tmpl_sz[0]), int(self._tmpl_sz[1])))

        if self._hogfeatures:
            mapp = {'sizeX': 0, 'sizeY': 0, 'numFeatures': 0, 'map': 0}
            mapp = fhog.getFeatureMaps(z, self.cell_size, mapp)
            mapp = fhog.normalizeAndTruncate(mapp, 0.2)
            mapp = fhog.PCAFeatureMaps(mapp)
            self.size_patch = [int(mapp['sizeY']), int(mapp['sizeX']), int(mapp['numFeatures'])]
            # -> (size_patch[2], size_patch[0]*size_patch[1])
            FeaturesMap = mapp['map'].reshape((self.size_patch[0]*self.size_patch[1], self.size_patch[2])).T
        else:
            if z.ndim == 3 and z.shape[2] == 3:
                FeaturesMap = cv2.cvtColor(z, cv2.COLOR_BGR2GRAY)
            elif z.ndim == 2:
                FeaturesMap = z
            # Map 0..255 to -0.5..0.5.
            FeaturesMap = FeaturesMap.astype(np.float32) / 255.0 - 0.5
            self.size_patch = [z.shape[0], z.shape[1], 1]

        if inithann:
            self.createHanningMats()   # createHanningMats needs size_patch

        FeaturesMap = self.hann * FeaturesMap
        return FeaturesMap

    def detect(self, z, x):
        """Locate the target in features `x` using template `z`.

        Returns:
            (p, pv): p is the [x, y] peak position relative to the centre
            of the response map (sub-pixel refined), pv is the peak value.
        """
        k = self.gaussianCorrelation(x, z)
        res = real(fftd(complexMultiplication(self._alphaf, fftd(k)), True))

        _, pv, _, pi = cv2.minMaxLoc(res)   # pv: float  pi: tuple of int
        p = [float(pi[0]), float(pi[1])]   # [x, y]

        if pi[0] > 0 and pi[0] < res.shape[1]-1:
            p[0] += self.subPixelPeak(res[pi[1], pi[0]-1], pv, res[pi[1], pi[0]+1])
        if pi[1] > 0 and pi[1] < res.shape[0]-1:
            p[1] += self.subPixelPeak(res[pi[1]-1, pi[0]], pv, res[pi[1]+1, pi[0]])

        p[0] -= res.shape[1] / 2.
        p[1] -= res.shape[0] / 2.
        return p, pv

    def train(self, x, train_interp_factor):
        """Blend features `x` into the template and the filter coefficients."""
        k = self.gaussianCorrelation(x, x)
        kf = fftd(k)
        # The regulariser is a real number: add it to the real channel only.
        # `kf + lambdar` would broadcast it into the imaginary channel too.
        kf[:, :, 0] += self.lambdar
        alphaf = complexDivision(self._prob, kf)

        self._tmpl = (1-train_interp_factor)*self._tmpl + train_interp_factor*x
        self._alphaf = (1-train_interp_factor)*self._alphaf + train_interp_factor*alphaf

    def init(self, roi, image):
        """Start tracking the box `roi` ([x, y, width, height]) in `image`."""
        assert(roi[2] > 0 and roi[3] > 0)
        # Keep a private float copy: update() modifies the ROI in place and
        # must not write through to the caller's list.
        self._roi = [float(v) for v in roi]
        self._tmpl = self.getFeatures(image, 1)
        self._prob = self.createGaussianPeak(self.size_patch[0], self.size_patch[1])
        self._alphaf = np.zeros((self.size_patch[0], self.size_patch[1], 2), np.float32)
        self.train(self._tmpl, 1.0)

    def _clampRoiBeforeDetect(self, image):
        """Pull the ROI back so that it overlaps `image` before detection."""
        if self._roi[0]+self._roi[2] <= 0:
            self._roi[0] = -self._roi[2] + 1
        if self._roi[1]+self._roi[3] <= 0:
            # Uses the height; the width was used here by mistake before.
            self._roi[1] = -self._roi[3] + 1
        if self._roi[0] >= image.shape[1]-1:
            self._roi[0] = image.shape[1] - 2
        if self._roi[1] >= image.shape[0]-1:
            self._roi[1] = image.shape[0] - 2

    def _clampRoiAfterDetect(self, image):
        """Pull the updated ROI back so that it still touches `image`."""
        if self._roi[0] >= image.shape[1]-1:
            self._roi[0] = image.shape[1] - 1
        if self._roi[1] >= image.shape[0]-1:
            self._roi[1] = image.shape[0] - 1
        if self._roi[0]+self._roi[2] <= 0:
            self._roi[0] = -self._roi[2] + 2
        if self._roi[1]+self._roi[3] <= 0:
            self._roi[1] = -self._roi[3] + 2

    def update(self, image):
        """Track the target into `image` and retrain the filter.

        Returns:
            The tracker's current ROI list [x, y, width, height].
        """
        self._clampRoiBeforeDetect(image)

        cx = self._roi[0] + self._roi[2]/2.
        cy = self._roi[1] + self._roi[3]/2.

        loc, peak_value = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0))

        if self.scale_step != 1:
            # Test at a smaller _scale
            new_loc1, new_peak_value1 = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0/self.scale_step))
            # Test at a bigger _scale
            new_loc2, new_peak_value2 = self.detect(self._tmpl, self.getFeatures(image, 0, self.scale_step))

            if self.scale_weight*new_peak_value1 > peak_value and new_peak_value1 > new_peak_value2:
                loc = new_loc1
                peak_value = new_peak_value1
                self._scale /= self.scale_step
                self._roi[2] /= self.scale_step
                self._roi[3] /= self.scale_step
            elif self.scale_weight*new_peak_value2 > peak_value:
                loc = new_loc2
                peak_value = new_peak_value2
                self._scale *= self.scale_step
                self._roi[2] *= self.scale_step
                self._roi[3] *= self.scale_step

        # Convert the peak offset from feature cells back to image pixels.
        self._roi[0] = cx - self._roi[2]/2.0 + loc[0]*self.cell_size*self._scale
        self._roi[1] = cy - self._roi[3]/2.0 + loc[1]*self.cell_size*self._scale

        self._clampRoiAfterDetect(image)
        assert(self._roi[2] > 0 and self._roi[3] > 0)

        x = self.getFeatures(image, 0, 1.0)
        self.train(x, self.interp_factor)

        return self._roi
2.3 run 代码
#encoding=utf-8
import numpy as np
import cv2
import sys
from time import time
import os
import kcftracker
# Shared UI state, written by the draw_boundingbox mouse callback below.
selectingObject = False  # True between left-button press and release
initTracking = False  # set once a new box has been chosen
onTracking = False  # only ever cleared by the callback; NOTE(review): nothing visible here sets it to True
ix, iy, cx, cy = -1, -1, -1, -1  # ix/iy: box anchor corner; cx/cy: latest cursor position
w, h = 0, 0  # size of the most recently selected box
inteval = 1  # NOTE(review): not read in the visible code; presumably a frame/wait interval — confirm
duration = 0.01  # NOTE(review): not read in the visible code — confirm purpose
# mouse callback function
def draw_boundingbox(event, x, y, flags, param):
    """Mouse callback that records a box selection in the module globals.

    Left press starts a drag, mouse move tracks the cursor, left release
    accepts the drag as a new box when it is larger than 10 px in both
    directions, and a right click re-centres the previous box on the
    cursor.
    """
    global selectingObject, initTracking, onTracking, ix, iy, cx, cy, w, h

    if event == cv2.EVENT_LBUTTONDOWN:
        selectingObject = True
        onTracking = False
        ix, iy = x, y
        cx, cy = x, y
        return

    if event == cv2.EVENT_MOUSEMOVE:
        cx, cy = x, y
        return

    if event == cv2.EVENT_LBUTTONUP:
        selectingObject = False
        dragW, dragH = abs(x - ix), abs(y - iy)
        if dragW > 10 and dragH > 10:
            w, h = dragW, dragH
            ix, iy = min(x, ix), min(y, iy)
            initTracking = True
        else:
            # Too small to be a box: treat it as a click that stops tracking.
            onTracking = False
        return

    if event == cv2.EVENT_RBUTTONDOWN:
        onTracking = False
        if w > 0:
            ix, iy = x - w/2, y - h/2
            initTracking = True
if __name__ == '__main__':
    # Track a fixed initial box through the image sequence stored in `path`
    # and write every tracked frame, with the box drawn, to the working
    # directory.
    tracker = kcftracker.KCFTracker(True, True, True)  # hog, fixed_window, multiscale

    path = './1111'
    initial_roi = [558, 213, 93, 59]  # [x, y, width, height] in the first frame
    initialized = False

    for i, filename in enumerate(sorted(os.listdir(path))):
        print(filename)
        oimg = cv2.imread(os.path.join(path, filename))
        if oimg is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of crashing inside the tracker.
            print("skip unreadable file:", filename)
            continue

        if not initialized:
            # The first readable frame only initialises the tracker.
            tracker.init(initial_roi, oimg)
            initialized = True
            continue

        boundingbox = [int(v) for v in tracker.update(oimg)]
        cv2.rectangle(oimg,
                      (boundingbox[0], boundingbox[1]),
                      (boundingbox[0] + boundingbox[2], boundingbox[1] + boundingbox[3]),
                      (0, 255, 255), 1)
        # The output name is derived from the position in the sorted listing.
        tpath1 = str(i + 100000) + 'result.jpg'
        cv2.imwrite(tpath1, oimg)
上一篇: 初学者全面解析KCF跟踪算法的原理
下一篇: 学习实时目标跟踪:Python进阶指南
推荐阅读
-
重新描述循环矩阵在KCF中的应用
-
KCF跟踪算法在Python中的应用
-
目标跟踪算法KCF的研究和应用
-
Python实现具备单一目标、多目标、多尺度和自定义特征的KCF跟踪算法实例代码
-
重写的标题:小顶堆算法在堆排序中的应用
-
常见的JAVA API在算法竞赛中的应用:PriorityQueue(优先队列)
-
解析光线追踪的算法(AABB、BVH、SAH)在Games101课程中的应用+ 完成作业6
-
领域最全 | 计算机视觉算法在路面坑洼检测中的应用综述(基于2D图像/3D LiDAR/深度学习)(下)
-
深入理解分治算法在LeetCode实战中的应用:快速排序思维解析与算法沉淀
-
【摩尔线程+Colossal-AI强强联手】MusaBert登上CLUE榜单TOP10:技术细节揭秘 - 技术实力:摩尔线程凭借"软硬兼备"的技术底蕴,让MusaBert得以从底层优化到顶层。其内置多功能GPU配备AI加速和并行计算模块,提供了全面的AI与科学计算支持,为AI推理和低资源条件下的大模型训练等场景带来了高效、经济且环保的算力。 - 算法层面亮点:依托Colossal-AI AI大模型开发系统,MusaBert在训练过程中展现出了卓越的并行性能与易用性,特别在预处理阶段对DataLoader进行了优化,适应低资源环境高效处理海量数据。同时,通过精细的建模优化、领域内数据增强以及Adan优化器等手段,挖掘和展示了预训练语言模型出色的语义理解潜力。基于MusaBert,摩尔线程自主研发的MusaSim通过对比学习方法微调,结合百万对标注数据,MusaSim在多个任务如语义相似度、意图识别和情绪分析中均表现出色。 - 数据资源丰富:MusaBert除了自家高质量语义相似数据外,还融合了悟道开源200GB数据、CLUE社区80GB数据,以及浪潮公司提供的1TB高质量数据,保证模型即便在较小规模下仍具备良好性能。 当前,MusaBert已成功应用于摩尔线程的智能客服与数字人项目,并广泛服务于语义相似度、情绪识别、阅读理解与声韵识别等领域。为了降低大模型开发和应用难度,MusaBert及其相关高质量模型代码已在Colossal-AI仓库开源,可快速训练优质中文BERT模型。同时,通过摩尔线程与潞晨科技的深度合作,仅需一张多功能GPU单卡便能高效训练MusaBert或更大规模的GPT2模型,显著降低预训练成本,进一步推动双方在低资源大模型训练领域的共享目标。 MusaBert荣登CLUE榜单TOP10,象征着摩尔线程与潞晨科技联合研发团队在中文预训练研究领域的领先地位。展望未来,双方将携手探索更大规模的自然语言模型研究,充分运用上游数据资源,产出更为强大的模型并开源。持续强化在摩尔线程多功能GPU上的大模型训练能力,特别是在消费级显卡等低资源环境下,致力于降低使用大模型训练的门槛与成本,推动人工智能更加普惠。而潞晨科技作为重要合作伙伴,将继续发挥关键作用。