最近因为需要,会用到二值图像连通域分析算法,网上找了一些算法,但是都存在问题,比较多的一个是利用等价表的方法剔除错误标签方法,但是在后续开发中,验证了这个算法也存在问题,参考这个链接,改为c++的代码,文中作者也提出了相应的改进算法,但是不是很明白,按此建议修改错误标签更正算法,其结果经验证与matlab bwlabel算法结果一直,算法速度上没有做进一步优化,日后会做进一步优化。

###原始代码

/// 连通区域标记
void CalNumberOfRuns(const uchar *data, int height, int width, int &numRuns)
{
    assert(data != NULL);
    int count = 0;
    for (int j = 0; j < width; j++)
    {
        if (data[j] != 0)
        {
            count++;
        }
        for (int i = 1; i < height; i++)
        {
            if (data[i * width + j] != 0 && data[(i - 1) * width + j] == 0)
            {
                count++;
            }
        }
    }
    numRuns = count;
}

void FillRunVectors(const uchar* data, int *runs, int height, int width, int *firstRowDex, int *lastRowDex, int *colDex, int numRuns)
{
    assert(data != NULL && runs != NULL && firstRowDex != NULL && lastRowDex != NULL && colDex != NULL);
    int k = -1;
    memset(runs, 0, height * width * sizeof(int));
    for (int j = 0; j < width; j++)
    {
        if (data[j] != 0)
        {
            k++;
            firstRowDex[k] = 0;
            colDex[k] = j;
            runs[j] = k + 1;
        }
        for (int i = 1; i < height; i++)
        {
            int index = i * width + j;
            int _index = index - width;
            if (data[index] == 0 && data[_index] != 0)
            {
                lastRowDex[k] = i - 1;
            }
            else
            {
                if (data[index] != 0)
                {
                    if (data[_index] == 0)
                    {
                        k++;
                        firstRowDex[k] = i;
                        colDex[k] = j;
                    }
                    runs[index] = k + 1;

                    if (i == height - 1)
                    {
                        lastRowDex[k] = i;
                    }
                }
            }
        }
    }
    assert(k == numRuns - 1);
}
/// 标记二值连通区域,返回连通区域个数
bool Mor_Bwlabel(uchar *data, int *label, int height, int width, int minarea, int mode, int &numRegions)
{
    assert(data != NULL && label != NULL);
    int totalPixels = height * width;
    // calculate the number of runs in data
    int numRuns = 0;
    CalNumberOfRuns(data, height, width, numRuns);
    // calculate run vectors, including firstRowDex, lastRowDex, colDex of each run
    int *firstRowDex = new int [numRuns];
    int *lastRowDex = new int [numRuns];
    int *colDex = new int [numRuns];
    int *runs = new int[totalPixels];
    FillRunVectors(data, runs, height, width, firstRowDex, lastRowDex, colDex, numRuns);

    // Initiate runLabels (the label of each run)
    int *runLabels = new int[numRuns];
    for (int i = 0; i < numRuns; i++)
    {
        runLabels[i] = 0;
    }

    // 初始化等价集合表
    int *equalSet = new int[MaxConnectRegions];
    for (int i = 0; i < MaxConnectRegions; i++)
    {
        equalSet[i] = i;
    }

    // Initiate params
    int currentColumn = -1;
    int nextLabel = 1;
    int firstRunOnPreviousColumn = -1;
    int lastRunOnPreviousColumn = -1;
    int firstRunOnThisColumn = -1;
    int offset = 0;
    if (mode == 8)
    {
        offset = 1;
    }

    for (int k = 0; k < numRuns; k++)
    {
        if (colDex[k] == currentColumn + 1)
        {
            firstRunOnPreviousColumn = firstRunOnThisColumn;
            firstRunOnThisColumn = k;
            lastRunOnPreviousColumn = k-1;
            currentColumn = colDex[k];
        }
        else if (colDex[k] > currentColumn + 1)
        {
            firstRunOnPreviousColumn = -1;
            firstRunOnThisColumn = k;
            lastRunOnPreviousColumn = -1;
            currentColumn = colDex[k];
        }

        if (firstRunOnPreviousColumn >= 0)
        {
            int p = firstRunOnPreviousColumn;
            while (p <= lastRunOnPreviousColumn && lastRowDex[k] >= firstRowDex[p] - offset)
            {
                if (firstRowDex[k] <= lastRowDex[p] + offset)
                {
                    if (runLabels[k] == 0)
                    {
                        runLabels[k] = runLabels[p];
                    }
                    else if (runLabels[k] != runLabels[p])
                    {
                        if (equalSet[runLabels[k]] < equalSet[runLabels[p]])
                        {
                            equalSet[runLabels[p]] = equalSet[runLabels[k]];
                        }
                        else
                        {
                            equalSet[runLabels[k]] = equalSet[runLabels[p]];
                        }
                    }
                }
                p++;
            }
        }

        if (runLabels[k] == 0)
        {
            runLabels[k] = nextLabel;
            nextLabel++;
            if (nextLabel > MaxConnectRegions)
            {
                return false;
            }
        }
    }


    //标记替换
    for (int i = 0; i < nextLabel; i++)
    {
        equalSet[i] = equalSet[equalSet[i]];
    }


    for (int i = 0; i < numRuns; i++)
    {
        runLabels[i] = equalSet[runLabels[i]];
    }


    // 标记连通区域标号
    for (int i = 0; i < totalPixels; i++)
    {
        if (runs[i] != 0)
        {
            label[i] = runLabels[runs[i]-1];
        }
        else
        {
            label[i] = 0;
        }
    }

    int *area = new int[nextLabel];
    memset(area, 0, sizeof(int) * nextLabel);
    for (int i = 0; i < totalPixels; i++)
    {
        area[label[i]]++;
    }
    numRegions = 0;
    for (int i = 1; i < nextLabel; i++)
    {
        if (area[i] < minarea)
        {
            equalSet[i] = 0;
        }
        else
        {        
            numRegions++;
            equalSet[i] = numRegions;
        }
    }
    for (int i = 0; i < totalPixels; i++)
    {
        label[i] = equalSet[label[i]];
    }

    delete[] firstRowDex;
    delete[] lastRowDex;
    delete[] colDex;
    delete[] runs;
    delete[] runLabels;
    delete[] equalSet;
    return true;
}

###改进后代码

// 依据图连通域分量合并错分的runs,并重新赋值,返回实际的连通域个数
int RunlabelDeleteError(int *runlabel, int *outlabel, int numRuns, vector<vector<int> > sameLabel)
{
    for (int i = 0; i < sameLabel.size(); i++)
    {

        if (sameLabel[i].size() < 2)
        {
            continue;
        }
        for (int j = 0; j < sameLabel[i].size(); j++)
        {
            for (int k = 0; k < numRuns; k++)
            {
                if (runlabel[k] == sameLabel[i][j])
                {
                    runlabel[k] = sameLabel[i][0];
                }
            }
        }
    }
    // label 去重,排序
    vector<int > temp;
    temp.push_back(runlabel[0]);
    for (int i = 1; i < numRuns; i++)
    {
        bool flag = false;
        for (int j = 0; j < temp.size(); j++)
        {
            flag |= runlabel[i] == temp[j];
        }
        if (!flag)
        {
            temp.push_back(runlabel[i]);
        }
    }
    int *data = new int[temp.size()];
    int *index = new int[temp.size()];
    for (int i = 0; i < temp.size(); i++)
    {
        data[i] = temp[i];
        index[i] = i;
    }
    for (int i = 0; i < temp.size() - 1; i++)
    {
        for (int j = i + 1; j < temp.size(); j++)
        {
            if (data[i] > data[j])
            {
                int temp =data[i];
                data[i] = data[j];
                data[j] = temp;
                temp = index[i];
                index[i] = index[j];
                index[j] = temp;
            }
        }
    }
    for (int i = 0; i < numRuns; i++)
    {
        int index = 0; 
        for (int j = 0; j < temp.size(); j++)
        {
            if (runlabel[i] == data[j])
            {
                index = j;
                break;
            }
        }
        outlabel[i] = index + 1;
    }

    delete[] data;
    delete[] index;

    return temp.size();
}


void CalNumberOfRuns(const uchar *data, int height, int width, int &numRuns)
{
    assert(data != NULL);
    int count = 0;
    for (int j = 0; j < width; j++)
    {
        if (data[j] != 0)
        {
            count++;
        }
        for (int i = 1; i < height; i++)
        {
            if (data[i * width + j] != 0 && data[(i - 1) * width + j] == 0)
            {
                count++;
            }
        }
    }
    numRuns = count;
}
// 记录每一个runs的开始行、结束行和runs所在的列
void FillRunVectors(const uchar* data, int height, int width, int *firstRowDex, int *lastRowDex, int *colDex, int numRuns)
{
    assert(data != NULL  && firstRowDex != NULL && lastRowDex != NULL && colDex != NULL);
    int k = -1;
    for (int j = 0; j < width; j++)
    {
        if (data[j] != 0)
        {
            k++;
            firstRowDex[k] = 0;
            colDex[k] = j;
        }
        for (int i = 1; i < height; i++)
        {
            int index = i * width + j;
            int _index = index - width;
            if (data[index] == 0 && data[_index] != 0)
            {
                lastRowDex[k] = i - 1;
            }
            else
            {
                if (data[index] != 0)
                {
                    if (data[_index] == 0)
                    {
                        k++;
                        firstRowDex[k] = i;
                        colDex[k] = j;
                    }

                    if (i == height - 1)
                    {
                        lastRowDex[k] = i;
                    }
                }
            }
        }
    }
    assert(k == numRuns - 1);
}
/// 标记二值连通区域,返回连通区域个数
bool Mor_Bwlabel(const uchar *data, int *label, int height, int width, int mode, int &numRegions)
{
    assert(data != NULL && label != NULL);
    int totalPixels = height * width;
    // calculate the number of runs in data
    int numRuns = 0;
    CalNumberOfRuns(data, height, width, numRuns);
    // calculate run vectors, including firstRowDex, lastRowDex, colDex of each run
    int *firstRowDex = new int [numRuns];
    int *lastRowDex = new int [numRuns];
    int *colDex = new int [numRuns];
    FillRunVectors(data, height, width, firstRowDex, lastRowDex, colDex, numRuns);

    // Initiate runLabels (the label of each run)
    int *runLabels = new int[numRuns];
    for (int i = 0; i < numRuns; i++)
    {
        runLabels[i] = 0;
    }

    // Initiate params
    int currentColumn = -1;
    int nextLabel = 1;
    int firstRunOnPreviousColumn = -1;
    int lastRunOnPreviousColumn = -1;
    int firstRunOnThisColumn = -1;
    int offset = 0;
    if (mode == 8)
    {
        offset = 1;
    }

    //////////////////////////////////////////////////////////////////////////
    // 利用图深度优先搜索合并错误标签
    vector<edge> sameLabel;
    int p;

    for (int k = 0; k < numRuns; k++)
    {
        if (colDex[k] == currentColumn + 1)
        {
            // 第k个runs跟第k-1个runs在相邻列上
            firstRunOnPreviousColumn = firstRunOnThisColumn;
            firstRunOnThisColumn = k;
            lastRunOnPreviousColumn = k-1;
            currentColumn = colDex[k];
        }
        else if (colDex[k] > currentColumn + 1)
        {
            firstRunOnPreviousColumn = -1;
            firstRunOnThisColumn = k;
            lastRunOnPreviousColumn = -1;
            currentColumn = colDex[k];
        }

        if (firstRunOnPreviousColumn >= 0)
        {
            p = firstRunOnPreviousColumn;
            while (p <= lastRunOnPreviousColumn && lastRowDex[k] >= firstRowDex[p] - offset)
            {
                if (firstRowDex[k] <= lastRowDex[p] + offset)
                {
                    if (runLabels[k] == 0)
                    {
                        runLabels[k] = runLabels[p];
                    }
                    else if (runLabels[k] != runLabels[p])
                    {
                        edge etemp;
                        etemp.start = runLabels[k];
                        etemp.end = runLabels[p];
                        sameLabel.push_back(etemp);
                    }
                }
                p++;
            }
        }

        if (runLabels[k] == 0)
        {
            runLabels[k] = nextLabel;
            nextLabel++;
        }
    }

    vector<vector<int> > tempLabel;
    DfnComponent(sameLabel, tempLabel);  

    int *newlabel = new int[numRuns];
    numRegions = RunlabelDeleteError(runLabels, newlabel, numRuns, tempLabel);

    // 赋值label
    memset(label, 0, sizeof(int) * width * height);
    for (int i = 0; i < numRuns; i++)
    {
        for (int k = firstRowDex[i]; k <= lastRowDex[i]; k++)
        {
            label[k * width + colDex[i]] = newlabel[i];
        }
    }

    delete[] newlabel;
    delete[] firstRowDex;
    delete[] lastRowDex;
    delete[] colDex;
    delete[] runLabels;
    return true;
}

图结构:邻接链表表示

/// 图算法相关
typedef struct EDGE{
    int start, end;
    EDGE()
    {
        start = 0;
        end = 0;
    }
    EDGE(int a, int b)
    {
        start = a;
        end = b;
    }
}edge;
typedef struct GraphicNode{
    int vertex; //顶点数据信息
    GraphicNode *nextNode;
}graphNode;

图深度优先搜索及图连通域分量分析

#include "DataStruct.h"
#include <vector>

using namespace std;

void CreateGpaphic(vector<edge> pt, GraphicNode *G)
{
    for (int i = 0; i < pt.size(); i++)
    {
        int start = pt[i].start;
        int end = pt[i].end;
        // create a new node
        GraphicNode *node, *ptr;
        node = new GraphicNode();
        node->vertex = end;
        node->nextNode = NULL;

        ptr = &(G[start]);
        while (ptr->nextNode != NULL)
        {
            ptr = ptr->nextNode;
        }
        ptr->nextNode = node;
    }
}

// 图深度优先搜索算法
void dfn(GraphicNode *G, bool *visited, int current, vector<int> &index)
{
    //printf("vertex = %d\n", current);
    index.push_back(current);
    visited[current] = true;
    GraphicNode *p = &G[current];
    while (p != NULL)
    {
        if (!visited[p->vertex])
        {
            dfn(G, visited, p->vertex, index);
        }
        p = p->nextNode;
    }
}

// 图连通域分量
void DfnComponent(vector<edge> pt, vector<vector<int> > &outdata)
{
    // 统计节点个数, 以空间消耗替代时间消耗
    int nodeNum = max(pt[0].start, pt[0].end);
    int startIndex = min(pt[0].start, pt[0].end);
    for (int i = 1; i < pt.size(); i++)
    {
        if (pt[i].start > nodeNum)
        {
            nodeNum = pt[i].start;
        }
        if (pt[i].end > nodeNum)
        {
            nodeNum = pt[i].end;
        }
        if (pt[i].start < startIndex)
        {
            startIndex = pt[i].start;
        }
        if (pt[i].end < startIndex)
        {
            startIndex = pt[i].end;
        }
    }

    // 构造无向图
    vector<edge> pt2; // 无向图应该为双向的,输入pt为单向的,复制反向
    for (int i = 0; i < pt.size(); i++)
    {
        edge ptemp(pt[i].end, pt[i].start);
        pt2.push_back(pt[i]);
        pt2.push_back(ptemp);
    }
    nodeNum = nodeNum + 1; // 开始索引为0

    GraphicNode *G = new GraphicNode[nodeNum];
    bool *visited = new bool[nodeNum];
    memset(visited, 0, nodeNum);
    // initial the G
    for (int i = 0; i < nodeNum; i++)
    {
        G[i].vertex = i;
        G[i].nextNode = NULL;
    }
    CreateGpaphic(pt2, G);
    int count = 0;
    for (int i = startIndex; i < nodeNum; i++)
    {
        if (!visited[i] && G[i].nextNode != NULL)
        {   
            vector<int> index;
            dfn(G, visited, i, index);
            count++;
            outdata.push_back(index);
        }
    }


    delete[] G;
    delete[] visited;
}