FunnyWeb

据说这里有很多神奇的东西...

国科大教务验证码识别

2017-02-22 21:45:09 · 704 views


看看上一篇还是去年10月写的,仿佛好久没有写过东西了,最近刚开学也是十分闲,写写无聊的代码压压惊...

国科大的选课系统大家都是日常刷课日常崩,终于学会了加个验证码。

讲道理这个验证码是挺良心的,无倾斜无干扰线,看起来挺好识别。

look,就是它! 0729pic.gif

宽60px,高20px,有少量干扰点。

1.二值化

取图片的平均灰度作为阈值,低于该值的全都为0,高于该值的全都为255。

(其实本图十分清晰,100-200内随便选择阈值均可)

000.jpg

2.去边框和噪点

图片有1px的边框,去除即可

// Paint the one-pixel frame around the captcha white so that it is not
// mistaken for character strokes by the later steps.
int rightEdge = bm.Width - 1;
int bottomEdge = bm.Height - 1;

// Left and right edge columns, top to bottom.
for (int y = 0; y <= bottomEdge; y++)
{
    bm.SetPixel(0, y, Color.White);
    bm.SetPixel(rightEdge, y, Color.White);
}

// Top and bottom edge rows, left to right.
for (int x = 0; x <= rightEdge; x++)
{
    bm.SetPixel(x, 0, Color.White);
    bm.SetPixel(x, bottomEdge, Color.White);
}

001.jpg

去除噪点时,噪点的分布也较少,只需比对一个点的上下左右四个点,如果周围均为白色,则识别为噪点。

// Despeckle: clear every black pixel (red channel 0) whose left, upper,
// lower and right neighbours are all non-black. The outermost rows and
// columns are skipped so that every neighbour lookup stays inside the image.
for (int row = 1; row < bm.Height - 1; row++)
{
    for (int col = 1; col < bm.Width - 1; col++)
    {
        // Only black pixels are candidates for removal.
        if (bm.GetPixel(col, row).R != 0)
        {
            continue;
        }

        // A single black neighbour is enough to keep the pixel.
        bool touchesInk = bm.GetPixel(col - 1, row).R == 0
            || bm.GetPixel(col, row - 1).R == 0
            || bm.GetPixel(col, row + 1).R == 0
            || bm.GetPixel(col + 1, row).R == 0;

        if (!touchesInk)
        {
            bm.SetPixel(col, row, Color.White);
        }
    }
}

即可得到比较清晰的字符

000.jpg

3.分割字符

基本思路即为逐列扫描,分离出字符,然后单字符逐行扫描,去除空白。

为了减少未去除的噪点带来的影响做了一些奇奇怪怪的处理。(这个是之前写的函数,现在自己看起来都十分的迷醉...然而虽然代码看起来不好看但还是挺有效的)

/// <summary>
/// Splits a binarized captcha image into four character boxes by scanning
/// its column projection, then trims each box vertically by scanning the
/// row projection inside it.
/// </summary>
/// <param name="bm">
/// Image to analyse. A pixel is treated as ink when its red channel is 0.
/// </param>
/// <returns>
/// A 4x4 array with one row per character, left to right:
/// [k, 0] = first column containing ink,
/// [k, 1] = the ink-free column that terminates the character (exclusive),
/// [k, 2] = first of two consecutive rows containing ink (0 if none found),
/// [k, 3] = first of two consecutive ink-free rows below that
///          (the image height if none found).
/// Returns null when four characters cannot be separated.
/// </returns>
public static int[,] DividePic(Bitmap bm)
{
    const int charCount = 4;
    const int minWidth = 2; // narrower ink runs are discarded as leftover noise
    const int minSpace = 1; // narrower gaps do not separate two characters

    // Column projection: number of ink pixels in every column.
    int[] columnInk = new int[bm.Width];
    for (int x = 0; x < bm.Width; x++)
    {
        int count = 0;
        for (int y = 0; y < bm.Height; y++)
        {
            if (bm.GetPixel(x, y).R == 0)
            {
                count++;
            }
        }
        columnInk[x] = count;
    }

    int[] starts = new int[charCount];
    int[] ends = new int[charCount];

    // State 2k     : searching for the left edge of character k.
    // State 2k + 1 : inside character k, searching for its right edge.
    int state = 0;
    for (int x = 0; x < bm.Width; x++)
    {
        int k = state / 2;
        bool hasInk = columnInk[x] > 0;

        if (state % 2 == 0)
        {
            if (!hasInk)
            {
                continue;
            }

            if (k == 0 || x - ends[k - 1] >= minSpace)
            {
                starts[k] = x;
                state++;
            }
            else
            {
                // Gap too small: resume inside the previous character.
                x = ends[k - 1] + 1;
                state--;
            }
        }
        else
        {
            if (hasInk)
            {
                continue;
            }

            if (x - starts[k] >= minWidth)
            {
                ends[k] = x;
                state++;

                if (state == 2 * charCount)
                {
                    // All eight horizontal boundaries are known.
                    return BuildCharacterBoxes(bm, starts, ends);
                }
            }
            else
            {
                // Ink run too narrow: drop it and keep searching for the
                // start of this same character. Every character falls back
                // by exactly one state, so a rejected run can never
                // overwrite the boundaries of an earlier character.
                x = starts[k] + 1;
                state--;
            }
        }
    }

    return null;
}

// Assembles the result table from the horizontal boundaries, adding the
// vertical bounds of every character.
private static int[,] BuildCharacterBoxes(Bitmap bm, int[] starts, int[] ends)
{
    int[,] position = new int[starts.Length, 4];
    for (int c = 0; c < starts.Length; c++)
    {
        int top;
        int bottom;
        FindVerticalBounds(bm, starts[c], ends[c], out top, out bottom);

        position[c, 0] = starts[c];
        position[c, 1] = ends[c];
        position[c, 2] = top;
        position[c, 3] = bottom;
    }
    return position;
}

// Finds the vertical extent of the ink inside columns [left, right).
// top defaults to 0 and bottom to the image height when no boundary is found.
private static void FindVerticalBounds(Bitmap bm, int left, int right, out int top, out int bottom)
{
    top = 0;
    bottom = bm.Height;

    // Row projection restricted to this character's columns.
    int[] rowInk = new int[bm.Height];
    for (int y = 0; y < bm.Height; y++)
    {
        int count = 0;
        for (int x = left; x < right; x++)
        {
            if (bm.GetPixel(x, y).R == 0)
            {
                count++;
            }
        }
        rowInk[y] = count;
    }

    // Phase 0: looking for the top; phase 1: looking for the bottom; phase 2: done.
    // Both boundaries require two consecutive matching rows, so a single
    // stray row neither starts nor ends the character.
    int phase = 0;
    for (int y = 0; y < bm.Height - 1; y++)
    {
        bool inkHere = rowInk[y] != 0;
        bool inkBelow = rowInk[y + 1] != 0;

        if (phase == 0 && inkHere && inkBelow)
        {
            top = y;
            phase = 1;
        }
        else if (phase == 1 && !inkHere && !inkBelow)
        {
            bottom = y;
            phase = 2;
        }
    }
}

分割成功率在90%以上。

blob.png

如何处理粘连字符暂时还没有思路,对于粘连字符无法分割,仍需进一步研究。

4.识别字符

数据保存格式为 

宽度 高度 字符像素数据 # 字符

比较也使用了简单粗暴的逐个像素点比对

逐个对比要识别的字符和数据库中的所有字符,选择相似度最高的为预测结果。

blob.png

正确率十分可观,在95%以上,用时也在可接受范围内。

本来想写一个自动提交验证获取更多训练集的函数的...懒得写了...

啊哈?我是学机器学习的?识别的时候我应该用点机器学习的算法提高准确率?好吧日后写一下...不过好像每个字符的大小不一样不太好搞...


源码下载:

代码:WAPcode.rar

字符数据:sepcode.txt