/*
  Copyright (C) 1999-2001 Ricardo Ueda Karpischek

  This is free software; you can redistribute it and/or modify
  it under the terms of the version 2 of the GNU General Public
  License as published by the Free Software Foundation.

  This software is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this software; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
  USA.
*/

/*

pgmcut.c: generates one pgm file for each text column.

*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>

/* pi; used by vlines() and vlines2() to convert radians to degrees */
double d_pi=3.14159265358979323846;

/*

Pixel Buffers.

lx, ly: coordinates of the pixels collected by expand(). ls is the
        number of entries allocated and lt the index of the last
        entry in use (-1 when the list is empty). reglin() fits a
        line to the entries 0..lt.

rx, ry: coordinates of the seed pixels stored by vlines2() for the
        components it accepted; rs is the number of entries
        allocated and rt the index of the last entry in use.

ll, rl: arrays allocated by loadpgm() with one entry per image
        row. expand() lowers ll[j] and raises rl[j] to the column
        of each pixel it clears on row j; pgmcut() reads them to
        decide where each row is blanked.

*/
int *lx=NULL,*ly=NULL,ls=0,lt=-1;
int *rx,*ry,rt=-1,rs=0;
int *ll,*rl;

/*

Save block to separate PGM file.

Writes the rectangle with columns l..r and rows t..b (all limits
inclusive) of the w x h pixmap pb to the raw PGM file f.

The global per-row arrays ll and rl tell where the column separator
was found on each row. When L is nonzero (left block) the pixels to
the right of column ll[n] are written as white; otherwise (right
block) the pixels up to and including column rl[n] are written as
white. Rows whose separator position lies outside the cut on the
relevant side are written unchanged.

Exits with status 1 on invalid limits, memory exhaustion or any
I/O failure.

*/
void pgmcut(char *f,unsigned char *pb,int w,int h,int l,int r,int t,int b,int L)
{
    FILE *F;
    unsigned char *lb;
    unsigned char *wl;
    int n,k,cw,keep,blank;

    /* consist cut limits */
    if ((l < 0) || (w <= r) || (r < l) ||
        (t < 0) || (h <= b) || (b < t)) {

        fprintf(stderr,"pgmcut: invalid cut limits\n");
        exit(1);
    }

    /* width of the cut, and a white line of that width */
    cw = r-l+1;
    wl = malloc(cw);
    if (wl == NULL) {
        fprintf(stderr,"memory exhausted\n");
        exit(1);
    }
    memset(wl,255,cw);

    /* create or truncate the file (binary mode: the body is raw bytes) */
    F = fopen(f,"wb");
    if (F == NULL) {
        fprintf(stderr,"pgmcut: cannot create %s\n",f);
        exit(1);
    }

    /* output header */
    fprintf(F,"P5\n%d %d\n%d\n",cw,b-t+1,255);

    /* cutting loop */
    for (n=t; n<=b; ++n) {

        /* first pixel of the cut on row n */
        lb = pb + (size_t)n * w + l;

        if (L) {

            /*
                keep columns l..ll[n], blank the rest. The count is
                clamped to [0,cw] so that a separator lying to the
                left of the cut cannot produce a negative length.
            */
            k = ll[n];
            if (k >= r)
                keep = cw;
            else if (k < l)
                keep = 0;
            else
                keep = k-l+1;

            fwrite(lb,1,keep,F);
            fwrite(wl,1,cw-keep,F);
        }
        else {

            /*
                blank columns l..rl[n], keep the rest. The count is
                clamped to [0,cw] so that a separator lying to the
                right of the cut cannot overrun the row.
            */
            k = rl[n];
            if (k <= l)
                blank = 0;
            else if (k >= r)
                blank = cw;
            else
                blank = k-l+1;

            fwrite(wl,1,blank,F);
            fwrite(lb+blank,1,cw-blank,F);
        }
    }

    free(wl);

    if (ferror(F)) {
        fprintf(stderr,"pgmcut: error writing %s\n",f);
        fclose(F);
        exit(1);
    }
    if (fclose(F) != 0) {
        fprintf(stderr,"pgmcut: error writing %s\n",f);
        exit(1);
    }
}

/*

Load PGM file into *pb.

Reads the raw (P5) PGM file f. On return *w and *h hold the image
width and height and *pb points to a newly allocated buffer with
(*w)*(*h) bytes, one per pixel, stored row by row.

As a side effect the global per-row arrays ll and rl are allocated
with *h entries each; their contents are left uninitialized.

Only one-byte samples are supported, so files declaring more than
255 gray levels are refused. Any error (unreadable file, malformed
or truncated header, short pixel data, memory exhaustion) is
reported on stderr and terminates the program with status 1.

*/
void loadpgm(char *f,unsigned char **pb,int *w,int *h)
{
    /* to read the file */
    FILE *F;

    /* current char, "inside comment" flag, parser state and levels */
    int c,rc,n,v;

    /* number of pixels */
    size_t npix;

    F = fopen(f,"rb");
    if (F == NULL) {
        fprintf(stderr,"could not open %s\n",f);
        exit(1);
    }

    /* P5 magic number */
    if ((fgetc(F) != 'P') || (fgetc(F) != '5')) {
        fprintf(stderr,"%s is not a pgm raw file (invalid magic)\n",f);
        fclose(F);
        exit(1);
    }

    /*
        Reading loop. PGM defines "whitespace" as any from space,
        tab, CR or LF. Comments start with '#' and end on LF.

        n is the parser state: 0/1 before/inside the width, 2/3
        before/inside the height, 4/5 before/inside the levels and
        6 once the single whitespace after the levels was consumed.
    */
    for (n=rc=0, *w=*h=v=0; (n<6) && ((c=fgetc(F)) != EOF); ) {

        /* reading comment */
        if ((c == '#') || ((rc==1) && (c!='\n'))) {
            rc = 1;
        }

        /* non-"whitespace" */
        else if ((c!=' ') && (c!='\t') && (c!='\r') && (c!='\n')) {

            /* invalid char */
            if ((c < '0') || ('9' < c)) {
                fprintf(stderr,"%s is not a pgm raw file (non-digit found)\n",f);
                fclose(F);
                exit(1);
            }

            /* reading width */
            if (n <= 1) {
                n = 1;
                *w = *w * 10 + c - '0';
            }

            /* reading height */
            else if (n <= 3) {
                n = 3;
                *h = *h * 10 + c - '0';
            }

            /* reading levels */
            else if (n <= 5) {
                n = 5;
                v = v * 10 + c - '0';
            }
        }

        /* "whitespace" character */
        else {

            /* stop reading width */
            if (n == 1)
                n = 2;

            /* stop reading height */
            else if (n == 3)
                n = 4;

            /* stop reading levels */
            else if (n == 5)
                n = 6;

            /* stop reading comment */
            rc = 0;
        }
    }

    /* the file ended before the header was complete */
    if (n < 6) {
        fprintf(stderr,"%s is not a pgm raw file (truncated header)\n",f);
        fclose(F);
        exit(1);
    }

    if ((*w <= 0) || (*h <= 0)) {
        fprintf(stderr,"%s is not a pgm raw file (invalid dimensions)\n",f);
        fclose(F);
        exit(1);
    }

    /* with more than 255 levels each sample would take two bytes */
    if ((v < 1) || (v > 255)) {
        fprintf(stderr,"%s: unsupported number of gray levels (%d)\n",f,v);
        fclose(F);
        exit(1);
    }

    /* alloc page buffer */
    npix = (size_t)*h * (size_t)*w;
    *pb = malloc(npix);
    ll = malloc(sizeof(int) * *h);
    rl = malloc(sizeof(int) * *h);

    if ((*pb==NULL) || (ll==NULL) || (rl==NULL)) {
        fprintf(stderr,"memory exhausted\n");
        fclose(F);
        exit(1);
    }

    /* read page */
    if (fread(*pb,1,npix,F) != npix) {
        fprintf(stderr,"%s: unexpected end of file\n",f);
        fclose(F);
        exit(1);
    }

    fclose(F);
}

/*

Expand vertical line, expand the (l,r,t,b) box from the line limits,
and remove the line from the pixmap.

*/
void expand(unsigned char *pb,int w,int h,int i,int j,int *l,int *r,int *t,int *b)
{
    if ((i<0) || (w<=i) || (j<0) || (h<=j))
        return;
    if (pb[j*w+i] < 102) {

        if (++lt >= ls) {
            ls += 15000;
            lx = realloc(lx,sizeof(int) * ls);
            ly = realloc(ly,sizeof(int) * ls);
            if ((lx==NULL) || (ly==NULL)) {
                fprintf(stderr,"memory exhausted\n");
                exit(1);
            }
        }

        lx[lt] = i;
        ly[lt] = j;

        if (*l > i)
            *l = i;
        if (*r < i)
            *r = i;
        if (*t > j)
            *t = j;
        if (*b < j)
            *b = j;

        if (ll[j] > i)
            ll[j] = i;
        if (rl[j] < i)
            rl[j] = i;

        pb[j*w+i] = 255;

        expand(pb,w,h,i-1,j,l,r,t,b);
        expand(pb,w,h,i-1,j-1,l,r,t,b);
        expand(pb,w,h,i-1,j+1,l,r,t,b);
        expand(pb,w,h,i,j-1,l,r,t,b);
        expand(pb,w,h,i,j+1,l,r,t,b);
        expand(pb,w,h,i+1,j-1,l,r,t,b);
        expand(pb,w,h,i+1,j,l,r,t,b);
        expand(pb,w,h,i+1,j+1,l,r,t,b);
    }
}

/*

Faz a regressao linear do conjunto de pontos c, restrita ao
intervalo [x0,x1[.

Devolve 0 se a resposta e' a equacao y= m*x + b, ou 1 se a
resposta e' a equacao x = m*y + b, ou -1 se os dados sao
intrataveis.

*/
int reglin(float *m,float *b,float *R)
{
    int i,i0,N,t;
    float sx,sy,sxy,sx2,sy2,sigx,sigy;
    float eps = 0.0001;

    N = lt + 1;

    if (N < 2) {
        return(-1);
    }

    /*
        somatorias de x, y xy, x2 e y2
        O uso do cast (float) e' para precaver-se contra um (totalmente
        improvavel) estouro de inteiros.
    */
    sx = sy = sxy = sx2 = sy2 = 0.0;
    for (i=0; i<=lt; ++i) {
        sx += lx[i];
        sy += ly[i];
        sxy += lx[i] * ly[i];
        sx2 += ((float)lx[i]) * lx[i];
        sy2 += ((float)ly[i]) * ly[i];
    }

    /* no momento nao temos a formula do coeficiente de correlacao */
    *R = 1;

    if ((fabs(sx2 - sx*sx/N) < eps) || (fabs((sxy-sx*sy/N) / (sx2-sx*sx/N)) > 1)) {

        if (fabs(sy2 - sy*sy/N) < eps) {
            printf("dados intrataveis na regressao linear\n");
            return(-1);
        }

        /* inclinacao da reta e interceptacao em x */
        *m = (sxy - sx*sy/N) / (sy2 - sy*sy/N);
        *b = (sx - *m*sy) / N;

        /* parametrizacao invertida */
        return(1);
    }

    /* inclinacao da reta e interceptacao em y */
    *m = (sxy - sx*sy/N) / (sx2 - sx*sx/N);
    *b = (sy - *m*sx) / N;

    /* parametrizacao normal */
    return(0);
}

/*

Locate vertical lines (version 1).

*/
void vlines(unsigned char *pb,int w,int h,int *l,int *r,int *t,int *b)
{
    int i,j,k,f,d,n;

    *l = w;
    *r = 0;
    *t = h;
    *b = 0;

    lt = -1;

    /* search vertical lines */
    for (j=0; j<h; j+=d) {

        if (h-j < 130)
            d = h-j;
        else
            d = 100;

        for (i=f=0; (i<w) && (f<2); ++i) {

            /* search blank area */
            if (f == 0) {

                /* account black pixels */
                for (k=n=0; k<d; ++k)
                    if (pb[(j+k)*w+i] < 153)
                        ++n;

                if (n*25 < d)
                    f = 1;
            }

            /* search vertical line */
            else if (f == 1) {

                /* account black pixels */
                for (k=n=0; k<d; ++k)
                    if (pb[(j+k)*w+i] < 102)
                        ++n;

                if (n*1.1 > d) {
                    f = 2;
                    expand(pb,w,h,i,j,l,r,t,b);
                }
            }
        }
    }

    {
        float m,b,R,t;
        int r;

        r = reglin(&m,&b,&R);
        t = atan(m);
        if (r == 0) {
            printf("angle: %f %f %d\n",m,t,(int)((t)*180/d_pi));
        }
        else if (r == 1) {
            printf("angle: %f %f %d\n",m,t,(int)((t+d_pi/2)*180/d_pi));
        }
    }
}

/*

Locate vertical lines (version 2).

Works in two passes over the w x h pixmap pb.

The first pass runs on a private copy of the pixmap. Every pixel
below 102 in the middle third of each row (columns w/3 to 2*w/3-1)
seeds an expand(); components whose bounding box is more than 100
rows tall are accepted: their seed is stored in (rx,ry) and their
box is merged into *l, *r, *t, *b.

The second pass calls expand() on pb itself for each stored seed,
so only the accepted components are removed from the real pixmap.
It leaves in the global list (lx,ly) the pixels of those components
and in ll/rl their per-row limits. Rows without limits then inherit
the limits of the row above, when that row has them.

Per-row limits falling outside the merged box are reported with an
"oops.." line, and the slope obtained by reglin() for the list is
printed when available, both on stdout.

If no component is accepted, *l, *r, *t, *b are left as w, 0, h, 0.

The arrays ll and rl must already have h entries (see loadpgm).

*/
void vlines2(unsigned char *pb,int w,int h,int *l,int *r,int *t,int *b)
{
    int i,j,k;
    int l2,r2,t2,b2;
    unsigned char *pb2;
    size_t npix;

    /*
        Scratch copy of the pixmap. It lives on the heap because a
        scanned page easily exceeds the available stack.
    */
    npix = ((w > 0) && (h > 0)) ? ((size_t)h * (size_t)w) : 0;
    pb2 = malloc((npix > 0) ? npix : 1);
    if (pb2 == NULL) {
        fprintf(stderr,"memory exhausted\n");
        exit(1);
    }
    if (npix > 0)
        memcpy(pb2,pb,npix);

    *l = w;
    *r = 0;
    *t = h;
    *b = 0;

    lt = -1;
    rt = -1;

    /* expand() compares against ll and rl, so they need known values */
    for (k=0; k<h; ++k) {
        ll[k] = w;
        rl[k] = 0;
    }

    /* first pass: select the tall components on the scratch copy */
    for (j=0; j<h; ++j) {
        for (i=w/3; (i<2*w/3); ++i) {

            if (pb2[j*w+i] < 102) {

                l2 = r2 = i;
                t2 = b2 = j;
                lt = -1;
                expand(pb2,w,h,i,j,&l2,&r2,&t2,&b2);

                if (b2 - t2 > 100) {

                    if (++rt >= rs) {
                        rs += 150;
                        rx = realloc(rx,sizeof(int) * rs);
                        ry = realloc(ry,sizeof(int) * rs);
                        if ((rx==NULL) || (ry==NULL)) {
                            fprintf(stderr,"memory exhausted\n");
                            exit(1);
                        }
                    }
                    rx[rt] = i;
                    ry[rt] = j;

                    if (l2 < *l)
                        *l = l2;
                    if (r2 > *r)
                        *r = r2;
                    if (t2 < *t)
                        *t = t2;
                    if (b2 > *b)
                        *b = b2;
                }
            }
        }
    }

    free(pb2);

    /* second pass: remove the selected components from the pixmap */
    lt = -1;

    /* discard the limits left by the rejected components */
    for (k=0; k<h; ++k) {
        ll[k] = w;
        rl[k] = 0;
    }

    for (k=0; k<=rt; ++k) {

        l2 = r2 = rx[k];
        t2 = b2 = ry[k];
        expand(pb,w,h,rx[k],ry[k],&l2,&r2,&t2,&b2);
    }

    /* sanity check: the per-row limits must lie inside the box */
    for (k=0; k<h; ++k) {
        if (ll[k]<w) {
            if ((*l>ll[k]) || (ll[k]>rl[k]) || (rl[k]>*r)) {
                printf("oops.. %d %d %d %d\n",*l,ll[k],rl[k],*r);
            }
        }
    }

    /* rows without limits inherit those of the row above */
    for (k=1; k<h; ++k) {
        if ((ll[k] == w) && (ll[k-1] < w))
            ll[k] = ll[k-1];
        if ((rl[k] == 0) && (rl[k-1] > 0))
            rl[k] = rl[k-1];
    }

    /* report the inclination of the collected pixels */
    {
        float slope,icpt,R,theta;
        int kind;

        kind = reglin(&slope,&icpt,&R);

        /* the slope is only defined when the regression succeeded */
        if (kind == 0) {
            theta = atan(slope);
            printf("angle: %f %f %d\n",slope,theta,(int)((theta)*180/d_pi));
        }
        else if (kind == 1) {
            theta = atan(slope);
            printf("angle: %f %f %d\n",slope,theta,(int)((theta+d_pi/2)*180/d_pi));
        }
    }
}

/*

Locate horizontal margin (left if dx=1, right if dx=-1).

The w x h pixmap pb is processed in horizontal strips of 100 rows
(the last strip takes all remaining rows once fewer than 130 are
left). In each strip the columns are scanned starting 30 pixels
away from the page border (column 30 for dx=1, column w-30 for
dx=-1) and moving by dx: first a blank column is searched (fewer
than 1/25 of its pixels below 153), and from there on a column
with more than 1/8 of its pixels below 102. That column is the
margin of the strip; a strip where it is not found contributes w
(dx=1) or 0 (dx=-1).

Returns the smallest strip margin for dx=1 and the largest one for
dx=-1. With no strips at all the result is w for dx=1, else 0.

The scan never leaves the columns 0..w-1 and only the strips
actually scanned take part in the result.

*/
int hmargin(unsigned char *pb,int w,int h,int dx)
{
    int i,j,k,f,d,n,cx;
    int p,np,*L,lm,r;

    /* upper bound for the number of strips */
    lm = (h/100) + 1;
    if (lm < 1)
        lm = 1;

    L = malloc(sizeof(int) * lm);
    if (L == NULL) {
        fprintf(stderr,"memory exhausted\n");
        exit(1);
    }

    /* compute margin */
    for (j=p=0; (j<h) && (p<lm); j+=d, ++p) {

        if (h-j < 130)
            d = h-j;
        else
            d = 100;

        L[p] = (dx==1) ? w : 0;

        i = (dx == -1) ? (w-30) : 30;

        /* cx bounds the number of steps, the tests on i keep it in the row */
        for (cx=f=0; (cx<w) && (0<=i) && (i<w) && (f<2); i+=dx, ++cx) {

            /* search blank area */
            if (f == 0) {

                /* account black pixels */
                for (k=n=0; k<d; ++k)
                    if (pb[(j+k)*w+i] < 153)
                        ++n;

                if (n*25 < d)
                    f = 1;
            }

            /* search characters */
            else if (f == 1) {

                /* account black pixels */
                for (k=n=0; k<d; ++k)
                    if (pb[(j+k)*w+i] < 102)
                        ++n;

                if (n*8 > d) {
                    f = 2;
                    L[p] = i;
                }
            }
        }
    }

    /* number of strips actually scanned; entries beyond it are unset */
    np = p;

    if (dx == 1) {
        r = w;
        for (p=0; p<np; ++p)
            if (L[p] < r)
                r = L[p];
    }

    else {
        r = 0;
        for (p=0; p<np; ++p)
            if (L[p] > r)
                r = L[p];
    }

    free(L);

    return(r);
}

/*

Locate vertical margin (top if dy==1, bottom if dy==-1).

The rows of the w x h pixmap pb are visited from the top downwards
(dy==1) or from the bottom upwards (dy==-1), at most h of them.

First a blank row is searched: one with fewer than 1/25 of its
pixels below 153. From the next row on, the pixels below 102 are
counted:

    - more than 1/6 of the row: the search ends, the answer being
      this row unless a candidate is already pending;
    - more than 1/20 of the row: the row becomes the candidate if
      there is none pending;
    - less than 1/20 of the row: any pending candidate is dropped.

Returns the row found, or -1 if there is none.

*/
int vmargin(unsigned char *pb,int w,int h,int dy)
{
    int row,base,col,step,dark;
    int blank_seen,margin;

    margin = -1;
    blank_seen = 0;
    row = (dy == 1) ? 0 : h-1;

    for (step=0; (row<h) && (step<h); ++step, row+=dy) {

        base = row*w;
        dark = 0;

        /* still looking for the blank row that precedes the text */
        if (!blank_seen) {

            for (col=0; col<w; ++col) {
                if (pb[base+col] < 153)
                    ++dark;
            }

            if (25*dark < w)
                blank_seen = 1;

            continue;
        }

        /* account black pixels */
        for (col=0; col<w; ++col) {
            if (pb[base+col] < 102)
                ++dark;
        }

        /* dense row: the search is over */
        if (6*dark > w) {
            if (margin < 0)
                margin = row;
            break;
        }

        /* sparse row: remember the first one of the run */
        if (20*dark > w) {
            if (margin < 0)
                margin = row;
        }

        /* nearly empty row: the run was noise, forget it */
        else if (20*dark < w) {
            margin = -1;
        }
    }

    return(margin);
}



/*

The program begins here.

Takes an optional "-c" flag followed by the name of one raw PGM
file. The page is loaded, the column separator is located by
vlines2() and removed from the pixmap, the page margins are located
by hmargin() and vmargin(), and two files are written by pgmcut():
the block at the left of the separator and the block at its right.
Their names are the input name, without a trailing ".pgm" if
present, followed by "-l.pgm" and "-r.pgm".

The left and right limits are the margins enlarged by 30 pixels and
clipped to the page. Without "-c" the top and bottom limits are
those of the separator box; with "-c" the top limit is the smaller
of the top margin and the separator top, and the bottom limit the
larger of the bottom margin and the separator bottom. Top and
bottom are enlarged by 30 pixels and clipped to the page as well.

*/
int main(int argc,char *argv[])
{
    int w,h,l,r,t,b,l0,r0,t0,b0;
    unsigned char *pb;
    char fn[256];
    int pc;

    /* argv[1] exists only when argc is at least 2 */
    pc = 0;
    if ((argc > 1) && (strcmp(argv[1],"-c") == 0)) {
        int i;

        /* drop the flag from the argument list */
        pc = 1;
        for (i=2; i<argc; ++i)
            argv[i-1] = argv[i];
        --argc;
    }

    if (argc != 2) {
        fprintf(stderr,"usage: pgmblock file\n");
        exit(1);
    }

    loadpgm(argv[1],&pb,&w,&h);

    /* locate vertical lines */
    vlines2(pb,w,h,&l,&r,&t,&b);

    printf("separator box l=%d r=%d t=%d b=%d\n",l,r,t,b);
    printf("separator width %d\n",r-l+1);

    /* locate left margin */
    l0 = hmargin(pb,w,h,1);

    /* locate right margin */
    r0 = hmargin(pb,w,h,-1);

    /* locate top margin */
    t0 = vmargin(pb,w,h,1);

    /* locate bottom margin */
    b0 = vmargin(pb,w,h,-1);

    /* enlarge the horizontal limits by 30 pixels, within the page */
    if (l0 > 30)
        l0 -= 30;
    else
        l0 = 0;

    if (r0+30 < w)
        r0 += 30;
    else
        r0 = w-1;

    /*
        prefer the top limit based on the vertical lines detection.
    */
    if ((!pc) || (t0 > t))
        t0 = t;

    if (t0 > 30)
        t0 -= 30;
    else
        t0 = 0;

    /*
        prefer the bottom limit based on the vertical lines detection.
    */
    if ((!pc) || (b0 < b))
        b0 = b;

    if (b0+30 < h)
        b0 += 30;
    else
        b0 = h-1;

    {
        int a,d;

        /* a is the length of the input name without the ".pgm" suffix */
        strncpy(fn,argv[1],255);
        fn[255] = 0;
        a = strlen(fn);
        if ((a >4) && (strcmp(fn+a-4,".pgm") == 0))
            a -= 4;

        /* both blocks are expected to have about the same width */
        d = abs(((r-l0)-(r0-l)));
        if (d > 80) {
            printf("block division for %s under suspection (%d)\n",fn,d);
        }
        else {
            printf("block widths difference %d for %s\n",d,fn);
        }

        /* the 6-char suffix plus the terminator must fit in fn */
        if (a+6 < 256) {
            sprintf(fn+a,"-l.pgm");
            pgmcut(fn,pb,w,h,l0,r,t0,b0,1);
            sprintf(fn+a,"-r.pgm");
            pgmcut(fn,pb,w,h,l,r0,t0,b0,0);
        }
        else {
            fprintf(stderr,"filename too long\n");
            exit(1);
        }
    }

    exit(0);
}
