/*---------------------------------------------------------------------------*/
/*
** altivec.c
** iGoom Project
**
** Created by Guillaume Borios on Sun Dec 29 2002
** Copyright (c) 2002 iOS. All rights reserved.
*/
/*---------------------------------------------------------------------------*/

#define PERTEDEC 4
#define PERTEMASK 0xf
#define BUFFPOINTNB 16

#include "graphic.h"
#include "altivec.h"
#include "config.h"
#include <stdlib.h>
#include <stdio.h>


/*
 * ppc_zoom_altivec
 *
 * Fills expix2 from expix1 through a per-pixel displacement map, using
 * AltiVec vector operations for the pixel arithmetic.
 *
 *   expix1       source pixels, 32 bits each, addressed as x + prevX * y;
 *                its four corner pixels are overwritten with 0 on entry
 *   expix2       destination pixels, one per (x, y) coordinate pair
 *   prevX/prevY  dimensions used for indexing and for the bounds test
 *   brutS/brutD  two coordinate tables stored as interleaved x, y pairs;
 *                the low PERTEDEC bits of each coordinate are fractional
 *   buffratio    blend factor between brutS and brutD, scaled by
 *                2^BUFFPOINTNB (0 selects brutS)
 *   precalCoef   16x16 table of packed coefficient words, indexed by the
 *                fractional bits of x and y; the four bytes of a word are
 *                used as four separate 8-bit multipliers
 *
 * Nothing is returned; the const qualifier on the void return type has no
 * effect.
 *
 * Each loop iteration consumes 8 table entries (4 pixels), handled as two
 * pairs of pixels with the same instruction sequence.
 *
 * NOTE(review): the function looks like work in progress. pos2 is computed
 * but only referenced from disabled code, and the second source row and the
 * coefficients of the second pixel of each pair do not reach the stored
 * result (see the notes in the body) -- confirm the intended behaviour.
 */
const void ppc_zoom_altivec (unsigned int *expix1, unsigned int *expix2,unsigned int prevX, unsigned int prevY, signed int *brutS, signed int *brutD, int buffratio, int precalCoef[16][16])
{
    /* All-zero vector, used as the second operand of the widening permutes. */
    const vector unsigned char zerovect = (vector unsigned char) (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
    /* With zerovect as second operand, index 16 selects a zero byte: these two
       masks place a zero byte in front of bytes 0..7 (resp. 8..15) of the first
       operand, so each data byte ends up in an odd byte position. */
    const vector unsigned char pixelsUnpackPermutation1 = (vector unsigned char) (16,0,16,1,16,2,16,3,16,4,16,5,16,6,16,7);
    const vector unsigned char pixelsUnpackPermutation2 = (vector unsigned char) (16,8,16,9,16,10,16,11,16,12,16,13,16,14,16,15);
    /* Gathers the odd bytes 1,3,...,15 of the first operand into bytes 0..7;
       bytes 8..15 of the result are copies of byte 0 of the first operand.
       Every index is below 16, so the second operand is never selected. */
    const vector unsigned char pixelsRepackPermutation = (vector unsigned char) (1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0);
    /* Zero byte + coefficient byte 0 repeated four times, then zero byte +
       coefficient byte 1 repeated four times (second operand is zerovect). */
    const vector unsigned char coefsPermutation1 = (vector unsigned char) (16,0,16,0,16,0,16,0,16,1,16,1,16,1,16,1);
    /* Same layout for coefficient bytes 2 and 3. */
    const vector unsigned char coefsPermutation2 = (vector unsigned char) (16,2,16,2,16,2,16,2,16,3,16,3,16,3,16,3);
    /* Bytes 8..15 of the second operand followed by bytes 0..7 of the first;
       with both operands equal this swaps the two 8-byte halves. */
    const vector unsigned char swapPermutation = (vector unsigned char) (24,25,26,27,28,29,30,31,0,1,2,3,4,5,6,7);
    /* Bytes 0..7 of the first operand followed by bytes 0..7 of the second.
       Only referenced from commented-out code below. */
    const vector unsigned char finalSwapPermutation = (vector unsigned char) (0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
    /* Per-lane shift count of 8 for the 16-bit lanes. */
    const vector unsigned short shiftRightVect = (vector unsigned short) (8,8,8,8,8,8,8,8);

    vector unsigned short pixelA;
    vector unsigned short pixelB;
    vector unsigned char pixel1;
    vector unsigned char pixel2;
    vector unsigned char coefs1;
    vector unsigned char coefs2;
    vector unsigned char coefsA;
    vector unsigned char coefsB;
    vector unsigned short pixelC;
    vector unsigned short pixelD;
    vector unsigned char pixel3;
    vector unsigned char pixel4;
    vector unsigned char coefsC;
    vector unsigned char coefsD;
    vector unsigned char pixelF;
    //unsigned int * velem2 = (unsigned int *)&pixel3;
    /* Scalar views onto vector variables: velemc/velemc2 are used to store a
       coefficient word into the first 4 bytes of coefs1/coefs2 (the other 12
       bytes are never written, and the coefficient permutes only read bytes
       0..3); velem is used to read 32-bit words back out of pixelF. */
    unsigned int * velemc2 = (unsigned int *)&coefs2;
    unsigned int * velem = (unsigned int *)&pixelF;
    unsigned int * velemc = (unsigned int *)&coefs1;
    vector unsigned char MSQ,LSQ,mask;

    int     myPos, myPos2;
    /* Upper limits for the fixed-point coordinates: (size - 1) shifted into the
       same scale as px/py. */
    unsigned int ax = (prevX - 1) << PERTEDEC, ay = (prevY - 1) << PERTEDEC;

    /* Number of entries in brutS/brutD as used here: two per pixel. */
    unsigned int     bufsize = prevX * prevY * 2;
    /* Byte distance between two rows of expix1, given 4 bytes per pixel. */
    unsigned int     bufwidth = 4*prevX;

    /* Zero the four corner pixels of the source: first and last pixel of the
       first row, last and first pixel of the last row. */
    expix1[0]=expix1[prevX-1]=expix1[prevX*prevY-1]=expix1[prevX*prevY-prevX]=0;

    /* myPos indexes brutS/brutD. It is advanced by 2 three times inside the
       body and once more here, i.e. by 8 entries (4 pixels) per iteration.
       NOTE(review): myPos is converted to unsigned for this comparison and
       bufsize-16 wraps around when bufsize < 16 -- confirm that callers never
       pass such small dimensions. */
    for (myPos = 0; myPos < bufsize-16; myPos += 2) {
        int     px, py, px2, py2;
        int     pos,pos2;
        int     brutSmypos;
        unsigned char * target;

        /* First pixel of the first pair: move from the brutS coordinate towards
           the brutD coordinate by buffratio / 2^BUFFPOINTNB. */
        brutSmypos = brutS[myPos];
        myPos2 = myPos + 1;
        px = brutSmypos + (((brutD[myPos] - brutSmypos) * buffratio) >> BUFFPOINTNB);
        brutSmypos = brutS[myPos2];
        py = brutSmypos + (((brutD[myPos2] - brutSmypos) * buffratio) >> BUFFPOINTNB);
        /* Integer part of the coordinate gives the source pixel index; the
           fractional bits select the packed coefficient word, which is stored
           into bytes 0..3 of coefs1. */
        pos = ((px >> PERTEDEC) + prevX * (py >> PERTEDEC));
        velemc[0] = precalCoef[px & PERTEMASK][py & PERTEMASK];

        myPos += 2;

        /* Second pixel of the first pair; its coefficient word goes to coefs2.
           pos2 is not used by any active code. */
        brutSmypos = brutS[myPos];
        myPos2 = myPos + 1;
        px2 = brutSmypos + (((brutD[myPos] - brutSmypos) * buffratio) >> BUFFPOINTNB);
        brutSmypos = brutS[myPos2];
        py2 = brutSmypos + (((brutD[myPos2] - brutSmypos) * buffratio) >> BUFFPOINTNB);
        pos2 = ((px2 >> PERTEDEC) + prevX * (py2 >> PERTEDEC));
        velemc2[0] = precalCoef[px2 & PERTEMASK][py2 & PERTEMASK];

        
        /* If either pixel of the pair is outside the limits, both outputs are
           set to 0. The signed coordinates are converted to unsigned for these
           comparisons, so negative values also take this branch. */
        if ((py >= ay) || (px >= ax) || (py2 >= ay) || (px2 >= ax)) {
            expix2[(myPos-2) >> 1] = 0;
            expix2[myPos2 >> 1] = 0;
        }
        else
        {

            /* Unaligned 16-byte load (4 pixels) starting at expix1[pos]: load
               the two aligned blocks around the address and merge them with
               the mask produced by vec_lvsl.
               NOTE(review): for pos near the end of expix1 these loads may read
               beyond the prevX*prevY pixels -- confirm the buffer is padded. */
            target = (unsigned char *)(expix1 + pos);
            MSQ = vec_ld(0, target);
            LSQ = vec_ld(16, target);
            mask = vec_lvsl(0, target);
            pixel1 = vec_perm(MSQ, LSQ, mask);

            /* Same load, one row further down (same column). */
            target += bufwidth;
            MSQ = vec_ld(0, target);
            LSQ = vec_ld(16, target);
            mask = vec_lvsl(0, target);
            pixel3 = vec_perm(MSQ, LSQ, mask);
//            pixel3 = zerovect;
            // Disabled variant: separate loads at pos2 for the second pixel.
/*
            target = (unsigned char *)(expix1 + pos2);
            MSQ = vec_ld(0, target);
            LSQ = vec_ld(16, target);
            mask = vec_lvsl(0, target);
            pixel3 = vec_perm(MSQ, LSQ, mask);

            target += bufwidth;
            MSQ = vec_ld(0, target);
            LSQ = vec_ld(16, target);
            mask = vec_lvsl(0, target);
            pixel4 = vec_perm(MSQ, LSQ, mask);
*/
            /* Split each 16-byte row into two vectors of 8 zero-prefixed bytes:
               pixel1/pixel3 keep bytes 0..7 (first two pixels of the row),
               pixel2/pixel4 get bytes 8..15 (next two pixels). The upper
               halves are extracted first because pixel1/pixel3 are
               overwritten by the last two statements. */
            pixel2 = vec_perm(pixel1,zerovect,pixelsUnpackPermutation2);
            pixel4 = vec_perm(pixel3,zerovect,pixelsUnpackPermutation2);
            pixel1 = vec_perm(pixel1,zerovect,pixelsUnpackPermutation1);
            pixel3 = vec_perm(pixel3,zerovect,pixelsUnpackPermutation1);

            /* Spread the coefficient bytes so each one lines up with the four
               channel bytes of one pixel: A/C use coefficient bytes 2 and 3,
               B/D use bytes 0 and 1 (of coefs1 and coefs2 respectively). */
            coefsA = vec_perm(coefs1,zerovect,coefsPermutation2);
            coefsC = vec_perm(coefs2,zerovect,coefsPermutation2);
            coefsB = vec_perm(coefs1,zerovect,coefsPermutation1);
            coefsD = vec_perm(coefs2,zerovect,coefsPermutation1);

            /* vec_mulo multiplies the odd-numbered bytes, which is where the
               permutes above put the data, giving eight 16-bit products. */
            pixelA = vec_mulo(pixel1,coefsA);
            pixelB = vec_mulo(pixel2,coefsB);
            pixelC = vec_mulo(pixel3,coefsC);
            pixelD = vec_mulo(pixel4,coefsD);

            /* Per-row sums of the weighted pixels (16-bit modular adds). */
            pixelA = vec_add(pixelA,pixelB);
            pixelC = vec_add(pixelC,pixelD);

            /* Add each vector to a copy of itself with the 8-byte halves
               swapped, so both halves hold the sum over the row's four
               weighted pixels. */
            pixelB = (vector unsigned short)vec_perm((vector unsigned char)pixelA,(vector unsigned char)pixelA,swapPermutation);
            pixelA = vec_add(pixelA,pixelB);
            pixelD = (vector unsigned short)vec_perm((vector unsigned char)pixelC,(vector unsigned char)pixelC,swapPermutation);
            pixelC = vec_add(pixelD,pixelC);
            
            /* pixelB = upper half of pixelC followed by lower half of pixelA;
               pixelA lanes are then shifted right by 8. */
            pixelB = (vector unsigned short)vec_perm((vector unsigned char)pixelA,(vector unsigned char)pixelC,swapPermutation);
            pixelA = vec_sr(pixelA,shiftRightVect);


            /* Pack the odd byte of each 16-bit lane of pixelA into bytes 0..7.
               NOTE(review): pixelsRepackPermutation never selects from the
               second operand, so pixelB -- and with it pixelC, i.e. the second
               row and coefs2 -- does not influence pixelF. Because both halves
               of pixelA are equal, velem[0] and velem[1] also appear to be
               equal. Confirm whether this is intended. */
            pixelF = vec_perm((vector unsigned char)pixelA,(vector unsigned char)pixelB,pixelsRepackPermutation);

            /* Store the two result words for the two pixels of this pair. */
            expix2[(myPos-2) >> 1] = velem[0];
            expix2[myPos2 >> 1] = velem[1];


        }




        /* Second pair of pixels of this iteration: same sequence as above. */
        myPos += 2;

        brutSmypos = brutS[myPos];
        myPos2 = myPos + 1;
        px = brutSmypos + (((brutD[myPos] - brutSmypos) * buffratio) >> BUFFPOINTNB);
        brutSmypos = brutS[myPos2];
        py = brutSmypos + (((brutD[myPos2] - brutSmypos) * buffratio) >> BUFFPOINTNB);
        pos = ((px >> PERTEDEC) + prevX * (py >> PERTEDEC));
        velemc[0] = precalCoef[px & PERTEMASK][py & PERTEMASK];



        myPos += 2;

        brutSmypos = brutS[myPos];
        myPos2 = myPos + 1;
        px2 = brutSmypos + (((brutD[myPos] - brutSmypos) * buffratio) >> BUFFPOINTNB);
        brutSmypos = brutS[myPos2];
        py2 = brutSmypos + (((brutD[myPos2] - brutSmypos) * buffratio) >> BUFFPOINTNB);
        pos2 = ((px2 >> PERTEDEC) + prevX * (py2 >> PERTEDEC));
        velemc2[0] = precalCoef[px2 & PERTEMASK][py2 & PERTEMASK];

        /* Out-of-range pair: write 0 to both outputs (unsigned comparison, as
           for the first pair). */
        if ((py >= ay) || (px >= ax) || (py2 >= ay) || (px2 >= ax)) {
            expix2[(myPos-2) >> 1] = 0;
            expix2[myPos2 >> 1] = 0;
            //expix2[(myPos-6) >> 1] = 0;//velem[0];
                //expix2[(myPos2-4) >> 1] = 0; //velem[1];

        }
        else
        {


        /* Unaligned loads of 4 pixels at expix1[pos] and one row below. */
        target = (unsigned char *)(expix1 + pos);
        MSQ = vec_ld(0, target);
        LSQ = vec_ld(16, target);
        mask = vec_lvsl(0, target);
        pixel1 = vec_perm(MSQ, LSQ, mask);

        target += bufwidth;
        MSQ = vec_ld(0, target);
        LSQ = vec_ld(16, target);
        mask = vec_lvsl(0, target);
        pixel3 = vec_perm(MSQ, LSQ, mask);
//        pixel3 = zerovect;
        // Disabled variant: separate loads at pos2 for the second pixel.
        /*
         target = (unsigned char *)(expix1 + pos2);
         MSQ = vec_ld(0, target);
         LSQ = vec_ld(16, target);
         mask = vec_lvsl(0, target);
         pixel3 = vec_perm(MSQ, LSQ, mask);

         target += bufwidth;
         MSQ = vec_ld(0, target);
         LSQ = vec_ld(16, target);
         mask = vec_lvsl(0, target);
         pixel4 = vec_perm(MSQ, LSQ, mask);
         */
        /* Widen the pixel bytes and spread the coefficient bytes. */
        pixel2 = vec_perm(pixel1,zerovect,pixelsUnpackPermutation2);
        pixel4 = vec_perm(pixel3,zerovect,pixelsUnpackPermutation2);
        pixel1 = vec_perm(pixel1,zerovect,pixelsUnpackPermutation1);
        pixel3 = vec_perm(pixel3,zerovect,pixelsUnpackPermutation1);

        coefsA = vec_perm(coefs1,zerovect,coefsPermutation2);
        coefsC = vec_perm(coefs2,zerovect,coefsPermutation2);
        coefsB = vec_perm(coefs1,zerovect,coefsPermutation1);
        coefsD = vec_perm(coefs2,zerovect,coefsPermutation1);

        /* Multiply, then sum the weighted pixels of each row. */
        pixelA = vec_mulo(pixel1,coefsA);
        pixelB = vec_mulo(pixel2,coefsB);
        pixelC = vec_mulo(pixel3,coefsC);
        pixelD = vec_mulo(pixel4,coefsD);

        pixelA = vec_add(pixelA,pixelB);
        pixelC = vec_add(pixelC,pixelD);

        pixelB = (vector unsigned short)vec_perm((vector unsigned char)pixelA,(vector unsigned char)pixelA,swapPermutation);
        pixelA = vec_add(pixelA,pixelB);
        pixelD = (vector unsigned short)vec_perm((vector unsigned char)pixelC,(vector unsigned char)pixelC,swapPermutation);
        pixelC = vec_add(pixelD,pixelC);

        pixelB = (vector unsigned short)vec_perm((vector unsigned char)pixelA,(vector unsigned char)pixelC,swapPermutation);
        pixelA = vec_sr(pixelA,shiftRightVect);

        
            /* Repack into bytes; as in the first pair, only pixelA is selected
               by pixelsRepackPermutation. */
            pixel1 = vec_perm((vector unsigned char)pixelA,(vector unsigned char)pixelB,pixelsRepackPermutation);

            pixelF = pixel1;
            
          //  pixelF = vec_perm((vector unsigned char)pixelF,(vector unsigned char)pixel1,finalSwapPermutation);

            /* NOTE(review): the word order is velem[1] then velem[0] here,
               the reverse of the first pair -- confirm which order is meant. */
            expix2[(myPos-2) >> 1] = velem[1];
            expix2[myPos2 >> 1] = velem[0];
//            expix2[(myPos-4) >> 1] = velem[2];
//            expix2[(myPos2-6) >> 1] = velem[3];
 
 //           vec_st(pixelF,0,(unsigned char*)(expix2+((myPos2-6) >> 1)));
        }
    }
}
