00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 #include <stdio.h>
00015 #include <stdlib.h>
00016 #include <string.h>
00017 
00018 #include "SDL_imageFilter.h"
00019 
/*!
\brief Reverse the byte order of a 32-bit value.

The argument is narrowed to unsigned int and every byte is masked before it
is shifted, so a signed argument cannot sign-extend into the result and a
wider argument cannot leak its upper bits. The result has type unsigned int.

The argument is evaluated several times; do not pass an expression with
side effects.
*/
#define SWAP_32(x) \
	(((((unsigned int)(x)) & 0xff000000u) >> 24) | \
	 ((((unsigned int)(x)) & 0x00ff0000u) >> 8)  | \
	 ((((unsigned int)(x)) & 0x0000ff00u) << 8)  | \
	 ((((unsigned int)(x)) & 0x000000ffu) << 24))

/*!
\brief Run-time MMX switch: 1 allows the MMX code paths, 0 disables them.

Read by SDL_imageFilterMMXdetect() and written by SDL_imageFilterMMXoff()
and SDL_imageFilterMMXon().
*/
static int SDL_imageFilterUseMMX = 1;

/* GCC__ selects the GCC-syntax inline assembly branches in this file. */
#if defined(__GNUC__)
#define GCC__
#endif
00036 
/*!
\brief Read the processor feature flags.

When the file is built with USE_MMX, this executes CPUID with EAX = 1 and
returns the value the instruction leaves in EDX. Without USE_MMX no
instruction is executed and 0 is returned.

NOTE(review): the assembly is 32-bit x86 only and assumes the CPUID
instruction is available on the running processor -- confirm for the
supported targets.

\return Contents of EDX after CPUID function 1, or 0 when built without USE_MMX.
*/
unsigned int _cpuFlags(void)
{
	unsigned int flags = 0;

#ifdef USE_MMX
#if !defined(GCC__)
	__asm
	{
		pusha
		mov eax, 1	/* CPUID function 1 */
		cpuid
		mov flags, edx	/* keep the EDX result */
		popa
	}
#else
	{
		unsigned int leaf = 1;	/* CPUID function number, passed in EAX */

		/*
		 * All operands live in registers. A memory operand must not be used
		 * here: while %esp is displaced by the push, an esp-relative address
		 * chosen by the compiler would point at the wrong stack slot.
		 * %ebx is saved by hand because it may be the PIC register and then
		 * cannot be named as a clobber.
		 */
		__asm__ __volatile__(
			"pushl %%ebx \n\t"
			"cpuid       \n\t"
			"popl  %%ebx \n\t"
			: "+a"(leaf), "=d"(flags)
			:
			: "ecx");
	}
#endif
#endif

	return (flags);
}
00068 
00074 int SDL_imageFilterMMXdetect(void)
00075 {
00076         unsigned int mmx_bit;
00077 
00078         
00079         if (SDL_imageFilterUseMMX == 0) {
00080                 return (0);
00081         }
00082 
00083         mmx_bit = _cpuFlags();
00084         mmx_bit &= 0x00800000;
00085         mmx_bit = (mmx_bit && 0x00800000);
00086 
00087         return (int)(mmx_bit);
00088 }
00089 
00093 void SDL_imageFilterMMXoff()
00094 {
00095         SDL_imageFilterUseMMX = 0;
00096 }
00097 
00101 void SDL_imageFilterMMXon()
00102 {
00103         SDL_imageFilterUseMMX = 1;
00104 }
00105 
00106 
00107 
/*!
\brief MMX kernel of SDL_imageFilterAdd: Dest = Src1 + Src2, clamped at 255.

Only complete groups of 8 bytes are processed (SrcLength / 8 groups); the
remaining SrcLength % 8 bytes are left untouched for the caller. Fewer than
8 bytes means there is nothing to do.

NOTE(review): the GCC branch reads its operands from memory after pusha has
moved %esp; this relies on the compiler not addressing the parameters
relative to %esp -- confirm the build flags.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param SrcLength Number of bytes in each array.

\return 0 when built with USE_MMX, -1 otherwise (nothing is written).
*/
int SDL_imageFilterAddMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
	/* The loop below decrements before it tests: entered with a group count
	   of 0 it would wrap around and run far beyond the buffers. */
	if (SrcLength < 8) {
		return (0);
	}
#if !defined(GCC__)
	__asm
	{
		pusha
		mov eax, Src1		/* eax = Src1 */
		mov ebx, Src2		/* ebx = Src2 */
		mov edi, Dest		/* edi = Dest */
		mov ecx, SrcLength	/* ecx = byte count */
		shr ecx, 3		/* ecx = number of 8-byte groups */
		align 16
L1010:
		movq mm1, [eax]		/* 8 bytes of Src1 */
		paddusb mm1, [ebx]	/* add 8 bytes of Src2, unsigned saturation */
		movq [edi], mm1		/* store 8 result bytes */
		add eax, 8
		add ebx, 8
		add edi, 8
		dec ecx
		jnz L1010
		emms			/* leave MMX state */
		popa
	}
#else
	__asm__ __volatile__
		("pusha                  \n\t"
		"mov          %2, %%eax \n\t"	/* eax = Src1 */
		"mov          %1, %%ebx \n\t"	/* ebx = Src2 */
		"mov          %0, %%edi \n\t"	/* edi = Dest */
		"mov          %3, %%ecx \n\t"	/* ecx = byte count */
		"shr          $3, %%ecx \n\t"	/* ecx = number of 8-byte groups */
		".align 16              \n\t"
		"1: movq (%%eax), %%mm1 \n\t"	/* 8 bytes of Src1 */
		"paddusb (%%ebx), %%mm1 \n\t"	/* add 8 bytes of Src2, unsigned saturation */
		"movq    %%mm1, (%%edi) \n\t"	/* store 8 result bytes */
		"add          $8, %%eax \n\t"
		"add          $8, %%ebx \n\t"
		"add          $8, %%edi \n\t"
		"dec              %%ecx \n\t"
		"jnz             1b     \n\t"
		"emms                   \n\t"	/* leave MMX state */
		"popa                   \n\t"
		:			/* no outputs: the pointer Dest is only read */
		:"m"(Dest),		/* %0 */
		"m"(Src2),		/* %1 */
		"m"(Src1),		/* %2 */
		"m"(SrcLength)		/* %3 */
		:"memory"		/* the bytes behind Dest are written */
		);
#endif
	return (0);
#else
	return (-1);
#endif
}
00170 
/*!
\brief Byte-wise saturating addition: Dest = min(Src1 + Src2, 255).

When SDL_imageFilterMMXdetect() reports MMX and at least 8 bytes are given,
SDL_imageFilterAddMMX() handles the complete 8-byte groups and the C loop
only finishes the remaining bytes; otherwise the C loop handles everything.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param length Number of bytes in each array.

\return 0 on success (including length 0), -1 if any pointer is NULL.
*/
int SDL_imageFilterAdd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	unsigned int pos = 0;	/* first byte still to be handled by the C loop */
	int sum;

	if (!Src1 || !Src2 || !Dest) {
		return -1;
	}
	if (length == 0) {
		return 0;
	}

	if (SDL_imageFilterMMXdetect() && (length >= 8)) {
		SDL_imageFilterAddMMX(Src1, Src2, Dest, length);
		/* Skip everything the kernel covered: the largest multiple of 8. */
		pos = length - (length & 7);
	}

	while (pos < length) {
		sum = Src1[pos] + Src2[pos];
		Dest[pos] = (unsigned char) ((sum > 255) ? 255 : sum);
		pos++;
	}

	return 0;
}
00231 
/*!
\brief MMX kernel of SDL_imageFilterMean: Dest = Src1/2 + Src2/2.

Each operand is shifted right by one bit as 16-bit words and then ANDed with
the 8 bytes at Mask, which removes the bit that crossed from the upper byte
of each word into the lower one (SDL_imageFilterMean() passes eight 0x7F
bytes). Only complete groups of 8 bytes are processed; the remaining
SrcLength % 8 bytes are left for the caller. Fewer than 8 bytes means there
is nothing to do.

NOTE(review): the GCC branch reads its operands from memory after pusha has
moved %esp; this relies on the compiler not addressing the parameters
relative to %esp -- confirm the build flags.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param SrcLength Number of bytes in each array.
\param Mask Pointer to the 8 mask bytes loaded into mm0.

\return 0 when built with USE_MMX, -1 otherwise (nothing is written).
*/
int SDL_imageFilterMeanMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength,
						   unsigned char *Mask)
{
#ifdef USE_MMX
	/* The loop below decrements before it tests: entered with a group count
	   of 0 it would wrap around and run far beyond the buffers. */
	if (SrcLength < 8) {
		return (0);
	}
#if !defined(GCC__)
	__asm
	{
		pusha
		mov edx, Mask		/* edx = Mask */
		movq mm0, [edx]		/* mm0 = 8 mask bytes */
		mov eax, Src1		/* eax = Src1 */
		mov ebx, Src2		/* ebx = Src2 */
		mov edi, Dest		/* edi = Dest */
		mov ecx, SrcLength	/* ecx = byte count */
		shr ecx, 3		/* ecx = number of 8-byte groups */
		align 16
L21011:
		movq mm1,  [eax]	/* 8 bytes of Src1 */
		movq mm2,  [ebx]	/* 8 bytes of Src2 */
		psrlw mm1, 1		/* halve, word-wise */
		psrlw mm2, 1
		pand mm1, mm0		/* drop the bit shifted in from the neighbouring byte */
		pand mm2, mm0
		paddusb mm1,  mm2	/* sum of the two halves */
		movq [edi],  mm1	/* store 8 result bytes */
		add eax,  8
		add ebx,  8
		add edi,  8
		dec ecx
		jnz L21011
		emms			/* leave MMX state */
		popa
	}
#else
	__asm__ __volatile__
		("pusha                  \n\t"
		"movl         %4, %%edx \n\t"	/* edx = Mask */
		"movq    (%%edx), %%mm0 \n\t"	/* mm0 = 8 mask bytes */
		"mov          %2, %%eax \n\t"	/* eax = Src1 */
		"mov          %1, %%ebx \n\t"	/* ebx = Src2 */
		"mov          %0, %%edi \n\t"	/* edi = Dest */
		"mov          %3, %%ecx \n\t"	/* ecx = byte count */
		"shr          $3, %%ecx \n\t"	/* ecx = number of 8-byte groups */
		".align 16              \n\t"
		"1:                      \n\t"
		"movq    (%%eax), %%mm1 \n\t"	/* 8 bytes of Src1 */
		"movq    (%%ebx), %%mm2 \n\t"	/* 8 bytes of Src2 */
		"psrlw        $1, %%mm1 \n\t"	/* halve, word-wise */
		"psrlw        $1, %%mm2 \n\t"
		".byte     0x0f, 0xdb, 0xc8 \n\t"	/* pand %mm0, %mm1 (hand-encoded) */
		".byte     0x0f, 0xdb, 0xd0 \n\t"	/* pand %mm0, %mm2 (hand-encoded) */
		"paddusb   %%mm2, %%mm1 \n\t"	/* sum of the two halves */
		"movq    %%mm1, (%%edi) \n\t"	/* store 8 result bytes */
		"add          $8, %%eax \n\t"
		"add          $8, %%ebx \n\t"
		"add          $8, %%edi \n\t"
		"dec              %%ecx \n\t"
		"jnz                 1b \n\t"
		"emms                   \n\t"	/* leave MMX state */
		"popa                   \n\t"
		:			/* no outputs: the pointer Dest is only read */
		:"m"(Dest),		/* %0 */
		"m"(Src2),		/* %1 */
		"m"(Src1),		/* %2 */
		"m"(SrcLength),		/* %3 */
		"m"(Mask)		/* %4 */
		:"memory"		/* the bytes behind Dest are written */
		);
#endif
	return (0);
#else
	return (-1);
#endif
}
00319 
/*!
\brief Byte-wise mean: Dest = Src1/2 + Src2/2 (each operand halved first).

When SDL_imageFilterMMXdetect() reports MMX and at least 8 bytes are given,
SDL_imageFilterMeanMMX() handles the complete 8-byte groups and the C loop
only finishes the remaining bytes; otherwise the C loop handles everything.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param length Number of bytes in each array.

\return 0 on success (including length 0), -1 if any pointer is NULL.
*/
int SDL_imageFilterMean(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	/* Mask handed to the MMX kernel: keeps the low 7 bits of every byte. */
	static unsigned char half_mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
	unsigned int pos = 0;	/* first byte still to be handled by the C loop */

	if (!Src1 || !Src2 || !Dest) {
		return -1;
	}
	if (length == 0) {
		return 0;
	}

	if (SDL_imageFilterMMXdetect() && (length >= 8)) {
		SDL_imageFilterMeanMMX(Src1, Src2, Dest, length, half_mask);
		/* Skip everything the kernel covered: the largest multiple of 8. */
		pos = length - (length & 7);
	}

	while (pos < length) {
		Dest[pos] = (unsigned char) (Src1[pos] / 2 + Src2[pos] / 2);
		pos++;
	}

	return 0;
}
00378 
/*!
\brief MMX kernel of SDL_imageFilterSub: Dest = Src1 - Src2, clamped at 0.

Only complete groups of 8 bytes are processed (SrcLength / 8 groups); the
remaining SrcLength % 8 bytes are left untouched for the caller. Fewer than
8 bytes means there is nothing to do.

NOTE(review): the GCC branch reads its operands from memory after pusha has
moved %esp; this relies on the compiler not addressing the parameters
relative to %esp -- confirm the build flags.

\param Src1 First source byte array (minuend).
\param Src2 Second source byte array (subtrahend).
\param Dest Destination byte array.
\param SrcLength Number of bytes in each array.

\return 0 when built with USE_MMX, -1 otherwise (nothing is written).
*/
int SDL_imageFilterSubMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
	/* The loop below decrements before it tests: entered with a group count
	   of 0 it would wrap around and run far beyond the buffers. */
	if (SrcLength < 8) {
		return (0);
	}
#if !defined(GCC__)
	__asm
	{
		pusha
		mov eax,  Src1		/* eax = Src1 */
		mov ebx,  Src2		/* ebx = Src2 */
		mov edi,  Dest		/* edi = Dest */
		mov ecx,  SrcLength	/* ecx = byte count */
		shr ecx,  3		/* ecx = number of 8-byte groups */
		align 16
L1012:
		movq mm1,  [eax]	/* 8 bytes of Src1 */
		psubusb mm1,  [ebx]	/* subtract 8 bytes of Src2, unsigned saturation */
		movq [edi],  mm1	/* store 8 result bytes */
		add eax, 8
		add ebx, 8
		add edi, 8
		dec ecx
		jnz L1012
		emms			/* leave MMX state */
		popa
	}
#else
	__asm__ __volatile__
		("pusha                  \n\t"
		"mov %2, %%eax \n\t"		/* eax = Src1 */
		"mov %1, %%ebx \n\t"		/* ebx = Src2 */
		"mov %0, %%edi \n\t"		/* edi = Dest */
		"mov %3, %%ecx \n\t"		/* ecx = byte count */
		"shr $3, %%ecx \n\t"		/* ecx = number of 8-byte groups */
		".align 16       \n\t"
		"1: movq (%%eax), %%mm1 \n\t"	/* 8 bytes of Src1 */
		"psubusb (%%ebx), %%mm1 \n\t"	/* subtract 8 bytes of Src2, unsigned saturation */
		"movq    %%mm1, (%%edi) \n\t"	/* store 8 result bytes */
		"add $8, %%eax \n\t"
		"add $8, %%ebx \n\t"
		"add $8, %%edi \n\t"
		"dec %%ecx     \n\t"
		"jnz 1b         \n\t"
		"emms          \n\t"		/* leave MMX state */
		"popa                   \n\t"
		:			/* no outputs: the pointer Dest is only read */
		:"m"(Dest),		/* %0 */
		"m"(Src2),		/* %1 */
		"m"(Src1),		/* %2 */
		"m"(SrcLength)		/* %3 */
		:"memory"		/* the bytes behind Dest are written */
		);
#endif
	return (0);
#else
	return (-1);
#endif
}
00441 
/*!
\brief Byte-wise saturating subtraction: Dest = max(Src1 - Src2, 0).

When SDL_imageFilterMMXdetect() reports MMX and at least 8 bytes are given,
SDL_imageFilterSubMMX() handles the complete 8-byte groups and the C loop
only finishes the remaining bytes; otherwise the C loop handles everything.

\param Src1 First source byte array (minuend).
\param Src2 Second source byte array (subtrahend).
\param Dest Destination byte array.
\param length Number of bytes in each array.

\return 0 on success (including length 0), -1 if any pointer is NULL.
*/
int SDL_imageFilterSub(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	unsigned int pos = 0;	/* first byte still to be handled by the C loop */
	int diff;

	if (!Src1 || !Src2 || !Dest) {
		return -1;
	}
	if (length == 0) {
		return 0;
	}

	if (SDL_imageFilterMMXdetect() && (length >= 8)) {
		SDL_imageFilterSubMMX(Src1, Src2, Dest, length);
		/* Skip everything the kernel covered: the largest multiple of 8. */
		pos = length - (length & 7);
	}

	while (pos < length) {
		diff = Src1[pos] - Src2[pos];
		Dest[pos] = (unsigned char) ((diff < 0) ? 0 : diff);
		pos++;
	}

	return 0;
}
00501 
/*!
\brief MMX kernel of SDL_imageFilterAbsDiff: Dest = |Src1 - Src2|.

Both saturating differences (Src1 - Src2 and Src2 - Src1) are computed; one
of them is always 0, so ORing them gives the absolute difference. Only
complete groups of 8 bytes are processed; the remaining SrcLength % 8 bytes
are left for the caller. Fewer than 8 bytes means there is nothing to do.

NOTE(review): the GCC branch reads its operands from memory after pusha has
moved %esp; this relies on the compiler not addressing the parameters
relative to %esp -- confirm the build flags.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param SrcLength Number of bytes in each array.

\return 0 when built with USE_MMX, -1 otherwise (nothing is written).
*/
int SDL_imageFilterAbsDiffMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
	/* The loop below decrements before it tests: entered with a group count
	   of 0 it would wrap around and run far beyond the buffers. */
	if (SrcLength < 8) {
		return (0);
	}
#if !defined(GCC__)
	__asm
	{
		pusha
		mov eax, Src1		/* eax = Src1 */
		mov ebx, Src2		/* ebx = Src2 */
		mov edi, Dest		/* edi = Dest */
		mov ecx, SrcLength	/* ecx = byte count */
		shr ecx,  3		/* ecx = number of 8-byte groups */
		align 16
L1013:
		movq mm1,  [eax]	/* 8 bytes of Src1 */
		movq mm2,  [ebx]	/* 8 bytes of Src2 */
		psubusb mm1,  [ebx]	/* mm1 = Src1 - Src2, clamped at 0 */
		psubusb mm2,  [eax]	/* mm2 = Src2 - Src1, clamped at 0 */
		por mm1,  mm2		/* one side is 0, so OR yields |Src1 - Src2| */
		movq [edi],  mm1	/* store 8 result bytes */
		add eax, 8
		add ebx, 8
		add edi, 8
		dec ecx
		jnz L1013
		emms			/* leave MMX state */
		popa
	}
#else
	__asm__ __volatile__
		("pusha                  \n\t"
		"mov %2, %%eax \n\t"		/* eax = Src1 */
		"mov %1, %%ebx \n\t"		/* ebx = Src2 */
		"mov %0, %%edi \n\t"		/* edi = Dest */
		"mov %3, %%ecx \n\t"		/* ecx = byte count */
		"shr $3, %%ecx \n\t"		/* ecx = number of 8-byte groups */
		".align 16       \n\t"
		"1: movq (%%eax), %%mm1 \n\t"	/* 8 bytes of Src1 */
		"movq    (%%ebx), %%mm2 \n\t"	/* 8 bytes of Src2 */
		"psubusb (%%ebx), %%mm1 \n\t"	/* mm1 = Src1 - Src2, clamped at 0 */
		"psubusb (%%eax), %%mm2 \n\t"	/* mm2 = Src2 - Src1, clamped at 0 */
		"por       %%mm2, %%mm1 \n\t"	/* one side is 0, so OR yields |Src1 - Src2| */
		"movq    %%mm1, (%%edi) \n\t"	/* store 8 result bytes */
		"add $8, %%eax \n\t"
		"add $8, %%ebx \n\t"
		"add $8, %%edi \n\t"
		"dec %%ecx     \n\t"
		"jnz 1b        \n\t"
		"emms          \n\t"		/* leave MMX state */
		"popa                   \n\t"
		:			/* no outputs: the pointer Dest is only read */
		:"m"(Dest),		/* %0 */
		"m"(Src2),		/* %1 */
		"m"(Src1),		/* %2 */
		"m"(SrcLength)		/* %3 */
		:"memory"		/* the bytes behind Dest are written */
		);
#endif
	return (0);
#else
	return (-1);
#endif
}
00570 
/*!
\brief Byte-wise absolute difference: Dest = |Src1 - Src2|.

When SDL_imageFilterMMXdetect() reports MMX and at least 8 bytes are given,
SDL_imageFilterAbsDiffMMX() handles the complete 8-byte groups and the C loop
only finishes the remaining bytes; otherwise the C loop handles everything.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param length Number of bytes in each array.

\return 0 on success (including length 0), -1 if any pointer is NULL.
*/
int SDL_imageFilterAbsDiff(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	unsigned int pos = 0;	/* first byte still to be handled by the C loop */
	int diff;

	if (!Src1 || !Src2 || !Dest) {
		return -1;
	}
	if (length == 0) {
		return 0;
	}

	if (SDL_imageFilterMMXdetect() && (length >= 8)) {
		SDL_imageFilterAbsDiffMMX(Src1, Src2, Dest, length);
		/* Skip everything the kernel covered: the largest multiple of 8. */
		pos = length - (length & 7);
	}

	while (pos < length) {
		diff = Src1[pos] - Src2[pos];
		if (diff < 0) {
			diff = -diff;
		}
		Dest[pos] = (unsigned char) diff;
		pos++;
	}

	return 0;
}
00628 
/*!
\brief MMX kernel of SDL_imageFilterMult: Dest = Src1 * Src2, clamped at 255.

Bytes are widened to 16-bit words and multiplied. The final pack instruction
reads its input as signed words, so products with the top bit set are first
replaced by their absolute value; they then clamp to 255 instead of 0.
Only complete groups of 8 bytes are processed; the remaining SrcLength % 8
bytes are left for the caller. Fewer than 8 bytes means there is nothing to
do.

NOTE(review): the GCC branch reads its operands from memory after pusha has
moved %esp; this relies on the compiler not addressing the parameters
relative to %esp -- confirm the build flags.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param SrcLength Number of bytes in each array.

\return 0 when built with USE_MMX, -1 otherwise (nothing is written).
*/
int SDL_imageFilterMultMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
	/* The loop below decrements before it tests: entered with a group count
	   of 0 it would wrap around and run far beyond the buffers. */
	if (SrcLength < 8) {
		return (0);
	}
#if !defined(GCC__)
	__asm
	{
		pusha
		mov eax, Src1		/* eax = Src1 */
		mov ebx, Src2		/* ebx = Src2 */
		mov edi, Dest		/* edi = Dest */
		mov ecx, SrcLength	/* ecx = byte count */
		shr ecx, 3		/* ecx = number of 8-byte groups */
		pxor mm0, mm0		/* mm0 = 0, used to widen bytes to words */
		align 16
L1014:
		movq mm1, [eax]		/* 8 bytes of Src1 */
		movq mm3, [ebx]		/* 8 bytes of Src2 */
		movq mm2, mm1
		movq mm4, mm3
		punpcklbw mm1, mm0	/* low 4 bytes of Src1 as words */
		punpckhbw mm2, mm0	/* high 4 bytes of Src1 as words */
		punpcklbw mm3, mm0	/* low 4 bytes of Src2 as words */
		punpckhbw mm4, mm0	/* high 4 bytes of Src2 as words */
		pmullw mm1, mm3		/* low 16 bits of each product */
		pmullw mm2, mm4
		/* absolute value of each word: sign mask, xor, subtract */
		movq mm5, mm1
		movq mm6, mm2
		psraw mm5, 15		/* all ones where the word is negative */
		psraw mm6, 15
		pxor mm1, mm5
		pxor mm2, mm6
		psubsw mm1, mm5
		psubsw mm2, mm6
		packuswb mm1, mm2	/* words to bytes, clamped to 0..255 */
		movq [edi], mm1		/* store 8 result bytes */
		add eax, 8
		add ebx, 8
		add edi, 8
		dec ecx
		jnz L1014
		emms			/* leave MMX state */
		popa
	}
#else
	__asm__ __volatile__
		("pusha                  \n\t"
		"mov %2, %%eax \n\t"		/* eax = Src1 */
		"mov %1, %%ebx \n\t"		/* ebx = Src2 */
		"mov %0, %%edi \n\t"		/* edi = Dest */
		"mov %3, %%ecx \n\t"		/* ecx = byte count */
		"shr $3, %%ecx \n\t"		/* ecx = number of 8-byte groups */
		"pxor      %%mm0, %%mm0 \n\t"	/* mm0 = 0, used to widen bytes to words */
		".align 16       \n\t"
		"1: movq (%%eax), %%mm1 \n\t"	/* 8 bytes of Src1 */
		"movq    (%%ebx), %%mm3 \n\t"	/* 8 bytes of Src2 */
		"movq      %%mm1, %%mm2 \n\t"
		"movq      %%mm3, %%mm4 \n\t"
		"punpcklbw %%mm0, %%mm1 \n\t"	/* low 4 bytes of Src1 as words */
		"punpckhbw %%mm0, %%mm2 \n\t"	/* high 4 bytes of Src1 as words */
		"punpcklbw %%mm0, %%mm3 \n\t"	/* low 4 bytes of Src2 as words */
		"punpckhbw %%mm0, %%mm4 \n\t"	/* high 4 bytes of Src2 as words */
		"pmullw    %%mm3, %%mm1 \n\t"	/* low 16 bits of each product */
		"pmullw    %%mm4, %%mm2 \n\t"
		/* absolute value of each word: sign mask, xor, subtract */
		"movq      %%mm1, %%mm5 \n\t"
		"movq      %%mm2, %%mm6 \n\t"
		"psraw       $15, %%mm5 \n\t"	/* all ones where the word is negative */
		"psraw       $15, %%mm6 \n\t"
		"pxor      %%mm5, %%mm1 \n\t"
		"pxor      %%mm6, %%mm2 \n\t"
		"psubsw    %%mm5, %%mm1 \n\t"
		"psubsw    %%mm6, %%mm2 \n\t"
		"packuswb  %%mm2, %%mm1 \n\t"	/* words to bytes, clamped to 0..255 */
		"movq    %%mm1, (%%edi) \n\t"	/* store 8 result bytes */
		"add $8, %%eax \n\t"
		"add $8, %%ebx \n\t"
		"add $8, %%edi \n\t"
		"dec %%ecx     \n\t"
		"jnz 1b        \n\t"
		"emms          \n\t"		/* leave MMX state */
		"popa \n\t"
		:			/* no outputs: the pointer Dest is only read */
		:"m"(Dest),		/* %0 */
		"m"(Src2),		/* %1 */
		"m"(Src1),		/* %2 */
		"m"(SrcLength)		/* %3 */
		:"memory"		/* the bytes behind Dest are written */
		);
#endif
	return (0);
#else
	return (-1);
#endif
}
00729 
/*!
\brief Byte-wise saturating multiplication: Dest = min(Src1 * Src2, 255).

When SDL_imageFilterMMXdetect() reports MMX and at least 8 bytes are given,
SDL_imageFilterMultMMX() handles the complete 8-byte groups and the C loop
only finishes the remaining bytes; otherwise the C loop handles everything.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param length Number of bytes in each array.

\return 0 on success (including length 0), -1 if any pointer is NULL.
*/
int SDL_imageFilterMult(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	unsigned int pos = 0;	/* first byte still to be handled by the C loop */
	int product;

	if (!Src1 || !Src2 || !Dest) {
		return -1;
	}
	if (length == 0) {
		return 0;
	}

	if (SDL_imageFilterMMXdetect() && (length >= 8)) {
		SDL_imageFilterMultMMX(Src1, Src2, Dest, length);
		/* Skip everything the kernel covered: the largest multiple of 8. */
		pos = length - (length & 7);
	}

	while (pos < length) {
		product = Src1[pos] * Src2[pos];
		Dest[pos] = (unsigned char) ((product > 255) ? 255 : product);
		pos++;
	}

	return 0;
}
00792 
/*!
\brief Assembly kernel of SDL_imageFilterMultNor: Dest = low byte of Src1 * Src2.

Unlike the 8-byte MMX kernels in this file, this routine is a plain byte loop
and processes all SrcLength bytes; no remainder is left for the caller. It is
compiled only when USE_MMX is defined although it uses no MMX instructions.
A length of 0 means there is nothing to do.

NOTE(review): the GCC branch reads its operands from memory after pusha has
moved %esp; this relies on the compiler not addressing the parameters
relative to %esp -- confirm the build flags.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param SrcLength Number of bytes in each array.

\return 0 when built with USE_MMX, -1 otherwise (nothing is written).
*/
int SDL_imageFilterMultNorASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
	/* The loop below decrements before it tests: entered with a count of 0
	   it would wrap around and run far beyond the buffers. */
	if (SrcLength == 0) {
		return (0);
	}
#if !defined(GCC__)
	__asm
	{
		pusha
		mov edx, Src1		/* edx = Src1 */
		mov esi, Src2		/* esi = Src2 */
		mov edi, Dest		/* edi = Dest */
		mov ecx, SrcLength	/* ecx = byte count */
		align 16
L10141:
		mov al, [edx]		/* al = Src1 byte */
		mul [esi]		/* multiply by the Src2 byte */
		mov [edi], al		/* keep only the low byte of the product */
		inc edx
		inc esi
		inc edi
		dec ecx
		jnz L10141
		popa
	}
#else
	__asm__ __volatile__
		("pusha                  \n\t"
		"mov %2, %%edx \n\t"		/* edx = Src1 */
		"mov %1, %%esi \n\t"		/* esi = Src2 */
		"mov %0, %%edi \n\t"		/* edi = Dest */
		"mov %3, %%ecx \n\t"		/* ecx = byte count */
		".align 16       \n\t"
		"1:mov  (%%edx), %%al \n\t"	/* al = Src1 byte */
		"mulb (%%esi)       \n\t"	/* ax = al * Src2 byte */
		"mov %%al, (%%edi)  \n\t"	/* keep only the low byte of the product */
		"inc %%edx \n\t"
		"inc %%esi \n\t"
		"inc %%edi \n\t"
		"dec %%ecx      \n\t"
		"jnz 1b         \n\t"
		"popa                   \n\t"
		:			/* no outputs: the pointer Dest is only read */
		:"m"(Dest),		/* %0 */
		"m"(Src2),		/* %1 */
		"m"(Src1),		/* %2 */
		"m"(SrcLength)		/* %3 */
		:"memory"		/* the bytes behind Dest are written */
		);
#endif
	return (0);
#else
	return (-1);
#endif
}
00851 
/*!
\brief Byte-wise multiplication without saturation: Dest = (Src1 * Src2) & 0xFF.

When SDL_imageFilterMMXdetect() reports MMX, SDL_imageFilterMultNorASM() is
used. That routine walks every byte itself, so nothing is left over for the
C loop; running the C loop over a "tail" afterwards would multiply those
bytes a second time whenever Dest aliases one of the sources. The C loop is
used only when the assembly routine is not used or reports failure.

\param Src1 First source byte array.
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param length Number of bytes in each array.

\return 0 on success (including length 0), -1 if any pointer is NULL.
*/
int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	unsigned int i;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* The assembly routine handles the complete range on its own. */
	if (SDL_imageFilterMMXdetect() && (SDL_imageFilterMultNorASM(Src1, Src2, Dest, length) == 0)) {
		return (0);
	}

	/* Plain C version; the conversion to unsigned char keeps the low 8 bits. */
	for (i = 0; i < length; i++) {
		Dest[i] = (unsigned char) ((int) Src1[i] * (int) Src2[i]);
	}

	return (0);
}
00914 
/*!
\brief MMX kernel of SDL_imageFilterMultDivby2: Dest = (Src1/2) * Src2, clamped at 255.

Bytes are widened to 16-bit words, the Src1 words are shifted right by one
bit, multiplied with the Src2 words and packed back to bytes with unsigned
saturation. Only complete groups of 8 bytes are processed; the remaining
SrcLength % 8 bytes are left for the caller. Fewer than 8 bytes means there
is nothing to do.

NOTE(review): the GCC branch reads its operands from memory after pusha has
moved %esp; this relies on the compiler not addressing the parameters
relative to %esp -- confirm the build flags.

\param Src1 First source byte array (the operand that is halved).
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param SrcLength Number of bytes in each array.

\return 0 when built with USE_MMX, -1 otherwise (nothing is written).
*/
int SDL_imageFilterMultDivby2MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
	/* The loop below decrements before it tests: entered with a group count
	   of 0 it would wrap around and run far beyond the buffers. */
	if (SrcLength < 8) {
		return (0);
	}
#if !defined(GCC__)
	__asm
	{
		pusha
		mov eax, Src1		/* eax = Src1 */
		mov ebx, Src2		/* ebx = Src2 */
		mov edi, Dest		/* edi = Dest */
		mov ecx,  SrcLength	/* ecx = byte count */
		shr ecx,  3		/* ecx = number of 8-byte groups */
		pxor mm0,  mm0		/* mm0 = 0, used to widen bytes to words */
		align 16
L1015:
		movq mm1,  [eax]	/* 8 bytes of Src1 */
		movq mm3,  [ebx]	/* 8 bytes of Src2 */
		movq mm2,  mm1
		movq mm4,  mm3
		punpcklbw mm1,  mm0	/* low 4 bytes of Src1 as words */
		punpckhbw mm2,  mm0	/* high 4 bytes of Src1 as words */
		punpcklbw mm3,  mm0	/* low 4 bytes of Src2 as words */
		punpckhbw mm4,  mm0	/* high 4 bytes of Src2 as words */
		psrlw mm1,  1		/* Src1 / 2 */
		psrlw mm2,  1
		pmullw mm1,  mm3	/* (Src1 / 2) * Src2 */
		pmullw mm2,  mm4
		packuswb mm1,  mm2	/* words to bytes, clamped to 0..255 */
		movq [edi],  mm1	/* store 8 result bytes */
		add eax,  8
		add ebx,  8
		add edi,  8
		dec ecx
		jnz L1015
		emms			/* leave MMX state */
		popa
	}
#else
	__asm__ __volatile__
		("pusha \n\t"
		"mov %2, %%eax \n\t"		/* eax = Src1 */
		"mov %1, %%ebx \n\t"		/* ebx = Src2 */
		"mov %0, %%edi \n\t"		/* edi = Dest */
		"mov %3, %%ecx \n\t"		/* ecx = byte count */
		"shr $3, %%ecx \n\t"		/* ecx = number of 8-byte groups */
		"pxor      %%mm0, %%mm0 \n\t"	/* mm0 = 0, used to widen bytes to words */
		".align 16       \n\t"
		"1: movq (%%eax), %%mm1 \n\t"	/* 8 bytes of Src1 */
		"movq    (%%ebx), %%mm3 \n\t"	/* 8 bytes of Src2 */
		"movq      %%mm1, %%mm2 \n\t"
		"movq      %%mm3, %%mm4 \n\t"
		"punpcklbw %%mm0, %%mm1 \n\t"	/* low 4 bytes of Src1 as words */
		"punpckhbw %%mm0, %%mm2 \n\t"	/* high 4 bytes of Src1 as words */
		"punpcklbw %%mm0, %%mm3 \n\t"	/* low 4 bytes of Src2 as words */
		"punpckhbw %%mm0, %%mm4 \n\t"	/* high 4 bytes of Src2 as words */
		"psrlw        $1, %%mm1 \n\t"	/* Src1 / 2 */
		"psrlw        $1, %%mm2 \n\t"
		"pmullw    %%mm3, %%mm1 \n\t"	/* (Src1 / 2) * Src2 */
		"pmullw    %%mm4, %%mm2 \n\t"
		"packuswb  %%mm2, %%mm1 \n\t"	/* words to bytes, clamped to 0..255 */
		"movq    %%mm1, (%%edi) \n\t"	/* store 8 result bytes */
		"add $8, %%eax \n\t"
		"add $8, %%ebx \n\t"
		"add $8, %%edi \n\t"
		"dec %%ecx     \n\t"
		"jnz 1b        \n\t"
		"emms          \n\t"		/* leave MMX state */
		"popa \n\t"
		:			/* no outputs: the pointer Dest is only read */
		:"m"(Dest),		/* %0 */
		"m"(Src2),		/* %1 */
		"m"(Src1),		/* %2 */
		"m"(SrcLength)		/* %3 */
		:"memory"		/* the bytes behind Dest are written */
		);
#endif
	return (0);
#else
	return (-1);
#endif
}
01001 
/*!
\brief Byte-wise multiplication with a halved first operand:
Dest = min((Src1 / 2) * Src2, 255).

When SDL_imageFilterMMXdetect() reports MMX and at least 8 bytes are given,
SDL_imageFilterMultDivby2MMX() handles the complete 8-byte groups and the C
loop only finishes the remaining bytes; otherwise the C loop handles
everything.

\param Src1 First source byte array (the operand that is halved).
\param Src2 Second source byte array.
\param Dest Destination byte array.
\param length Number of bytes in each array.

\return 0 on success (including length 0), -1 if any pointer is NULL.
*/
int SDL_imageFilterMultDivby2(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	unsigned int pos = 0;	/* first byte still to be handled by the C loop */
	int product;

	if (!Src1 || !Src2 || !Dest) {
		return -1;
	}
	if (length == 0) {
		return 0;
	}

	if (SDL_imageFilterMMXdetect() && (length >= 8)) {
		SDL_imageFilterMultDivby2MMX(Src1, Src2, Dest, length);
		/* Skip everything the kernel covered: the largest multiple of 8. */
		pos = length - (length & 7);
	}

	while (pos < length) {
		product = (Src1[pos] / 2) * Src2[pos];
		Dest[pos] = (unsigned char) ((product > 255) ? 255 : product);
		pos++;
	}

	return 0;
}
01061 
/**
 * MMX kernel for MultDivby4: for every byte,
 * Dest = saturate255((Src1 >> 1) * (Src2 >> 1)).
 *
 * Processes SrcLength >> 3 groups of 8 bytes; any remaining SrcLength % 8
 * bytes are not touched. SrcLength must be at least 8: the group counter is
 * decremented before it is tested (dec/jnz), so a count of 0 would not stop
 * the loop after zero iterations.
 *
 * \param Src1      First source buffer.
 * \param Src2      Second source buffer.
 * \param Dest      Destination buffer.
 * \param SrcLength Number of bytes in each buffer.
 *
 * \return 0 when built with USE_MMX, otherwise -1 (buffers untouched).
 *
 * NOTE(review): the GCC variant reads its "m" operands after pusha has moved
 * %esp and declares no clobbers; this appears to rely on the operands being
 * addressed through the frame pointer -- confirm against the build flags.
 */
01072 int SDL_imageFilterMultDivby4MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
01073 {
01074 #ifdef USE_MMX
01075 #if !defined(GCC__)
01076         __asm
01077         {
01078                 pusha
01079                         mov eax, Src1           /* eax = Src1 address */
01080                         mov ebx, Src2           /* ebx = Src2 address */
01081                         mov edi, Dest           /* edi = Dest address */
01082                         mov ecx, SrcLength      /* ecx = byte count */
01083                         shr ecx,  3     /* ecx = number of 8-byte groups */
01084                         pxor mm0, mm0           /* mm0 = 0, used to widen bytes to words */
01085                         align 16                /* align the loop entry */
01086 L1016:
01087                 movq mm1, [eax]         /* 8 bytes of Src1 */
01088                 movq mm3, [ebx]         /* 8 bytes of Src2 */
01089                 movq mm2, mm1           /* second copy of Src1 for the high half */
01090                         movq mm4, mm3           /* second copy of Src2 for the high half */
01091                         punpcklbw mm1, mm0      /* low 4 bytes of Src1 -> words */
01092                         punpckhbw mm2, mm0      /* high 4 bytes of Src1 -> words */
01093                         punpcklbw mm3, mm0      /* low 4 bytes of Src2 -> words */
01094                         punpckhbw mm4, mm0      /* high 4 bytes of Src2 -> words */
01095                         psrlw mm1, 1    /* halve Src1 words (low half) */
01096                         psrlw mm2, 1    /* halve Src1 words (high half) */
01097                         psrlw mm3, 1    /* halve Src2 words (low half) */
01098                         psrlw mm4, 1    /* halve Src2 words (high half) */
01099                         pmullw mm1, mm3         /* multiply low halves, keep low 16 bits */
01100                         pmullw mm2, mm4         /* multiply high halves, keep low 16 bits */
01101                         packuswb mm1, mm2       /* pack words back to bytes with saturation */
01102                         movq [edi], mm1         /* store 8 result bytes */
01103                         add eax, 8      /* advance Src1 */
01104                         add ebx, 8      /* advance Src2 */
01105                         add edi,  8     /* advance Dest */
01106                         dec ecx         /* one group done */
01107                         jnz L1016               /* loop while groups remain */
01108                         emms                    /* leave MMX state */
01109                         popa
01110         }
01111 #else
01112         asm volatile
01113                 ("pusha              \n\t" "mov %2, %%eax \n\t" /* eax = Src1 */
01114                 "mov %1, %%ebx \n\t"    /* ebx = Src2 */
01115                 "mov %0, %%edi \n\t"    /* edi = Dest */
01116                 "mov %3, %%ecx \n\t"    /* ecx = SrcLength */
01117                 "shr $3, %%ecx \n\t"    /* ecx = number of 8-byte groups */
01118                 "pxor      %%mm0, %%mm0 \n\t"   /* mm0 = 0, used to widen bytes to words */
01119                 ".align 16       \n\t"  /* align the loop entry */
01120                 "1: movq (%%eax), %%mm1 \n\t"   /* 8 bytes of Src1 */
01121                 "movq    (%%ebx), %%mm3 \n\t"   /* 8 bytes of Src2 */
01122                 "movq      %%mm1, %%mm2 \n\t"   /* second copy of Src1 */
01123                 "movq      %%mm3, %%mm4 \n\t"   /* second copy of Src2 */
01124                 "punpcklbw %%mm0, %%mm1 \n\t"   /* low bytes of Src1 -> words */
01125                 "punpckhbw %%mm0, %%mm2 \n\t"   /* high bytes of Src1 -> words */
01126                 "punpcklbw %%mm0, %%mm3 \n\t"   /* low bytes of Src2 -> words */
01127                 "punpckhbw %%mm0, %%mm4 \n\t"   /* high bytes of Src2 -> words */
01128                 "psrlw        $1, %%mm1 \n\t"   /* halve Src1 words (low half) */
01129                 "psrlw        $1, %%mm2 \n\t"   /* halve Src1 words (high half) */
01130                 "psrlw        $1, %%mm3 \n\t"   /* halve Src2 words (low half) */
01131                 "psrlw        $1, %%mm4 \n\t"   /* halve Src2 words (high half) */
01132                 "pmullw    %%mm3, %%mm1 \n\t"   /* multiply low halves */
01133                 "pmullw    %%mm4, %%mm2 \n\t"   /* multiply high halves */
01134                 "packuswb  %%mm2, %%mm1 \n\t"   /* pack to bytes with saturation */
01135                 "movq    %%mm1, (%%edi) \n\t"   /* store 8 result bytes */
01136                 "add $8, %%eax \n\t"    /* advance Src1 */
01137                 "add $8, %%ebx \n\t"    /* advance Src2 */
01138                 "add $8, %%edi \n\t" "dec %%ecx     \n\t"       /* advance Dest, count down */
01139                 "jnz 1b        \n\t"    /* loop while groups remain */
01140                 "emms          \n\t"    /* leave MMX state */
01141                 "popa                   \n\t":"=m" (Dest)       /* %0 */
01142                 :"m"(Src2),             /* %1 */
01143                 "m"(Src1),              /* %2 */
01144                 "m"(SrcLength)          /* %3 */
01145                 );
01146 #endif
01147         return (0);
01148 #else
01149         return (-1);
01150 #endif
01151 }
01152 
/**
 * Per-byte multiply with both operands halved:
 * Dest[k] = min(255, (Src1[k] / 2) * (Src2[k] / 2)).
 *
 * When SDL_imageFilterMMXdetect() reports MMX and more than 7 bytes are
 * supplied, the buffers are first handed to SDL_imageFilterMultDivby4MMX();
 * the scalar loop below then only covers the bytes starting at
 * (length & 0xfffffff8). Otherwise the scalar loop covers the whole buffer.
 *
 * \param Src1   First source buffer.
 * \param Src2   Second source buffer.
 * \param Dest   Destination buffer.
 * \param length Number of bytes to process in each buffer.
 *
 * \return 0 on success (including length == 0), -1 if any pointer is NULL.
 */
int SDL_imageFilterMultDivby4(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
        unsigned int pos = 0;

        /* Reject missing buffers; an empty request is trivially done. */
        if (Src1 == NULL || Src2 == NULL || Dest == NULL) {
                return -1;
        }
        if (length == 0) {
                return 0;
        }

        if (SDL_imageFilterMMXdetect() && length > 7) {
                /* Accelerated pass over the 8-byte groups. */
                SDL_imageFilterMultDivby4MMX(Src1, Src2, Dest, length);

                /* Nothing left over when length is a multiple of 8. */
                if ((length & 7) == 0) {
                        return 0;
                }
                pos = length & 0xfffffff8;
        }

        /* Scalar pass: whole buffer, or only the leftover tail. */
        for (; pos < length; pos++) {
                int product = ((int) Src1[pos] / 2) * ((int) Src2[pos] / 2);

                Dest[pos] = (unsigned char) ((product > 255) ? 255 : product);
        }

        return 0;
}
01212 
/**
 * MMX kernel for BitAnd: Dest = Src1 & Src2, 8 bytes per iteration.
 *
 * Processes SrcLength >> 3 groups of 8 bytes; any remaining SrcLength % 8
 * bytes are not touched. SrcLength must be at least 8: the group counter is
 * decremented before it is tested (dec/jnz), so a count of 0 would not stop
 * the loop after zero iterations.
 *
 * \param Src1      First source buffer.
 * \param Src2      Second source buffer.
 * \param Dest      Destination buffer.
 * \param SrcLength Number of bytes in each buffer.
 *
 * \return 0 when built with USE_MMX, otherwise -1 (buffers untouched).
 *
 * NOTE(review): the GCC variant reads its "m" operands after pusha has moved
 * %esp and declares no clobbers; this appears to rely on the operands being
 * addressed through the frame pointer -- confirm against the build flags.
 */
01223 int SDL_imageFilterBitAndMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
01224 {
01225 #ifdef USE_MMX
01226 #if !defined(GCC__)
01227         __asm
01228         {
01229                 pusha
01230                         mov eax, Src1           /* eax = Src1 address */
01231                         mov ebx, Src2           /* ebx = Src2 address */
01232                         mov edi, Dest           /* edi = Dest address */
01233                         mov ecx, SrcLength      /* ecx = byte count */
01234                         shr ecx, 3      /* ecx = number of 8-byte groups */
01235                         align 16                /* align the loop entry */
01236 L1017:
01237                 movq mm1, [eax]         /* 8 bytes of Src1 */
01238                 pand mm1, [ebx]         /* AND with 8 bytes of Src2 */
01239                 movq [edi], mm1         /* store 8 result bytes */
01240                         add eax, 8      /* advance Src1 */
01241                         add ebx, 8      /* advance Src2 */
01242                         add edi, 8      /* advance Dest */
01243                         dec ecx         /* one group done */
01244                         jnz L1017               /* loop while groups remain */
01245                         emms                    /* leave MMX state */
01246                         popa
01247         }
01248 #else
01249         asm volatile
01250                 ("pusha              \n\t" "mov %2, %%eax \n\t" /* eax = Src1 */
01251                 "mov %1, %%ebx \n\t"    /* ebx = Src2 */
01252                 "mov %0, %%edi \n\t"    /* edi = Dest */
01253                 "mov %3, %%ecx \n\t"    /* ecx = SrcLength */
01254                 "shr $3, %%ecx \n\t"    /* ecx = number of 8-byte groups */
01255                 ".align 16       \n\t"  /* align the loop entry */
01256                 "1: movq (%%eax), %%mm1 \n\t"   /* 8 bytes of Src1 */
01257                 "pand    (%%ebx), %%mm1 \n\t"   /* AND with 8 bytes of Src2 */
01258                 "movq    %%mm1, (%%edi) \n\t"   /* store 8 result bytes */
01259                 "add $8, %%eax \n\t"    /* advance Src1 */
01260                 "add $8, %%ebx \n\t"    /* advance Src2 */
01261                 "add $8, %%edi \n\t" "dec %%ecx     \n\t"       /* advance Dest, count down */
01262                 "jnz 1b        \n\t"    /* loop while groups remain */
01263                 "emms          \n\t"    /* leave MMX state */
01264                 "popa                   \n\t":"=m" (Dest)       /* %0 */
01265                 :"m"(Src2),             /* %1 */
01266                 "m"(Src1),              /* %2 */
01267                 "m"(SrcLength)          /* %3 */
01268                 );
01269 #endif
01270         return (0);
01271 #else
01272         return (-1);
01273 #endif
01274 }
01275 
/**
 * Per-byte bitwise AND: Dest[k] = Src1[k] & Src2[k].
 *
 * When SDL_imageFilterMMXdetect() returns a positive value and more than 7
 * bytes are supplied, the buffers are first handed to
 * SDL_imageFilterBitAndMMX(); the scalar loop below then only covers the
 * bytes starting at (length & 0xfffffff8). Otherwise the scalar loop covers
 * the whole buffer.
 *
 * \param Src1   First source buffer.
 * \param Src2   Second source buffer.
 * \param Dest   Destination buffer.
 * \param length Number of bytes to process in each buffer.
 *
 * \return 0 on success (including length == 0), -1 if any pointer is NULL.
 */
int SDL_imageFilterBitAnd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
        unsigned int pos = 0;

        /* Reject missing buffers; an empty request is trivially done. */
        if (Src1 == NULL || Src2 == NULL || Dest == NULL) {
                return -1;
        }
        if (length == 0) {
                return 0;
        }

        if (SDL_imageFilterMMXdetect() > 0 && length > 7) {
                /* Accelerated pass over the 8-byte groups. */
                SDL_imageFilterBitAndMMX(Src1, Src2, Dest, length);

                /* Nothing left over when length is a multiple of 8. */
                if ((length & 7) == 0) {
                        return 0;
                }
                pos = length & 0xfffffff8;
        }

        /* Scalar pass: whole buffer, or only the leftover tail. */
        for (; pos < length; pos++) {
                Dest[pos] = (unsigned char) (Src1[pos] & Src2[pos]);
        }

        return 0;
}
01334 
/**
 * MMX kernel for BitOr: Dest = Src1 | Src2, 8 bytes per iteration.
 *
 * Processes SrcLength >> 3 groups of 8 bytes; any remaining SrcLength % 8
 * bytes are not touched. SrcLength must be at least 8: the group counter is
 * decremented before it is tested (dec/jnz), so a count of 0 would not stop
 * the loop after zero iterations.
 *
 * \param Src1      First source buffer.
 * \param Src2      Second source buffer.
 * \param Dest      Destination buffer.
 * \param SrcLength Number of bytes in each buffer.
 *
 * \return 0 when built with USE_MMX, otherwise -1 (buffers untouched).
 *
 * NOTE(review): the GCC variant reads its "m" operands after pusha has moved
 * %esp and declares no clobbers; this appears to rely on the operands being
 * addressed through the frame pointer -- confirm against the build flags.
 */
01345 int SDL_imageFilterBitOrMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
01346 {
01347 #ifdef USE_MMX
01348 #if !defined(GCC__)
01349         __asm
01350         {
01351                 pusha
01352                         mov eax, Src1           /* eax = Src1 address */
01353                         mov ebx, Src2           /* ebx = Src2 address */
01354                         mov edi, Dest           /* edi = Dest address */
01355                         mov ecx, SrcLength      /* ecx = byte count */
01356                         shr ecx,  3     /* ecx = number of 8-byte groups */
01357                         align 16                /* align the loop entry */
01358 L91017:
01359                 movq mm1, [eax]         /* 8 bytes of Src1 */
01360                 por mm1, [ebx]          /* OR with 8 bytes of Src2 */
01361                 movq [edi], mm1         /* store 8 result bytes */
01362                         add eax, 8      /* advance Src1 */
01363                         add ebx, 8      /* advance Src2 */
01364                         add edi,  8     /* advance Dest */
01365                         dec ecx         /* one group done */
01366                         jnz L91017              /* loop while groups remain */
01367                         emms                    /* leave MMX state */
01368                         popa
01369         }
01370 #else
01371         asm volatile
01372                 ("pusha              \n\t" "mov %2, %%eax \n\t" /* eax = Src1 */
01373                 "mov %1, %%ebx \n\t"    /* ebx = Src2 */
01374                 "mov %0, %%edi \n\t"    /* edi = Dest */
01375                 "mov %3, %%ecx \n\t"    /* ecx = SrcLength */
01376                 "shr $3, %%ecx \n\t"    /* ecx = number of 8-byte groups */
01377                 ".align 16       \n\t"  /* align the loop entry */
01378                 "1: movq (%%eax), %%mm1 \n\t"   /* 8 bytes of Src1 */
01379                 "por     (%%ebx), %%mm1 \n\t"   /* OR with 8 bytes of Src2 */
01380                 "movq    %%mm1, (%%edi) \n\t"   /* store 8 result bytes */
01381                 "add $8, %%eax \n\t"    /* advance Src1 */
01382                 "add $8, %%ebx \n\t"    /* advance Src2 */
01383                 "add $8, %%edi \n\t" "dec %%ecx     \n\t"       /* advance Dest, count down */
01384                 "jnz 1b        \n\t"    /* loop while groups remain */
01385                 "emms          \n\t"    /* leave MMX state */
01386                 "popa                   \n\t":"=m" (Dest)       /* %0 */
01387                 :"m"(Src2),             /* %1 */
01388                 "m"(Src1),              /* %2 */
01389                 "m"(SrcLength)          /* %3 */
01390                 );
01391 #endif
01392         return (0);
01393 #else
01394         return (-1);
01395 #endif
01396 }
01397 
/**
 * Per-byte bitwise OR: Dest[k] = Src1[k] | Src2[k].
 *
 * When SDL_imageFilterMMXdetect() reports MMX and more than 7 bytes are
 * supplied, the buffers are first handed to SDL_imageFilterBitOrMMX(); the
 * scalar loop below then only covers the bytes starting at
 * (length & 0xfffffff8). Otherwise the scalar loop covers the whole buffer.
 *
 * \param Src1   First source buffer.
 * \param Src2   Second source buffer.
 * \param Dest   Destination buffer.
 * \param length Number of bytes to process in each buffer.
 *
 * \return 0 on success (including length == 0), -1 if any pointer is NULL.
 */
int SDL_imageFilterBitOr(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
        unsigned int pos = 0;

        /* Reject missing buffers; an empty request is trivially done. */
        if (Src1 == NULL || Src2 == NULL || Dest == NULL) {
                return -1;
        }
        if (length == 0) {
                return 0;
        }

        if (SDL_imageFilterMMXdetect() && length > 7) {
                /* Accelerated pass over the 8-byte groups. */
                SDL_imageFilterBitOrMMX(Src1, Src2, Dest, length);

                /* Nothing left over when length is a multiple of 8. */
                if ((length & 7) == 0) {
                        return 0;
                }
                pos = length & 0xfffffff8;
        }

        /* Scalar pass: whole buffer, or only the leftover tail. */
        for (; pos < length; pos++) {
                Dest[pos] = (unsigned char) (Src1[pos] | Src2[pos]);
        }

        return 0;
}
01453 
/**
 * Assembly kernel for Div: Dest = Src1 / Src2 per byte, storing 255 where the
 * Src2 byte is 0.
 *
 * Works one byte per iteration using integer instructions only (no MMX
 * registers are used, although the code is still guarded by USE_MMX).
 * SrcLength must be at least 1: the counter is decremented before it is
 * tested (dec/jnz), so a count of 0 would not stop the loop after zero
 * iterations.
 *
 * \param Src1      Dividend buffer.
 * \param Src2      Divisor buffer.
 * \param Dest      Destination buffer.
 * \param SrcLength Number of bytes in each buffer.
 *
 * \return 0 when built with USE_MMX, otherwise -1 (buffers untouched).
 *
 * NOTE(review): the GCC variant reads its "m" operands after pusha has moved
 * %esp and declares no clobbers; this appears to rely on the operands being
 * addressed through the frame pointer -- confirm against the build flags.
 */
01464 int SDL_imageFilterDivASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
01465 {
01466 #ifdef USE_MMX
01467 #if !defined(GCC__)
01468         __asm
01469         {
01470                 pusha
01471                         mov edx, Src1           /* edx = Src1 address */
01472                         mov esi, Src2           /* esi = Src2 address */
01473                         mov edi, Dest           /* edi = Dest address */
01474                         mov ecx, SrcLength      /* ecx = byte count */
01475                         align 16                /* align the loop entry */
01476 L10191:
01477                 mov bl, [esi]           /* bl = divisor byte from Src2 */
01478                 cmp bl, 0       /* divisor == 0 ? */
01479                         jnz L10192
01480                         mov [edi], 255          /* division by zero: store 255 */
01481                         jmp  L10193
01482 L10192:
01483                 xor ah, ah      /* clear high byte so ax holds the dividend */
01484                         mov al, [edx]           /* al = dividend byte from Src1 */
01485                 div   bl                /* al = ax / bl */
01486                         mov [edi], al           /* store the quotient */
01487 L10193:
01488                 inc edx         /* advance Src1 */
01489                         inc esi                 /* advance Src2 */
01490                         inc edi                 /* advance Dest */
01491                         dec ecx         /* one byte done */
01492                         jnz L10191      /* loop while bytes remain */
01493                         popa
01494         }
01495 #else
01496         asm volatile
01497                 ("pusha \n\t" "mov %2, %%edx \n\t"      /* edx = Src1 */
01498                 "mov %1, %%esi \n\t"    /* esi = Src2 */
01499                 "mov %0, %%edi \n\t"    /* edi = Dest */
01500                 "mov %3, %%ecx \n\t"    /* ecx = SrcLength */
01501                 ".align 16     \n\t"    /* align the loop entry */
01502                 "1: mov (%%esi), %%bl  \n\t"    /* bl = divisor byte from Src2 */
01503                 "cmp       $0, %%bl  \n\t"      /* divisor == 0 ? */
01504                 "jnz 2f              \n\t" "movb  $255, (%%edi) \n\t"   /* division by zero: store 255 */
01505                 "jmp 3f              \n\t" "2:                  \n\t" "xor   %%ah, %%ah    \n\t"        /* clear high byte of ax */
01506                 "mov   (%%edx), %%al \n\t"      /* al = dividend byte from Src1 */
01507                 "div   %%bl          \n\t"      /* al = ax / bl */
01508                 "mov   %%al, (%%edi) \n\t"      /* store the quotient */
01509                 "3: inc %%edx        \n\t"      /* advance Src1 */
01510                 "inc %%esi \n\t"                /* advance Src2 */
01511                 "inc %%edi \n\t" "dec %%ecx    \n\t"    /* advance Dest, count down */
01512                 "jnz 1b       \n\t"     /* loop while bytes remain */
01513                 "popa \n\t":"=m" (Dest) /* %0 */
01514                 :"m"(Src2),             /* %1 */
01515                 "m"(Src1),              /* %2 */
01516                 "m"(SrcLength)          /* %3 */
01517                 );
01518 #endif
01519         return (0);
01520 #else
01521         return (-1);
01522 #endif
01523 }
01524 
/**
 * Per-byte division: Dest[k] = Src1[k] / Src2[k], with 255 stored wherever
 * the divisor byte Src2[k] is 0.
 *
 * When SDL_imageFilterMMXdetect() reports MMX the whole buffer is processed
 * by SDL_imageFilterDivASM(); otherwise the C loop below is used. The C loop
 * stores 255 for a zero divisor, as the assembly routine does, instead of
 * performing an integer division by zero (undefined behaviour in C).
 *
 * \param Src1   Dividend buffer.
 * \param Src2   Divisor buffer.
 * \param Dest   Destination buffer.
 * \param length Number of bytes to process in each buffer.
 *
 * \return 0 on success (including length == 0), -1 if any pointer is NULL.
 */
int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
        unsigned int i;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        if (SDL_imageFilterMMXdetect()) {
                /* length is known to be non-zero here, as the routine requires. */
                SDL_imageFilterDivASM(Src1, Src2, Dest, length);
                return (0);
        }

        /* Portable fallback over the whole buffer. */
        for (i = 0; i < length; i++) {
                if (Src2[i] == 0) {
                        /* Zero divisor: store 255, as the assembly path does. */
                        Dest[i] = 255;
                } else {
                        Dest[i] = (unsigned char) ((int) Src1[i] / (int) Src2[i]);
                }
        }

        return (0);
}
01577 
01578 
01579 
/**
 * MMX kernel for BitNegation: Dest = ~Src1, 8 bytes per iteration.
 *
 * Processes SrcLength >> 3 groups of 8 bytes; any remaining SrcLength % 8
 * bytes are not touched. SrcLength must be at least 8: the group counter is
 * decremented before it is tested (dec/jnz), so a count of 0 would not stop
 * the loop after zero iterations.
 *
 * \param Src1      Source buffer.
 * \param Dest      Destination buffer.
 * \param SrcLength Number of bytes in each buffer.
 *
 * \return 0 when built with USE_MMX, otherwise -1 (buffers untouched).
 *
 * NOTE(review): the GCC variant reads its "m" operands after pusha has moved
 * %esp and declares no clobbers; this appears to rely on the operands being
 * addressed through the frame pointer -- confirm against the build flags.
 */
01589 int SDL_imageFilterBitNegationMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength)
01590 {
01591 #ifdef USE_MMX
01592 #if !defined(GCC__)
01593         __asm
01594         {
01595                 pusha
01596                         pcmpeqb mm1, mm1        /* mm1 = all ones (every byte equals itself) */
01597                         mov eax, Src1           /* eax = Src1 address */
01598                         mov edi, Dest           /* edi = Dest address */
01599                         mov ecx, SrcLength      /* ecx = byte count */
01600                         shr ecx,  3     /* ecx = number of 8-byte groups */
01601                         align 16                /* align the loop entry */
01602 L91117:
01603                 movq mm0, [eax]         /* 8 bytes of Src1 */
01604                 pxor mm0, mm1           /* XOR with all ones inverts every bit */
01605                         movq [edi], mm0         /* store 8 result bytes */
01606                         add eax, 8      /* advance Src1 */
01607                         add edi,  8     /* advance Dest */
01608                         dec ecx         /* one group done */
01609                         jnz L91117              /* loop while groups remain */
01610                         emms                    /* leave MMX state */
01611                         popa
01612         }
01613 #else
01614         asm volatile
01615                 ("pusha              \n\t" "pcmpeqb   %%mm1, %%mm1 \n\t"        /* mm1 = all ones */
01616                 "mov %1, %%eax \n\t"    /* eax = Src1 */
01617                 "mov %0, %%edi \n\t"    /* edi = Dest */
01618                 "mov %2, %%ecx \n\t"    /* ecx = SrcLength */
01619                 "shr $3, %%ecx \n\t"    /* ecx = number of 8-byte groups */
01620                 ".align 16       \n\t"  /* align the loop entry */
01621                 "1: movq (%%eax), %%mm0 \n\t"   /* 8 bytes of Src1 */
01622                 "pxor      %%mm1, %%mm0 \n\t"   /* XOR with all ones inverts every bit */
01623                 "movq    %%mm0, (%%edi) \n\t"   /* store 8 result bytes */
01624                 "add $8, %%eax \n\t"    /* advance Src1 */
01625                 "add $8, %%edi \n\t" "dec %%ecx     \n\t"       /* advance Dest, count down */
01626                 "jnz 1b        \n\t"    /* loop while groups remain */
01627                 "emms          \n\t"    /* leave MMX state */
01628                 "popa                   \n\t":"=m" (Dest)       /* %0 */
01629                 :"m"(Src1),             /* %1 */
01630                 "m"(SrcLength)          /* %2 */
01631                 );
01632 #endif
01633         return (0);
01634 #else
01635         return (-1);
01636 #endif
01637 }
01638 
/**
 * Per-byte bitwise NOT: Dest[k] = ~Src1[k].
 *
 * When SDL_imageFilterMMXdetect() reports MMX and more than 7 bytes are
 * supplied, the buffers are first handed to SDL_imageFilterBitNegationMMX();
 * the scalar loop below then only covers the bytes starting at
 * (length & 0xfffffff8). Otherwise the scalar loop covers the whole buffer.
 *
 * \param Src1   Source buffer.
 * \param Dest   Destination buffer.
 * \param length Number of bytes to process in each buffer.
 *
 * \return 0 on success (including length == 0), -1 if any pointer is NULL.
 */
int SDL_imageFilterBitNegation(unsigned char *Src1, unsigned char *Dest, unsigned int length)
{
        unsigned int pos = 0;

        /* Reject missing buffers; an empty request is trivially done. */
        if (Src1 == NULL || Dest == NULL) {
                return -1;
        }
        if (length == 0) {
                return 0;
        }

        if (SDL_imageFilterMMXdetect() && length > 7) {
                /* Accelerated pass over the 8-byte groups. */
                SDL_imageFilterBitNegationMMX(Src1, Dest, length);

                /* Nothing left over when length is a multiple of 8. */
                if ((length & 7) == 0) {
                        return 0;
                }
                pos = length & 0xfffffff8;
        }

        /* Scalar pass: whole buffer, or only the leftover tail. */
        for (; pos < length; pos++) {
                Dest[pos] = (unsigned char) ~Src1[pos];
        }

        return 0;
}
01690 
/**
 * MMX kernel for AddByte: Dest = saturate255(Src1 + C), 8 bytes per iteration.
 *
 * The constant C is first replicated into all 8 bytes of mm1 and then added
 * to each group with unsigned saturation (paddusb). Processes SrcLength >> 3
 * groups of 8 bytes; any remaining SrcLength % 8 bytes are not touched.
 * SrcLength must be at least 8: the group counter is decremented before it is
 * tested (dec/jnz), so a count of 0 would not stop the loop after zero
 * iterations.
 *
 * \param Src1      Source buffer.
 * \param Dest      Destination buffer.
 * \param SrcLength Number of bytes in each buffer.
 * \param C         Byte constant added to every source byte.
 *
 * \return 0 when built with USE_MMX, otherwise -1 (buffers untouched).
 *
 * NOTE(review): the GCC variant reads its "m" operands after pusha has moved
 * %esp and declares no clobbers; this appears to rely on the operands being
 * addressed through the frame pointer -- confirm against the build flags.
 */
01701 int SDL_imageFilterAddByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
01702 {
01703 #ifdef USE_MMX
01704 #if !defined(GCC__)
01705         __asm
01706         {
01707                 pusha
01708                         /* Replicate C into all 8 bytes of mm1 */
01709                         mov al, C       /* al = C */
01710                         mov ah, al      /* ax = C C */
01711                         mov bx, ax      /* keep the 16-bit pattern in bx */
01712                         shl eax, 16     /* move the pattern to the upper half of eax */
01713                         mov ax, bx      /* eax = C C C C */
01714                         movd mm1, eax           /* low dword of mm1 = pattern */
01715                         movd mm2, eax           /* low dword of mm2 = pattern */
01716                         punpckldq mm1, mm2      /* mm1 = pattern in both dwords */
01717                         mov eax, Src1           /* eax = Src1 address */
01718                         mov edi, Dest           /* edi = Dest address */
01719                         mov ecx, SrcLength      /* ecx = byte count */
01720                         shr ecx,  3     /* ecx = number of 8-byte groups */
01721                         align 16                        /* align the loop entry */
01722 L1021:
01723                 movq mm0, [eax]         /* 8 bytes of Src1 */
01724                 paddusb mm0,  mm1       /* add C to each byte with unsigned saturation */
01725                         movq [edi], mm0         /* store 8 result bytes */
01726                         add eax, 8      /* advance Src1 */
01727                         add edi, 8      /* advance Dest */
01728                         dec              ecx            /* one group done */
01729                         jnz             L1021           /* loop while groups remain */
01730                         emms                            /* leave MMX state */
01731                         popa
01732         }
01733 #else
01734         asm volatile
01735                 ("pusha              \n\t"
01736                 /* Replicate C into all 8 bytes of mm1 */
01737                 "mov           %3, %%al \n\t"   /* al = C */
01738                 "mov         %%al, %%ah \n\t"   /* ax = C C */
01739                 "mov         %%ax, %%bx \n\t"   /* keep the 16-bit pattern in bx */
01740                 "shl         $16, %%eax \n\t"   /* move the pattern to the upper half of eax */
01741                 "mov         %%bx, %%ax \n\t"   /* eax = C C C C */
01742                 "movd      %%eax, %%mm1 \n\t"   /* low dword of mm1 = pattern */
01743                 "movd      %%eax, %%mm2 \n\t"   /* low dword of mm2 = pattern */
01744                 "punpckldq %%mm2, %%mm1 \n\t"   /* mm1 = pattern in both dwords */
01745                 "mov          %1, %%eax \n\t"   /* eax = Src1 */
01746                 "mov          %0, %%edi \n\t"   /* edi = Dest */
01747                 "mov          %2, %%ecx \n\t"   /* ecx = SrcLength */
01748                 "shr          $3, %%ecx \n\t"   /* ecx = number of 8-byte groups */
01749                 ".align 16              \n\t"   /* align the loop entry */
01750                 "1:                     \n\t" 
01751                 "movq    (%%eax), %%mm0 \n\t"   /* 8 bytes of Src1 */
01752                 "paddusb   %%mm1, %%mm0 \n\t"   /* add C with unsigned saturation */
01753                 "movq    %%mm0, (%%edi) \n\t"   /* store 8 result bytes */
01754                 "add          $8, %%eax \n\t"   /* advance Src1 */
01755                 "add          $8, %%edi \n\t"   /* advance Dest */
01756                 "dec              %%ecx \n\t"   /* one group done */
01757                 "jnz                 1b \n\t"   /* loop while groups remain */
01758                 "emms                   \n\t"   /* leave MMX state */
01759                 "popa                   \n\t":"=m" (Dest)       /* %0 */
01760                 :"m"(Src1),             /* %1 */
01761                 "m"(SrcLength),         /* %2 */
01762                 "m"(C)                  /* %3 */
01763                 );
01764 #endif
01765         return (0);
01766 #else
01767         return (-1);
01768 #endif
01769 }
01770 
/**
 * Add a constant to every byte with saturation:
 * Dest[k] = min(255, Src1[k] + C).
 *
 * C == 0 is handled as a plain copy of Src1 into Dest. Otherwise, when
 * SDL_imageFilterMMXdetect() reports MMX and more than 7 bytes are supplied,
 * the buffers are first handed to SDL_imageFilterAddByteMMX(); the scalar
 * loop below then only covers the bytes starting at (length & 0xfffffff8).
 * Without MMX the scalar loop covers the whole buffer.
 *
 * \param Src1   Source buffer (never written).
 * \param Dest   Destination buffer.
 * \param length Number of bytes to process in each buffer.
 * \param C      Byte constant added to every source byte.
 *
 * \return 0 on success (including length == 0), -1 if any pointer is NULL.
 */
int SDL_imageFilterAddByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
{
        unsigned int i = 0;
        int sum;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /*
         * Special case C == 0: the result equals the source, so copy it to
         * the destination (destination first, source second). memmove keeps
         * the call well defined when Src1 and Dest are the same buffer.
         */
        if (C == 0) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
                /* Accelerated pass over the 8-byte groups. */
                SDL_imageFilterAddByteMMX(Src1, Dest, length, C);

                /* Nothing left over when length is a multiple of 8. */
                if ((length & 7) == 0)
                        return (0);
                i = length & 0xfffffff8;
        }

        /* Scalar pass: whole buffer, or only the leftover tail. */
        for (; i < length; i++) {
                sum = (int) Src1[i] + (int) C;
                Dest[i] = (unsigned char) ((sum > 255) ? 255 : sum);
        }

        return (0);
}
01836 
/**
 * MMX kernel for AddUint: adds an 8-byte constant to each 8-byte group of
 * Src1 with per-byte unsigned saturation (paddusb) and stores it in Dest.
 *
 * The 8-byte constant is built with C in the low dword and D in the high
 * dword of mm1. Processes SrcLength >> 3 groups of 8 bytes; any remaining
 * SrcLength % 8 bytes are not touched. SrcLength must be at least 8: the
 * group counter is decremented before it is tested (dec/jnz), so a count of
 * 0 would not stop the loop after zero iterations.
 *
 * \param Src1      Source buffer.
 * \param Dest      Destination buffer.
 * \param SrcLength Number of bytes in each buffer.
 * \param C         Constant added to the first four bytes of each group.
 * \param D         Constant added to the last four bytes of each group.
 *
 * \return 0 when built with USE_MMX, otherwise -1 (buffers untouched).
 *
 * NOTE(review): the GCC variant reads its "m" operands after pusha has moved
 * %esp and declares no clobbers; this appears to rely on the operands being
 * addressed through the frame pointer -- confirm against the build flags.
 */
01848 int SDL_imageFilterAddUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
01849 {
01850 #ifdef USE_MMX
01851 #if !defined(GCC__)
01852         __asm
01853         {
01854                 pusha
01855                         /* Build the 8-byte constant: C in the low dword, D in the high dword */
01856                         mov eax, C      /* eax = C */
01857                         movd mm1, eax           /* low dword of mm1 = C */
01858                         mov eax, D      /* eax = D */
01859                         movd mm2, eax           /* low dword of mm2 = D */
01860                         punpckldq mm1, mm2      /* mm1 = D:C */
01861                         mov eax, Src1           /* eax = Src1 address */
01862                         mov edi, Dest           /* edi = Dest address */
01863                         mov ecx, SrcLength      /* ecx = byte count */
01864                         shr ecx,  3     /* ecx = number of 8-byte groups */
01865                         align 16                        /* align the loop entry */
01866 L11023:
01867                 movq mm0, [eax]         /* 8 bytes of Src1 */
01868                 paddusb mm0,  mm1       /* add the constant with unsigned saturation */
01869                         movq [edi],  mm0        /* store 8 result bytes */
01870                         add eax, 8      /* advance Src1 */
01871                         add edi, 8      /* advance Dest */
01872                         dec              ecx            /* one group done */
01873                         jnz             L11023          /* loop while groups remain */
01874                         emms                            /* leave MMX state */
01875                         popa
01876         }
01877 #else
01878         asm volatile
01879                 ("pusha              \n\t"
01880                 /* Build the 8-byte constant: C in the low dword, D in the high dword */
01881                 "mov          %3, %%eax \n\t"   /* eax = C */
01882                 "movd      %%eax, %%mm1 \n\t"   /* low dword of mm1 = C */
01883                 "mov          %4, %%eax \n\t"   /* eax = D */
01884                 "movd      %%eax, %%mm2 \n\t"   /* low dword of mm2 = D */
01885                 "punpckldq %%mm2, %%mm1 \n\t"   /* mm1 = D:C */
01886                 "mov          %1, %%eax \n\t"   /* eax = Src1 */
01887                 "mov          %0, %%edi \n\t"   /* edi = Dest */
01888                 "mov          %2, %%ecx \n\t"   /* ecx = SrcLength */
01889                 "shr          $3, %%ecx \n\t"   /* ecx = number of 8-byte groups */
01890                 ".align 16              \n\t"   /* align the loop entry */
01891                 "1:                     \n\t" 
01892                 "movq    (%%eax), %%mm0 \n\t"   /* 8 bytes of Src1 */
01893                 "paddusb   %%mm1, %%mm0 \n\t"   /* add the constant with unsigned saturation */
01894                 "movq    %%mm0, (%%edi) \n\t"   /* store 8 result bytes */
01895                 "add          $8, %%eax \n\t"   /* advance Src1 */
01896                 "add          $8, %%edi \n\t"   /* advance Dest */
01897                 "dec              %%ecx \n\t"   /* one group done */
01898                 "jnz                 1b \n\t"   /* loop while groups remain */
01899                 "emms                   \n\t"   /* leave MMX state */
01900                 "popa                   \n\t":"=m" (Dest)       /* %0 */
01901                 :"m"(Src1),             /* %1 */
01902                 "m"(SrcLength),         /* %2 */
01903                 "m"(C),                 /* %3 */
01904                 "m"(D)                  /* %4 */
01905                 );
01906 #endif
01907         return (0);
01908 #else
01909         return (-1);
01910 #endif
01911 }
01912 
/**
 * Add a 4-byte constant pattern to the buffer with per-byte saturation:
 * Dest[k] = min(255, Src1[k] + byte (k % 4) of C), byte 0 being the least
 * significant byte of C.
 *
 * C == 0 is handled as a plain copy of Src1 into Dest. Otherwise, when
 * SDL_imageFilterMMXdetect() reports MMX and more than 7 bytes are supplied,
 * the buffers are first handed to SDL_imageFilterAddUintMMX() together with
 * D = SWAP_32(C); the scalar loop below then only covers the bytes starting
 * at (length & 0xfffffff8). Without MMX the scalar loop covers the whole
 * buffer.
 *
 * NOTE(review): the MMX path is given the byte-swapped D for the second half
 * of every 8-byte group, whereas the scalar loop repeats the bytes of C in
 * the same order every 4 bytes -- confirm which of the two is intended.
 *
 * \param Src1   Source buffer (never written).
 * \param Dest   Destination buffer.
 * \param length Number of bytes to process in each buffer.
 * \param C      Constant whose four bytes are added cyclically.
 *
 * \return 0 on success (including length == 0), -1 if any pointer is NULL.
 */
int SDL_imageFilterAddUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
{
        unsigned int i = 0;
        unsigned int D;
        int iC[4];
        int sum;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /*
         * Special case C == 0: the result equals the source, so copy it to
         * the destination (destination first, source second). memmove keeps
         * the call well defined when Src1 and Dest are the same buffer.
         */
        if (C == 0) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
                /* Accelerated pass over the 8-byte groups. */
                D = SWAP_32(C);
                SDL_imageFilterAddUintMMX(Src1, Dest, length, C, D);

                /* Nothing left over when length is a multiple of 8. */
                if ((length & 7) == 0)
                        return (0);
                i = length & 0xfffffff8;
        }

        /* Split C into its four bytes, least significant first. */
        iC[0] = (int) ((C >>  0) & 0xff);
        iC[1] = (int) ((C >>  8) & 0xff);
        iC[2] = (int) ((C >> 16) & 0xff);
        iC[3] = (int) ((C >> 24) & 0xff);

        /*
         * Scalar pass: whole buffer, or only the leftover tail. The start
         * index is 0 or a multiple of 8, so (i & 3) selects the same byte
         * of C as a 4-byte inner loop restarted at the start index would.
         */
        for (; i < length; i++) {
                sum = (int) Src1[i] + iC[i & 3];
                Dest[i] = (unsigned char) ((sum > 255) ? 255 : sum);
        }

        return (0);
}
01985 
/*!
\brief Internal MMX implementation of filter AddByteToHalf: D = saturation255(S/2 + C)

Only complete groups of 8 bytes are processed (SrcLength/8 of them). The last
SrcLength%8 bytes are not touched and have to be handled by the caller.

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source array.
\param C Constant to add (C).
\param Mask Pointer to 8 mask bytes that are ANDed with every shifted group
            (the caller in this file passes eight 0x7F bytes).

\return Returns 0 for success, or -1 if a pointer is NULL or the file was built without USE_MMX.
*/
int SDL_imageFilterAddByteToHalfMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C,
                                    unsigned char *Mask)
{
#ifdef USE_MMX
        /* The assembly dereferences all three pointers unconditionally */
        if ((Src1 == NULL) || (Dest == NULL) || (Mask == NULL))
                return (-1);
        /*
         * The loop decrements its counter before testing it, so a count of zero
         * would wrap around and run 2^32-1 iterations. Nothing to do when there
         * is no complete 8-byte group.
         */
        if ((SrcLength >> 3) == 0)
                return (0);
#if !defined(GCC__)
        __asm
        {
                pusha
                /* ** Duplicate C in 8 bytes of MM1 ** */
                mov al, C               /* load C into AL */
                mov ah, al              /* copy AL into AH */
                mov bx, ax              /* copy AX into BX */
                shl eax, 16             /* shift the 2 bytes of EAX left */
                mov ax, bx              /* EAX now holds 4 copies of C */
                movd mm1, eax           /* copy EAX into MM1 */
                movd mm2, eax           /* copy EAX into MM2 */
                punpckldq mm1, mm2      /* MM1 now holds 8 copies of C */
                mov edx, Mask           /* load Mask address into EDX */
                movq mm0, [edx]         /* load the 8 mask bytes into MM0 */
                mov eax, Src1           /* load Src1 address into EAX */
                mov edi, Dest           /* load Dest address into EDI */
                mov ecx, SrcLength      /* load byte count into ECX */
                shr ecx, 3              /* count/8: one iteration per 8 bytes */
                align 16                /* 16 byte alignment of the loop entry */
L1022:
                movq mm2, [eax]         /* load 8 source bytes into MM2 */
                psrlw mm2, 1            /* shift the 4 WORDS of MM2 right by 1 bit */
                pand mm2, mm0           /* clear the bit shifted in from the neighbouring byte */
                paddusb mm2, mm1        /* MM2 = S/2 + C, unsigned saturating bytes */
                movq [edi], mm2         /* store the result in Dest */
                add eax, 8              /* advance the source pointer */
                add edi, 8              /* advance the destination pointer */
                dec ecx                 /* one group done */
                jnz L1022               /* loop while groups remain */
                emms                    /* leave MMX state */
                popa
        }
#else
        asm volatile
                ("pusha              \n\t"
                /* ** Duplicate C in 8 bytes of MM1 ** */
                "mov           %3, %%al \n\t"   /* load C into AL */
                "mov         %%al, %%ah \n\t"   /* copy AL into AH */
                "mov         %%ax, %%bx \n\t"   /* copy AX into BX */
                "shl         $16, %%eax \n\t"   /* shift the 2 bytes of EAX left */
                "mov         %%bx, %%ax \n\t"   /* EAX now holds 4 copies of C */
                "movd      %%eax, %%mm1 \n\t"   /* copy EAX into MM1 */
                "movd      %%eax, %%mm2 \n\t"   /* copy EAX into MM2 */
                "punpckldq %%mm2, %%mm1 \n\t"   /* MM1 now holds 8 copies of C */
                "movl         %4, %%edx \n\t"   /* load Mask address into EDX */
                "movq    (%%edx), %%mm0 \n\t"   /* load the 8 mask bytes into MM0 */
                "mov          %1, %%eax \n\t"   /* load Src1 address into EAX */
                "mov          %0, %%edi \n\t"   /* load Dest address into EDI */
                "mov          %2, %%ecx \n\t"   /* load byte count into ECX */
                "shr          $3, %%ecx \n\t"   /* count/8: one iteration per 8 bytes */
                ".align 16              \n\t"   /* 16 byte alignment of the loop entry */
                "1:                     \n\t"
                "movq    (%%eax), %%mm2 \n\t"   /* load 8 source bytes into MM2 */
                "psrlw        $1, %%mm2 \n\t"   /* shift the 4 WORDS of MM2 right by 1 bit */
                /* hand-encoded "pand %%mm0, %%mm2": clear the bit shifted in from the neighbouring byte */
                ".byte     0x0f, 0xdb, 0xd0 \n\t"
                "paddusb   %%mm1, %%mm2 \n\t"   /* MM2 = S/2 + C, unsigned saturating bytes */
                "movq    %%mm2, (%%edi) \n\t"   /* store the result in Dest */
                "add          $8, %%eax \n\t"   /* advance the source pointer */
                "add          $8, %%edi \n\t"   /* advance the destination pointer */
                "dec              %%ecx \n\t"   /* one group done */
                "jnz                  1b \n\t"  /* loop while groups remain */
                "emms                   \n\t"   /* leave MMX state */
                "popa                   \n\t":"=m" (Dest)       /* %0 */
                :"m"(Src1),             /* %1 */
                "m"(SrcLength),         /* %2 */
                "m"(C),                 /* %3 */
                "m"(Mask)               /* %4 */
                );
#endif
        return (0);
#else
        return (-1);
#endif
}
02078 
/*!
\brief Filter using AddByteToHalf: D = saturation255(S/2 + C)

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source array.
\param C Constant to add (C).

\return Returns 0 for success or -1 when Src1 or Dest is NULL.
*/
int SDL_imageFilterAddByteToHalf(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
{
        static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
        unsigned int pos = 0;
        int sum;

        if (!Src1 || !Dest) {
                return (-1);
        }
        if (!length) {
                return (0);
        }

        /* Hand every complete 8-byte group to the MMX routine when it is usable */
        if (SDL_imageFilterMMXdetect() && (length >= 8)) {
                SDL_imageFilterAddByteToHalfMMX(Src1, Dest, length, C, Mask);
                /* Continue behind the last complete group (equals length when nothing is left) */
                pos = length & 0xfffffff8;
        }

        /* Plain C for whatever has not been processed yet */
        for (; pos < length; pos++) {
                sum = (Src1[pos] / 2) + (int) C;
                Dest[pos] = (unsigned char) ((sum > 255) ? 255 : sum);
        }

        return (0);
}
02140 
/*!
\brief Internal MMX implementation of filter SubByte: D = saturation0(S - C)

Only complete groups of 8 bytes are processed (SrcLength/8 of them). The last
SrcLength%8 bytes are not touched and have to be handled by the caller.

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source array.
\param C Constant to subtract (C).

\return Returns 0 for success, or -1 if a pointer is NULL or the file was built without USE_MMX.
*/
int SDL_imageFilterSubByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
{
#ifdef USE_MMX
        /* The assembly dereferences both pointers unconditionally */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        /*
         * The loop decrements its counter before testing it, so a count of zero
         * would wrap around and run 2^32-1 iterations. Nothing to do when there
         * is no complete 8-byte group.
         */
        if ((SrcLength >> 3) == 0)
                return (0);
#if !defined(GCC__)
        __asm
        {
                pusha
                /* ** Duplicate C in 8 bytes of MM1 ** */
                mov al, C               /* load C into AL */
                mov ah, al              /* copy AL into AH */
                mov bx, ax              /* copy AX into BX */
                shl eax, 16             /* shift the 2 bytes of EAX left */
                mov ax, bx              /* EAX now holds 4 copies of C */
                movd mm1, eax           /* copy EAX into MM1 */
                movd mm2, eax           /* copy EAX into MM2 */
                punpckldq mm1, mm2      /* MM1 now holds 8 copies of C */
                mov eax, Src1           /* load Src1 address into EAX */
                mov edi, Dest           /* load Dest address into EDI */
                mov ecx, SrcLength      /* load byte count into ECX */
                shr ecx, 3              /* count/8: one iteration per 8 bytes */
                align 16                /* 16 byte alignment of the loop entry */
L1023:
                movq mm0, [eax]         /* load 8 source bytes into MM0 */
                psubusb mm0, mm1        /* MM0 = S - C, unsigned saturating bytes */
                movq [edi], mm0         /* store the result in Dest */
                add eax, 8              /* advance the source pointer */
                add edi, 8              /* advance the destination pointer */
                dec ecx                 /* one group done */
                jnz L1023               /* loop while groups remain */
                emms                    /* leave MMX state */
                popa
        }
#else
        asm volatile
                ("pusha              \n\t"
                /* ** Duplicate C in 8 bytes of MM1 ** */
                "mov           %3, %%al \n\t"   /* load C into AL */
                "mov         %%al, %%ah \n\t"   /* copy AL into AH */
                "mov         %%ax, %%bx \n\t"   /* copy AX into BX */
                "shl         $16, %%eax \n\t"   /* shift the 2 bytes of EAX left */
                "mov         %%bx, %%ax \n\t"   /* EAX now holds 4 copies of C */
                "movd      %%eax, %%mm1 \n\t"   /* copy EAX into MM1 */
                "movd      %%eax, %%mm2 \n\t"   /* copy EAX into MM2 */
                "punpckldq %%mm2, %%mm1 \n\t"   /* MM1 now holds 8 copies of C */
                "mov          %1, %%eax \n\t"   /* load Src1 address into EAX */
                "mov          %0, %%edi \n\t"   /* load Dest address into EDI */
                "mov          %2, %%ecx \n\t"   /* load byte count into ECX */
                "shr          $3, %%ecx \n\t"   /* count/8: one iteration per 8 bytes */
                ".align 16              \n\t"   /* 16 byte alignment of the loop entry */
                "1: movq (%%eax), %%mm0 \n\t"   /* load 8 source bytes into MM0 */
                "psubusb   %%mm1, %%mm0 \n\t"   /* MM0 = S - C, unsigned saturating bytes */
                "movq    %%mm0, (%%edi) \n\t"   /* store the result in Dest */
                "add          $8, %%eax \n\t"   /* advance the source pointer */
                "add          $8, %%edi \n\t"   /* advance the destination pointer */
                "dec              %%ecx \n\t"   /* one group done */
                "jnz                 1b \n\t"   /* loop while groups remain */
                "emms                   \n\t"   /* leave MMX state */
                "popa                   \n\t":"=m" (Dest)       /* %0 */
                :"m"(Src1),             /* %1 */
                "m"(SrcLength),         /* %2 */
                "m"(C)                  /* %3 */
                );
#endif
        return (0);
#else
        return (-1);
#endif
}
02219 
/*!
\brief Filter using SubByte: D = saturation0(S - C)

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source array.
\param C Constant to subtract (C).

\return Returns 0 for success or -1 when Src1 or Dest is NULL.
*/
int SDL_imageFilterSubByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
{
        unsigned int i, istart;
        int iC;
        int result;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /*
         * Special case C == 0: the result is a copy of the source.
         * The destination is the FIRST argument; memmove is used because the
         * filter may be applied in place (Src1 == Dest).
         */
        if (C == 0) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
                /* MMX handles every complete group of 8 bytes */
                SDL_imageFilterSubByteMMX(Src1, Dest, length, C);
                /* Continue behind the last complete group (equals length when nothing is left) */
                istart = length & 0xfffffff8;
        } else {
                /* No MMX or too little data: process everything in C */
                istart = 0;
        }

        /* C routine for the unprocessed bytes */
        iC = (int) C;
        for (i = istart; i < length; i++) {
                result = (int) Src1[i] - iC;
                if (result < 0)
                        result = 0;
                Dest[i] = (unsigned char) result;
        }

        return (0);
}
02285 
/*!
\brief Internal MMX implementation of filter SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs = swap_endian_bytes(C)

Each group of 8 source bytes has an 8-byte pattern subtracted from it whose
lower 4 bytes come from C and whose upper 4 bytes come from D. Only complete
groups of 8 bytes are processed (SrcLength/8 of them); the last SrcLength%8
bytes are not touched and have to be handled by the caller.

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source array.
\param C Constant placed in the lower 4 bytes of the subtracted pattern.
\param D Constant placed in the upper 4 bytes of the subtracted pattern
         (the caller in this file passes the byte-swapped C).

\return Returns 0 for success, or -1 if a pointer is NULL or the file was built without USE_MMX.
*/
int SDL_imageFilterSubUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
{
#ifdef USE_MMX
        /* The assembly dereferences both pointers unconditionally */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        /*
         * The loop decrements its counter before testing it, so a count of zero
         * would wrap around and run 2^32-1 iterations. Nothing to do when there
         * is no complete 8-byte group.
         */
        if ((SrcLength >> 3) == 0)
                return (0);
#if !defined(GCC__)
        __asm
        {
                pusha
                /* ** Build the 8-byte pattern D:C in MM1 ** */
                mov eax, C              /* load C into EAX */
                movd mm1, eax           /* copy EAX into MM1 */
                mov eax, D              /* load D into EAX */
                movd mm2, eax           /* copy EAX into MM2 */
                punpckldq mm1, mm2      /* MM1 = D in the high dword, C in the low dword */
                mov eax, Src1           /* load Src1 address into EAX */
                mov edi, Dest           /* load Dest address into EDI */
                mov ecx, SrcLength      /* load byte count into ECX */
                shr ecx, 3              /* count/8: one iteration per 8 bytes */
                align 16                /* 16 byte alignment of the loop entry */
L11024:
                movq mm0, [eax]         /* load 8 source bytes into MM0 */
                psubusb mm0, mm1        /* MM0 = S - pattern, unsigned saturating bytes */
                movq [edi], mm0         /* store the result in Dest */
                add eax, 8              /* advance the source pointer */
                add edi, 8              /* advance the destination pointer */
                dec ecx                 /* one group done */
                jnz L11024              /* loop while groups remain */
                emms                    /* leave MMX state */
                popa
        }
#else
        asm volatile
                ("pusha              \n\t"
                /* ** Build the 8-byte pattern D:C in MM1 ** */
                "mov          %3, %%eax \n\t"   /* load C into EAX */
                "movd      %%eax, %%mm1 \n\t"   /* copy EAX into MM1 */
                "mov          %4, %%eax \n\t"   /* load D into EAX */
                "movd      %%eax, %%mm2 \n\t"   /* copy EAX into MM2 */
                "punpckldq %%mm2, %%mm1 \n\t"   /* MM1 = D in the high dword, C in the low dword */
                "mov          %1, %%eax \n\t"   /* load Src1 address into EAX */
                "mov          %0, %%edi \n\t"   /* load Dest address into EDI */
                "mov          %2, %%ecx \n\t"   /* load byte count into ECX */
                "shr          $3, %%ecx \n\t"   /* count/8: one iteration per 8 bytes */
                ".align 16              \n\t"   /* 16 byte alignment of the loop entry */
                "1: movq (%%eax), %%mm0 \n\t"   /* load 8 source bytes into MM0 */
                "psubusb   %%mm1, %%mm0 \n\t"   /* MM0 = S - pattern, unsigned saturating bytes */
                "movq    %%mm0, (%%edi) \n\t"   /* store the result in Dest */
                "add          $8, %%eax \n\t"   /* advance the source pointer */
                "add          $8, %%edi \n\t"   /* advance the destination pointer */
                "dec              %%ecx \n\t"   /* one group done */
                "jnz                  1b \n\t"  /* loop while groups remain */
                "emms                   \n\t"   /* leave MMX state */
                "popa                   \n\t":"=m" (Dest)       /* %0 */
                :"m"(Src1),             /* %1 */
                "m"(SrcLength),         /* %2 */
                "m"(C),                 /* %3 */
                "m"(D)                  /* %4 */
                );
#endif
        return (0);
#else
        return (-1);
#endif
}
02360 
/*!
\brief Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs = the 4 bytes of C, least significant first

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source array.
\param C Constant to subtract (C); its 4 bytes are applied to successive source bytes.

\return Returns 0 for success or -1 when Src1 or Dest is NULL.
*/
int SDL_imageFilterSubUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
{
        unsigned int i, istart, D;
        int iC[4];
        int result;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /*
         * Special case C == 0: the result is a copy of the source.
         * The destination is the FIRST argument; memmove is used because the
         * filter may be applied in place (Src1 == Dest).
         */
        if (C == 0) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
                /*
                 * MMX handles every complete group of 8 bytes.
                 * NOTE(review): the MMX routine subtracts the byte-swapped constant D
                 * from bytes 4..7 of each group, while the C loop below applies the
                 * bytes of C in the same order to every 4 bytes - confirm which of
                 * the two is intended.
                 */
                D = SWAP_32(C);
                SDL_imageFilterSubUintMMX(Src1, Dest, length, C, D);
                /* Continue behind the last complete group (equals length when nothing is left) */
                istart = length & 0xfffffff8;
        } else {
                /* No MMX or too little data: process everything in C */
                istart = 0;
        }

        /* Split C into its bytes, least significant first */
        iC[3] = (int) ((C >> 24) & 0xff);
        iC[2] = (int) ((C >> 16) & 0xff);
        iC[1] = (int) ((C >>  8) & 0xff);
        iC[0] = (int) ((C >>  0) & 0xff);

        /* C routine for the unprocessed bytes; istart is a multiple of 4, so i & 3 selects the byte of C */
        for (i = istart; i < length; i++) {
                result = (int) Src1[i] - iC[i & 3];
                if (result < 0)
                        result = 0;
                Dest[i] = (unsigned char) result;
        }

        return (0);
}
02433 
/*!
\brief Internal MMX implementation of filter ShiftRight: D = saturation0(S >> N)

The four 16-bit words of each 8-byte group are shifted right by N bits and the
result is ANDed with a byte mask that is built by shifting an all-ones value
right N times, ANDing with Mask after each step. Only complete groups of
8 bytes are processed (SrcLength/8 of them); the last SrcLength%8 bytes are not
touched and have to be handled by the caller.

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source array.
\param N Number of bit-positions to shift (N).
\param Mask Pointer to 8 mask bytes used while building the byte mask
            (the caller in this file passes eight 0x7F bytes).

\return Returns 0 for success, or -1 if a pointer is NULL or the file was built without USE_MMX.
*/
int SDL_imageFilterShiftRightMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
                                 unsigned char *Mask)
{
#ifdef USE_MMX
        /* The assembly dereferences all three pointers unconditionally */
        if ((Src1 == NULL) || (Dest == NULL) || (Mask == NULL))
                return (-1);
        /*
         * The main loop decrements its counter before testing it, so a count of
         * zero would wrap around and run 2^32-1 iterations. Nothing to do when
         * there is no complete 8-byte group.
         */
        if ((SrcLength >> 3) == 0)
                return (0);
        /*
         * The mask-building loop also decrements before testing: with N == 0 it
         * would run 256 times instead of not at all. A shift by zero is a copy
         * of the complete groups (memmove, since Src1 may equal Dest).
         */
        if (N == 0) {
                memmove(Dest, Src1, SrcLength & 0xfffffff8);
                return (0);
        }
#if !defined(GCC__)
        __asm
        {
                pusha
                mov edx, Mask           /* load Mask address into EDX */
                movq mm0, [edx]         /* load the 8 mask bytes into MM0 */
                xor ecx, ecx            /* zero ECX */
                mov cl, N               /* load N into CL */
                movd mm3, ecx           /* keep the shift count in MM3 */
                pcmpeqb mm1, mm1        /* MM1 = all ones */
L10240:                                 /* ** Build the byte mask in MM1, one bit per pass ** */
                psrlw mm1, 1            /* shift the 4 WORDS of MM1 right by 1 bit */
                pand mm1, mm0           /* AND with Mask */
                dec cl                  /* one pass done */
                jnz L10240              /* N passes in total */
                /* ** Mask is prepared ** */
                mov eax, Src1           /* load Src1 address into EAX */
                mov edi, Dest           /* load Dest address into EDI */
                mov ecx, SrcLength      /* load byte count into ECX */
                shr ecx, 3              /* count/8: one iteration per 8 bytes */
                align 16                /* 16 byte alignment of the loop entry */
L10241:
                movq mm0, [eax]         /* load 8 source bytes into MM0 */
                psrlw mm0, mm3          /* shift the 4 WORDS of MM0 right by N bits */
                pand mm0, mm1           /* clear the bits shifted in from the neighbouring byte */
                movq [edi], mm0         /* store the result in Dest */
                add eax, 8              /* advance the source pointer */
                add edi, 8              /* advance the destination pointer */
                dec ecx                 /* one group done */
                jnz L10241              /* loop while groups remain */
                emms                    /* leave MMX state */
                popa
        }
#else
        asm volatile
                ("pusha              \n\t"
                "movl         %4, %%edx \n\t"   /* load Mask address into EDX */
                "movq    (%%edx), %%mm0 \n\t"   /* load the 8 mask bytes into MM0 */
                "xor       %%ecx, %%ecx \n\t"   /* zero ECX */
                "mov           %3, %%cl \n\t"   /* load N into CL */
                "movd      %%ecx, %%mm3 \n\t"   /* keep the shift count in MM3 */
                "pcmpeqb   %%mm1, %%mm1 \n\t"   /* MM1 = all ones */
                "1:                     \n\t"   /* ** Build the byte mask in MM1, one bit per pass ** */
                "psrlw        $1, %%mm1 \n\t"   /* shift the 4 WORDS of MM1 right by 1 bit */
                /* hand-encoded "pand %%mm0, %%mm1": AND with Mask */
                ".byte     0x0f, 0xdb, 0xc8 \n\t"
                "dec               %%cl \n\t"   /* one pass done */
                "jnz                 1b \n\t"   /* N passes in total */
                /* ** Mask is prepared ** */
                "mov          %1, %%eax \n\t"   /* load Src1 address into EAX */
                "mov          %0, %%edi \n\t"   /* load Dest address into EDI */
                "mov          %2, %%ecx \n\t"   /* load byte count into ECX */
                "shr          $3, %%ecx \n\t"   /* count/8: one iteration per 8 bytes */
                ".align 16              \n\t"   /* 16 byte alignment of the loop entry */
                "2:                     \n\t"
                "movq    (%%eax), %%mm0 \n\t"   /* load 8 source bytes into MM0 */
                "psrlw     %%mm3, %%mm0 \n\t"   /* shift the 4 WORDS of MM0 right by N bits */
                /* hand-encoded "pand %%mm1, %%mm0": clear the bits shifted in from the neighbouring byte */
                ".byte     0x0f, 0xdb, 0xc1 \n\t"
                "movq    %%mm0, (%%edi) \n\t"   /* store the result in Dest */
                "add          $8, %%eax \n\t"   /* advance the source pointer */
                "add          $8, %%edi \n\t"   /* advance the destination pointer */
                "dec              %%ecx \n\t"   /* one group done */
                "jnz                 2b \n\t"   /* loop while groups remain */
                "emms                   \n\t"   /* leave MMX state */
                "popa                   \n\t":"=m" (Dest)       /* %0 */
                :"m"(Src1),             /* %1 */
                "m"(SrcLength),         /* %2 */
                "m"(N),                 /* %3 */
                "m"(Mask)               /* %4 */
                );
#endif
        return (0);
#else
        return (-1);
#endif
}
02527 
/*!
\brief Filter using ShiftRight: D = saturation0(S >> N)

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source array.
\param N Number of bit-positions to shift (N). Valid range is 0 to 8.

\return Returns 0 for success or -1 when Src1 or Dest is NULL or N is larger than 8.
*/
int SDL_imageFilterShiftRight(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
        static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
        unsigned int i, istart;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /* Check shift */
        if (N > 8) {
                return (-1);
        }

        /*
         * Special case N == 0: the result is a copy of the source.
         * The destination is the FIRST argument; memmove is used because the
         * filter may be applied in place (Src1 == Dest).
         */
        if (N == 0) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
                /* MMX handles every complete group of 8 bytes */
                SDL_imageFilterShiftRightMMX(Src1, Dest, length, N, Mask);
                /* Continue behind the last complete group (equals length when nothing is left) */
                istart = length & 0xfffffff8;
        } else {
                /* No MMX or too little data: process everything in C */
                istart = 0;
        }

        /* C routine for the unprocessed bytes */
        for (i = istart; i < length; i++) {
                Dest[i] = (unsigned char) (Src1[i] >> N);
        }

        return (0);
}
02594 
/*!
\brief Internal MMX implementation of filter ShiftRightUint: D = saturation0((uint)S >> N)

The two 32-bit doublewords of each 8-byte group are shifted right by N bits.
Only complete groups of 8 bytes are processed (SrcLength/8 of them); the last
SrcLength%8 bytes are not touched and have to be handled by the caller.

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source array.
\param N Number of bit-positions to shift (N).

\return Returns 0 for success, or -1 if a pointer is NULL or the file was built without USE_MMX.
*/
int SDL_imageFilterShiftRightUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
{
#ifdef USE_MMX
        /* The assembly dereferences both pointers unconditionally */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        /*
         * The loop decrements its counter before testing it, so a count of zero
         * would wrap around and run 2^32-1 iterations. Nothing to do when there
         * is no complete 8-byte group.
         */
        if ((SrcLength >> 3) == 0)
                return (0);
#if !defined(GCC__)
        __asm
        {
                pusha
                /*
                 * The non-immediate form of PSRLD takes a 64-bit count, so the
                 * one-byte N must not be used as a memory operand directly:
                 * zero-extend it into MM1 first.
                 */
                xor ecx, ecx            /* zero ECX */
                mov cl, N               /* load N into CL */
                movd mm1, ecx           /* keep the shift count in MM1 */
                mov eax, Src1           /* load Src1 address into EAX */
                mov edi, Dest           /* load Dest address into EDI */
                mov ecx, SrcLength      /* load byte count into ECX */
                shr ecx, 3              /* count/8: one iteration per 8 bytes */
                align 16                /* 16 byte alignment of the loop entry */
L13023:
                movq mm0, [eax]         /* load 8 source bytes into MM0 */
                psrld mm0, mm1          /* shift the 2 DWORDS of MM0 right by N bits */
                movq [edi], mm0         /* store the result in Dest */
                add eax, 8              /* advance the source pointer */
                add edi, 8              /* advance the destination pointer */
                dec ecx                 /* one group done */
                jnz L13023              /* loop while groups remain */
                emms                    /* leave MMX state */
                popa
        }
#else
        asm volatile
                ("pusha              \n\t"
                /*
                 * The non-immediate form of PSRLD takes a 64-bit count, so the
                 * one-byte N must not be used as a memory operand directly:
                 * zero-extend it into MM1 first.
                 */
                "xor       %%ecx, %%ecx \n\t"   /* zero ECX */
                "mov           %3, %%cl \n\t"   /* load N into CL */
                "movd      %%ecx, %%mm1 \n\t"   /* keep the shift count in MM1 */
                "mov          %1, %%eax \n\t"   /* load Src1 address into EAX */
                "mov          %0, %%edi \n\t"   /* load Dest address into EDI */
                "mov          %2, %%ecx \n\t"   /* load byte count into ECX */
                "shr          $3, %%ecx \n\t"   /* count/8: one iteration per 8 bytes */
                ".align 16              \n\t"   /* 16 byte alignment of the loop entry */
                "1: movq (%%eax), %%mm0 \n\t"   /* load 8 source bytes into MM0 */
                "psrld     %%mm1, %%mm0 \n\t"   /* shift the 2 DWORDS of MM0 right by N bits */
                "movq    %%mm0, (%%edi) \n\t"   /* store the result in Dest */
                "add          $8, %%eax \n\t"   /* advance the source pointer */
                "add          $8, %%edi \n\t"   /* advance the destination pointer */
                "dec              %%ecx \n\t"   /* one group done */
                "jnz                 1b \n\t"   /* loop while groups remain */
                "emms                   \n\t"   /* leave MMX state */
                "popa                   \n\t":"=m" (Dest)       /* %0 */
                :"m"(Src1),             /* %1 */
                "m"(SrcLength),         /* %2 */
                "m"(N)                  /* %3 */
                );
#endif
        return (0);
#else
        return (-1);
#endif
}
02655 
/*!
\brief Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)

The arrays are treated as sequences of 4-byte unsigned integers in host byte
order. Bytes behind the last complete 4-byte word are not processed.

\param Src1 Pointer to the start of the source byte array (S1).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source array.
\param N Number of bit-positions to shift (N). Valid range is 0 to 32.

\return Returns 0 for success or -1 when Src1 or Dest is NULL or N is larger than 32.
*/
int SDL_imageFilterShiftRightUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
        unsigned int i, istart;
        unsigned int word;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /* Check shift */
        if (N > 32) {
                return (-1);
        }

        /*
         * Special case N == 0: the result is a copy of the source.
         * The destination is the FIRST argument; memmove is used because the
         * filter may be applied in place (Src1 == Dest).
         */
        if (N == 0) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
                /* MMX handles every complete group of 8 bytes */
                SDL_imageFilterShiftRightUintMMX(Src1, Dest, length, N);
                /* Continue behind the last complete group (equals length when nothing is left) */
                istart = length & 0xfffffff8;
        } else {
                /* No MMX or too little data: process everything in C */
                istart = 0;
        }

        /*
         * C routine for the unprocessed complete words (this code assumes a
         * 4-byte unsigned int). The loop includes the word that ends exactly at
         * 'length'. memcpy is used for the loads and stores because the byte
         * pointers carry no alignment guarantee for unsigned int access.
         */
        for (i = istart; (length - i) >= 4; i += 4) {
                memcpy(&word, &Src1[i], 4);
                /* Shifting a 32-bit value by 32 is undefined in C; the result is defined here as 0 */
                word = (N < 32) ? (word >> N) : 0;
                memcpy(&Dest[i], &word, 4);
        }

        return (0);
}
02725 
/*!
\brief Internal MMX implementation of filter MultByByte: D = saturation255(S * C)

Every source byte is widened to a 16-bit word, multiplied by C and packed back
to a byte with unsigned saturation. Two loops exist: a short one that is only
correct while no product exceeds 32767 (C <= 128), and a longer one that first
takes the absolute value of the signed 16-bit product so that larger products
saturate to 255 as well. Only complete groups of 8 bytes are processed
(SrcLength/8 of them); the last SrcLength%8 bytes are not touched and have to
be handled by the caller.

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source array.
\param C Constant to multiply with (C).

\return Returns 0 for success, or -1 if a pointer is NULL or the file was built without USE_MMX.
*/
int SDL_imageFilterMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
{
#ifdef USE_MMX
        /* The assembly dereferences both pointers unconditionally */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        /*
         * The loops decrement their counter before testing it, so a count of
         * zero would wrap around and run 2^32-1 iterations. Nothing to do when
         * there is no complete 8-byte group.
         */
        if ((SrcLength >> 3) == 0)
                return (0);
#if !defined(GCC__)
        __asm
        {
                pusha
                /* ** Duplicate C in 4 words of MM1 ** */
                mov al, C               /* load C into AL */
                xor ah, ah              /* zero AH */
                mov bx, ax              /* copy AX into BX */
                shl eax, 16             /* shift the 2 bytes of EAX left */
                mov ax, bx              /* EAX now holds 2 word copies of C */
                movd mm1, eax           /* copy EAX into MM1 */
                movd mm2, eax           /* copy EAX into MM2 */
                punpckldq mm1, mm2      /* MM1 now holds 4 word copies of C */
                pxor mm0, mm0           /* MM0 = 0, used for unpacking */
                mov eax, Src1           /* load Src1 address into EAX */
                mov edi, Dest           /* load Dest address into EDI */
                mov ecx, SrcLength      /* load byte count into ECX */
                shr ecx, 3              /* count/8: one iteration per 8 bytes */
                /* EAX holds the source address by now, so C is reloaded for the unsigned test */
                mov dl, C               /* load C into DL */
                cmp dl, 128             /* can a product exceed 32767? */
                ja L10251               /* yes (C > 128): use the loop that takes the absolute value */
                align 16                /* 16 byte alignment of the loop entry */
L10250:
                movq mm3, [eax]         /* load 8 source bytes into MM3 */
                movq mm4, mm3           /* copy MM3 into MM4 */
                punpcklbw mm3, mm0      /* widen the 4 low bytes to words */
                punpckhbw mm4, mm0      /* widen the 4 high bytes to words */
                pmullw mm3, mm1         /* multiply the 4 low words by C */
                pmullw mm4, mm1         /* multiply the 4 high words by C */
                packuswb mm3, mm4       /* pack the words back to bytes with unsigned saturation */
                movq [edi], mm3         /* store the result in Dest */
                add eax, 8              /* advance the source pointer */
                add edi, 8              /* advance the destination pointer */
                dec ecx                 /* one group done */
                jnz L10250              /* loop while groups remain */
                jmp L10252
                align 16                /* 16 byte alignment of the loop entry */
L10251:
                movq mm3, [eax]         /* load 8 source bytes into MM3 */
                movq mm4, mm3           /* copy MM3 into MM4 */
                punpcklbw mm3, mm0      /* widen the 4 low bytes to words */
                punpckhbw mm4, mm0      /* widen the 4 high bytes to words */
                pmullw mm3, mm1         /* multiply the 4 low words by C */
                pmullw mm4, mm1         /* multiply the 4 high words by C */
                /* ** Take the absolute value of the signed words ** */
                movq mm5, mm3           /* copy the low products */
                movq mm6, mm4           /* copy the high products */
                psraw mm5, 15           /* fill each word of MM5 with its sign bit */
                psraw mm6, 15           /* fill each word of MM6 with its sign bit */
                pxor mm3, mm5           /* one's complement of the negative words */
                pxor mm4, mm6           /* one's complement of the negative words */
                psubsw mm3, mm5         /* add 1 to the complemented words */
                psubsw mm4, mm6         /* add 1 to the complemented words */
                packuswb mm3, mm4       /* pack the words back to bytes with unsigned saturation */
                movq [edi], mm3         /* store the result in Dest */
                add eax, 8              /* advance the source pointer */
                add edi, 8              /* advance the destination pointer */
                dec ecx                 /* one group done */
                jnz L10251              /* loop while groups remain */
L10252:
                emms                    /* leave MMX state */
                popa
        }
#else
        asm volatile
                ("pusha              \n\t"
                /* ** Duplicate C in 4 words of MM1 ** */
                "mov           %3, %%al \n\t"   /* load C into AL */
                "xor         %%ah, %%ah \n\t"   /* zero AH */
                "mov         %%ax, %%bx \n\t"   /* copy AX into BX */
                "shl         $16, %%eax \n\t"   /* shift the 2 bytes of EAX left */
                "mov         %%bx, %%ax \n\t"   /* EAX now holds 2 word copies of C */
                "movd      %%eax, %%mm1 \n\t"   /* copy EAX into MM1 */
                "movd      %%eax, %%mm2 \n\t"   /* copy EAX into MM2 */
                "punpckldq %%mm2, %%mm1 \n\t"   /* MM1 now holds 4 word copies of C */
                "pxor      %%mm0, %%mm0 \n\t"   /* MM0 = 0, used for unpacking */
                "mov          %1, %%eax \n\t"   /* load Src1 address into EAX */
                "mov          %0, %%edi \n\t"   /* load Dest address into EDI */
                "mov          %2, %%ecx \n\t"   /* load byte count into ECX */
                "shr          $3, %%ecx \n\t"   /* count/8: one iteration per 8 bytes */
                /* EAX holds the source address by now, so C is reloaded for the unsigned test */
                "mov           %3, %%dl \n\t"   /* load C into DL */
                "cmp         $128, %%dl \n\t"   /* can a product exceed 32767? */
                "ja                  2f \n\t"   /* yes (C > 128): use the loop that takes the absolute value */
                ".align 16              \n\t"   /* 16 byte alignment of the loop entry */
                "1: movq (%%eax), %%mm3 \n\t"   /* load 8 source bytes into MM3 */
                "movq      %%mm3, %%mm4 \n\t"   /* copy MM3 into MM4 */
                "punpcklbw %%mm0, %%mm3 \n\t"   /* widen the 4 low bytes to words */
                "punpckhbw %%mm0, %%mm4 \n\t"   /* widen the 4 high bytes to words */
                "pmullw    %%mm1, %%mm3 \n\t"   /* multiply the 4 low words by C */
                "pmullw    %%mm1, %%mm4 \n\t"   /* multiply the 4 high words by C */
                "packuswb  %%mm4, %%mm3 \n\t"   /* pack the words back to bytes with unsigned saturation */
                "movq    %%mm3, (%%edi) \n\t"   /* store the result in Dest */
                "add          $8, %%eax \n\t"   /* advance the source pointer */
                "add          $8, %%edi \n\t"   /* advance the destination pointer */
                "dec              %%ecx \n\t"   /* one group done */
                "jnz                 1b \n\t"   /* loop while groups remain */
                "jmp                 3f \n\t"
                ".align 16              \n\t"   /* 16 byte alignment of the loop entry */
                "2: movq (%%eax), %%mm3 \n\t"   /* load 8 source bytes into MM3 */
                "movq      %%mm3, %%mm4 \n\t"   /* copy MM3 into MM4 */
                "punpcklbw %%mm0, %%mm3 \n\t"   /* widen the 4 low bytes to words */
                "punpckhbw %%mm0, %%mm4 \n\t"   /* widen the 4 high bytes to words */
                "pmullw    %%mm1, %%mm3 \n\t"   /* multiply the 4 low words by C */
                "pmullw    %%mm1, %%mm4 \n\t"   /* multiply the 4 high words by C */
                /* ** Take the absolute value of the signed words ** */
                "movq      %%mm3, %%mm5 \n\t"   /* copy the low products */
                "movq      %%mm4, %%mm6 \n\t"   /* copy the high products */
                "psraw       $15, %%mm5 \n\t"   /* fill each word of MM5 with its sign bit */
                "psraw       $15, %%mm6 \n\t"   /* fill each word of MM6 with its sign bit */
                "pxor      %%mm5, %%mm3 \n\t"   /* one's complement of the negative words */
                "pxor      %%mm6, %%mm4 \n\t"   /* one's complement of the negative words */
                "psubsw    %%mm5, %%mm3 \n\t"   /* add 1 to the complemented words */
                "psubsw    %%mm6, %%mm4 \n\t"   /* add 1 to the complemented words */
                "packuswb  %%mm4, %%mm3 \n\t"   /* pack the words back to bytes with unsigned saturation */
                "movq    %%mm3, (%%edi) \n\t"   /* store the result in Dest */
                "add          $8, %%eax \n\t"   /* advance the source pointer */
                "add          $8, %%edi \n\t"   /* advance the destination pointer */
                "dec              %%ecx \n\t"   /* one group done */
                "jnz                 2b \n\t"   /* loop while groups remain */
                "3: emms               \n\t"    /* leave MMX state */
                "popa                   \n\t":"=m" (Dest)       /* %0 */
                :"m"(Src1),             /* %1 */
                "m"(SrcLength),         /* %2 */
                "m"(C)                  /* %3 */
                );
#endif
        return (0);
#else
        return (-1);
#endif
}
02866 
/*!
\brief Filter using MultByByte: D = saturation255(S * C)

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source array.
\param C Constant to multiply with (C).

\return Returns 0 for success or -1 when Src1 or Dest is NULL.
*/
int SDL_imageFilterMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
{
        unsigned int i, istart;
        int iC;
        int result;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /*
         * Special case C == 1: the result is a copy of the source.
         * The destination is the FIRST argument; memmove is used because the
         * filter may be applied in place (Src1 == Dest).
         */
        if (C == 1) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
                /* MMX handles every complete group of 8 bytes */
                SDL_imageFilterMultByByteMMX(Src1, Dest, length, C);
                /* Continue behind the last complete group (equals length when nothing is left) */
                istart = length & 0xfffffff8;
        } else {
                /* No MMX or too little data: process everything in C */
                istart = 0;
        }

        /* C routine for the unprocessed bytes */
        iC = (int) C;
        for (i = istart; i < length; i++) {
                result = (int) Src1[i] * iC;
                if (result > 255)
                        result = 255;
                Dest[i] = (unsigned char) result;
        }

        return (0);
}
02932 
/*!
 * \brief MMX kernel: D = (S >> N) * C, packed back to bytes.
 *
 * Each group of 8 source bytes is zero-extended to words, shifted right by N,
 * multiplied by C (pmullw) and packed to bytes again (packuswb).
 *
 * \param Src1      Source bytes (read through EAX).
 * \param Dest      Destination bytes (written through EDI).
 * \param SrcLength Byte count. Only SrcLength >> 3 whole 8-byte blocks are processed;
 *                  the remaining (SrcLength & 7) bytes are not touched.
 * \param N         Right-shift count applied to each zero-extended byte.
 * \param C         Multiplier applied after the shift.
 *
 * \return 0 when compiled with USE_MMX, -1 otherwise (nothing is processed then).
 *
 * No argument validation is done here. The block loop decrements and tests ECX only
 * after the first iteration, so SrcLength must be at least 8.
 *
 * NOTE(review): pmullw keeps the low 16 bits of each product and packuswb saturates
 * signed words, so a product >= 0x8000 presumably packs to 0 rather than 255 - verify
 * against the C fallback, which clamps to 255.
 * NOTE(review): the GCC asm statement lists Dest as its only output and declares no
 * clobbers - confirm this is acceptable for the compilers in use.
 */
02944 int SDL_imageFilterShiftRightAndMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
02945                                                                                           unsigned char C)
02946 {
02947 #ifdef USE_MMX
02948 #if !defined(GCC__)
02949         __asm
02950         {
02951                 pusha
02952                         /* Broadcast C into the four words of MM1 */
02953                         mov al, C       /* AL = C */
02954                         xor ah, ah      /* AX = C (high byte cleared) */
02955                         mov bx, ax      /* BX = C */
02956                         shl eax, 16     /* move C into the upper word of EAX */
02957                         mov ax, bx      /* EAX = C:C as two words */
02958                         movd mm1, eax           /* low dword of MM1 = EAX */
02959                         movd mm2, eax           /* low dword of MM2 = EAX */
02960                         punpckldq mm1, mm2      /* MM1 = C in all four words */
02961                         xor ecx, ecx    /* clear ECX */
02962                         mov cl, N       /* ECX = N */
02963                         movd mm7, ecx           /* MM7 = shift count */
02964                         pxor mm0, mm0           /* MM0 = 0, used to zero-extend bytes */
02965                         mov eax, Src1           /* EAX = source pointer */
02966                         mov edi, Dest           /* EDI = destination pointer */
02967                         mov ecx, SrcLength      /* ECX = byte count */
02968                         shr ecx, 3      /* ECX = number of 8-byte blocks */
02969                         align 16                        /* align the loop entry */
02970 L1026:
02971                 movq mm3, [eax]         /* load 8 source bytes */
02972                 movq mm4, mm3           /* copy for the high half */
02973                         punpcklbw mm3, mm0      /* low 4 bytes -> 4 words */
02974                         punpckhbw mm4, mm0      /* high 4 bytes -> 4 words */
02975                         psrlw mm3, mm7          /* shift words right by N */
02976                         psrlw mm4, mm7          /* shift words right by N */
02977                         pmullw mm3, mm1         /* multiply by C, keep low 16 bits */
02978                         pmullw mm4, mm1         /* multiply by C, keep low 16 bits */
02979                         packuswb mm3, mm4       /* pack 8 words back into 8 bytes */
02980                         movq [edi], mm3         /* store 8 result bytes */
02981                         add eax, 8      /* next source block */
02982                         add edi, 8      /* next destination block */
02983                         dec              ecx            /* one block done */
02984                         jnz             L1026           /* loop while blocks remain */
02985                         emms                            /* leave MMX state */
02986                         popa
02987         }
02988 #else
02989         asm volatile
02990                 ("pusha              \n\t"
02991                 /* Operands: %0 = Dest, %1 = Src1, %2 = SrcLength, %3 = N, %4 = C */
02992                 "mov           %4, %%al \n\t"   /* AL = C */
02993                 "xor         %%ah, %%ah \n\t"   /* AX = C (high byte cleared) */
02994                 "mov         %%ax, %%bx \n\t"   /* BX = C */
02995                 "shl         $16, %%eax \n\t"   /* move C into the upper word of EAX */
02996                 "mov         %%bx, %%ax \n\t"   /* EAX = C:C as two words */
02997                 "movd      %%eax, %%mm1 \n\t"   /* low dword of MM1 = EAX */
02998                 "movd      %%eax, %%mm2 \n\t"   /* low dword of MM2 = EAX */
02999                 "punpckldq %%mm2, %%mm1 \n\t"   /* MM1 = C in all four words */
03000                 "xor       %%ecx, %%ecx \n\t"   /* clear ECX */
03001                 "mov           %3, %%cl \n\t"   /* ECX = N */
03002                 "movd      %%ecx, %%mm7 \n\t"   /* MM7 = shift count */
03003                 "pxor      %%mm0, %%mm0 \n\t"   /* MM0 = 0, used to zero-extend bytes */
03004                 "mov          %1, %%eax \n\t"   /* EAX = Src1 */
03005                 "mov          %0, %%edi \n\t"   /* EDI = Dest */
03006                 "mov          %2, %%ecx \n\t"   /* ECX = SrcLength */
03007                 "shr          $3, %%ecx \n\t"   /* ECX = number of 8-byte blocks */
03008                 ".align 16             \n\t"    /* align the loop entry */
03009                 "1: movq (%%eax), %%mm3 \n\t"   /* load 8 source bytes */
03010                 "movq      %%mm3, %%mm4 \n\t"   /* copy for the high half */
03011                 "punpcklbw %%mm0, %%mm3 \n\t"   /* low 4 bytes -> 4 words */
03012                 "punpckhbw %%mm0, %%mm4 \n\t"   /* high 4 bytes -> 4 words */
03013                 "psrlw     %%mm7, %%mm3 \n\t"   /* shift words right by N */
03014                 "psrlw     %%mm7, %%mm4 \n\t"   /* shift words right by N */
03015                 "pmullw    %%mm1, %%mm3 \n\t"   /* multiply by C, keep low 16 bits */
03016                 "pmullw    %%mm1, %%mm4 \n\t"   /* multiply by C, keep low 16 bits */
03017                 "packuswb  %%mm4, %%mm3 \n\t"   /* pack 8 words back into 8 bytes */
03018                 "movq    %%mm3, (%%edi) \n\t"   /* store 8 result bytes */
03019                 "add          $8, %%eax \n\t"   /* next source block */
03020                 "add          $8, %%edi \n\t"   /* next destination block */
03021                 "dec              %%ecx \n\t"   /* one block done */
03022                 "jnz                 1b \n\t"   /* loop while blocks remain */
03023                 "emms                   \n\t"   /* leave MMX state */
03024                 "popa                   \n\t":"=m" (Dest)       /* %0 */
03025                 :"m"(Src1),             /* %1 */
03026                 "m"(SrcLength),         /* %2 */
03027                 "m"(N),                 /* %3 */
03028                 "m"(C)                  /* %4 */
03029                 );
03030 #endif
03031         return (0);
03032 #else
03033         return (-1);
03034 #endif
03035 }
03036 
/*!
 * \brief Shift right, then multiply with saturation: D = min((S >> N) * C, 255).
 *
 * When MMX is detected and length > 7, the MMX kernel is invoked on the buffer and
 * the C loop below only handles the (length & 7) bytes at the end. Otherwise the C
 * loop handles the whole buffer.
 *
 * \param Src1   Pointer to the source bytes (S).
 * \param Dest   Pointer to the destination bytes (D).
 * \param length Number of bytes to process.
 * \param N      Number of bit positions to shift right; valid range 0..8.
 * \param C      Constant multiplier applied after the shift.
 *
 * \return 0 on success (including length == 0), -1 if Src1 or Dest is NULL or N > 8.
 */
int SDL_imageFilterShiftRightAndMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N,
                                           unsigned char C)
{
        unsigned int i, istart;
        int iC;
        unsigned char *cursrc1;
        unsigned char *curdest;
        int result;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /* Check shift */
        if (N > 8) {
                return (-1);
        }

        /*
         * Special case: no shift and a multiplier of 1 is the identity, so the
         * result is a plain copy of the source. The destination is the FIRST
         * argument of the copy routine; memmove keeps in-place calls
         * (Src1 == Dest) and overlapping buffers well defined.
         */
        if ((N == 0) && (C == 1)) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {

                SDL_imageFilterShiftRightAndMultByByteMMX(Src1, Dest, length, N, C);

                /* Check for bytes left over after the 8-byte blocks */
                if ((length & 7) > 0) {
                        /* Continue with the C loop at the first unprocessed byte */
                        istart = length & 0xfffffff8;
                        cursrc1 = &Src1[istart];
                        curdest = &Dest[istart];
                } else {
                        /* Length was a multiple of 8: nothing left to do */
                        return (0);
                }
        } else {
                /* No MMX (or too short): process everything in C */
                istart = 0;
                cursrc1 = Src1;
                curdest = Dest;
        }

        /* C fallback / tail: shift, multiply in int and clamp to 255 */
        iC = (int) C;
        for (i = istart; i < length; i++) {
                result = (int) (*cursrc1 >> N) * iC;
                if (result > 255)
                        result = 255;
                *curdest = (unsigned char) result;
                /* Advance pointers */
                cursrc1++;
                curdest++;
        }

        return (0);
}
03109 
/*!
 * \brief MMX kernel: per-byte left shift, D = (S << N) with carried-in bits masked off.
 *
 * The data is shifted as 16-bit words (psllw), which lets bits of a low byte spill
 * into its high neighbour. A byte mask is therefore built first: starting from all
 * ones it is shifted left by one and ANDed with the 8 bytes at Mask, N times. Each
 * shifted data block is then ANDed with that mask.
 *
 * \param Src1      Source bytes (read through EAX).
 * \param Dest      Destination bytes (written through EDI).
 * \param SrcLength Byte count. Only SrcLength >> 3 whole 8-byte blocks are processed;
 *                  the remaining (SrcLength & 7) bytes are not touched.
 * \param N         Shift count; also the iteration count of the mask-building loop.
 * \param Mask      Pointer to 8 bytes loaded into MM0 and ANDed in at every mask step.
 *
 * \return 0 when compiled with USE_MMX, -1 otherwise (nothing is processed then).
 *
 * No argument validation is done here. Both loops decrement and test their counter
 * only after the first iteration, so N must be at least 1 and SrcLength at least 8.
 *
 * NOTE(review): the GCC asm statement lists Dest as its only output and declares no
 * clobbers - confirm this is acceptable for the compilers in use.
 */
03121 int SDL_imageFilterShiftLeftByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
03122                                                                         unsigned char *Mask)
03123 {
03124 #ifdef USE_MMX
03125 #if !defined(GCC__)
03126         __asm
03127         {
03128                 pusha
03129                         mov edx, Mask           /* EDX = pointer to the mask bytes */
03130                         movq mm0, [edx]         /* MM0 = 8 mask bytes */
03131                 xor ecx, ecx    /* clear ECX */
03132                         mov cl, N       /* ECX = N */
03133                         movd mm3, ecx           /* MM3 = shift count */
03134                         pcmpeqb mm1, mm1        /* MM1 = all ones */
03135 L10270:                         /* mask-building loop, runs N times */
03136                 psllw mm1, 1    /* shift the mask words left by one */
03137                         pand mm1, mm0        /* AND with the caller-supplied mask bytes */
03138                         
03139                         dec cl                          /* one mask step done */
03140                         jnz            L10270           /* repeat until CL reaches 0 */
03141                         /* MM1 now holds the final byte mask */
03142                         mov eax, Src1           /* EAX = source pointer */
03143                         mov edi, Dest           /* EDI = destination pointer */
03144                         mov ecx, SrcLength      /* ECX = byte count */
03145                         shr ecx, 3      /* ECX = number of 8-byte blocks */
03146                         align 16                        /* align the loop entry */
03147 L10271:
03148                 movq mm0, [eax]         /* load 8 source bytes (MM0 is reused) */
03149                 psllw mm0, mm3          /* shift the four words left by N */
03150                         pand mm0, mm1    /* clear the bits that crossed a byte boundary */
03151                         
03152                         movq [edi], mm0         /* store 8 result bytes */
03153                         add eax, 8      /* next source block */
03154                         add edi, 8      /* next destination block */
03155                         dec              ecx            /* one block done */
03156                         jnz            L10271           /* loop while blocks remain */
03157                         emms                            /* leave MMX state */
03158                         popa
03159         }
03160 #else
03161         asm volatile
03162                 ("pusha              \n\t" "movl         %4, %%edx \n\t"        /* EDX = Mask (%4) */
03163                 "movq    (%%edx), %%mm0 \n\t"   /* MM0 = 8 mask bytes */
03164                 "xor       %%ecx, %%ecx \n\t"   /* clear ECX */
03165                 "mov           %3, %%cl \n\t"   /* ECX = N */
03166                 "movd      %%ecx, %%mm3 \n\t"   /* MM3 = shift count */
03167                 "pcmpeqb   %%mm1, %%mm1 \n\t"   /* MM1 = all ones */
03168                 "1:                     \n\t"   /* mask-building loop, runs N times */
03169                 "psllw        $1, %%mm1 \n\t"   /* shift the mask words left by one */
03170                 /* hand-encoded opcode; the MSVC branch has "pand mm1, mm0" at this point */
03171                 ".byte     0x0f, 0xdb, 0xc8 \n\t" "dec %%cl               \n\t" /* one mask step done */
03172                 "jnz                 1b \n\t"   /* repeat until CL reaches 0 */
03173                 /* MM1 now holds the final byte mask */
03174                 "mov          %1, %%eax \n\t"   /* EAX = Src1 */
03175                 "mov          %0, %%edi \n\t"   /* EDI = Dest */
03176                 "mov          %2, %%ecx \n\t"   /* ECX = SrcLength */
03177                 "shr          $3, %%ecx \n\t"   /* ECX = number of 8-byte blocks */
03178                 ".align 16              \n\t"   /* align the loop entry */
03179                 "2: movq (%%eax), %%mm0 \n\t"   /* load 8 source bytes (MM0 is reused) */
03180                 "psllw     %%mm3, %%mm0 \n\t"   /* shift the four words left by N */
03181                 /* hand-encoded opcode; the MSVC branch has "pand mm0, mm1" at this point */
03182                 ".byte     0x0f, 0xdb, 0xc1 \n\t" "movq    %%mm0, (%%edi) \n\t" /* store 8 result bytes */
03183                 "add          $8, %%eax \n\t"   /* next source block */
03184                 "add          $8, %%edi \n\t"   /* next destination block */
03185                 "dec              %%ecx \n\t"   /* one block done */
03186                 "jnz                 2b \n\t"   /* loop while blocks remain */
03187                 "emms                   \n\t"   /* leave MMX state */
03188                 "popa                   \n\t":"=m" (Dest)       /* %0 */
03189                 :"m"(Src1),             /* %1 */
03190                 "m"(SrcLength),         /* %2 */
03191                 "m"(N),                 /* %3 */
03192                 "m"(Mask)                       /* %4 */
03193                 );
03194 #endif
03195         return (0);
03196 #else
03197         return (-1);
03198 #endif
03199 }
03200 
/*!
 * \brief Shift every byte left by N bits, discarding overflow: D = (S << N) & 0xff.
 *
 * When MMX is detected and length > 7, the MMX kernel is invoked on the buffer and
 * the C loop below only handles the (length & 7) bytes at the end. Otherwise the C
 * loop handles the whole buffer.
 *
 * \param Src1   Pointer to the source bytes (S).
 * \param Dest   Pointer to the destination bytes (D).
 * \param length Number of bytes to process.
 * \param N      Number of bit positions to shift left; valid range 0..8.
 *
 * \return 0 on success (including length == 0), -1 if Src1 or Dest is NULL or N > 8.
 */
int SDL_imageFilterShiftLeftByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
        /* Per-byte mask handed to the MMX kernel (bit 0 of every byte cleared) */
        static unsigned char Mask[8] = { 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE };
        unsigned int i, istart;
        unsigned char *cursrc1, *curdest;
        int result;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        if (N > 8) {
                return (-1);
        }

        /*
         * Special case: a shift by 0 is the identity, so the result is a plain copy
         * of the source. The destination is the FIRST argument of the copy routine;
         * memmove keeps in-place calls (Src1 == Dest) and overlapping buffers well
         * defined.
         */
        if (N == 0) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {

                SDL_imageFilterShiftLeftByteMMX(Src1, Dest, length, N, Mask);

                /* Check for bytes left over after the 8-byte blocks */
                if ((length & 7) > 0) {
                        /* Continue with the C loop at the first unprocessed byte */
                        istart = length & 0xfffffff8;
                        cursrc1 = &Src1[istart];
                        curdest = &Dest[istart];
                } else {
                        /* Length was a multiple of 8: nothing left to do */
                        return (0);
                }
        } else {
                /* No MMX (or too short): process everything in C */
                istart = 0;
                cursrc1 = Src1;
                curdest = Dest;
        }

        /* C fallback / tail: shift in int and keep only the low 8 bits */
        for (i = istart; i < length; i++) {
                result = ((int) *cursrc1 << N) & 0xff;
                *curdest = (unsigned char) result;
                /* Advance pointers */
                cursrc1++;
                curdest++;
        }

        return (0);
}
03266 
/*!
 * \brief MMX kernel: shift the data left by N bits, treating it as 32-bit words (pslld).
 *
 * \param Src1      Source bytes (read through EAX).
 * \param Dest      Destination bytes (written through EDI).
 * \param SrcLength Byte count. Only SrcLength >> 3 whole 8-byte blocks (two dwords
 *                  each) are processed; the remaining (SrcLength & 7) bytes are not
 *                  touched.
 * \param N         Shift count handed to pslld.
 *
 * \return 0 when compiled with USE_MMX, -1 otherwise (nothing is processed then).
 *
 * No argument validation is done here. The block loop decrements and tests ECX only
 * after the first iteration, so SrcLength must be at least 8.
 *
 * NOTE(review): N is a single byte but is given to pslld directly as a memory
 * operand; presumably the instruction reads a wider count from that address, which
 * would pick up whatever follows N in memory - verify, and consider loading N into
 * an MMX register first as the other kernels in this file do.
 * NOTE(review): the GCC asm statement lists Dest as its only output and declares no
 * clobbers - confirm this is acceptable for the compilers in use.
 */
03277 int SDL_imageFilterShiftLeftUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
03278 {
03279 #ifdef USE_MMX
03280 #if !defined(GCC__)
03281         __asm
03282         {
03283                 pusha
03284                         mov eax, Src1           /* EAX = source pointer */
03285                         mov edi, Dest           /* EDI = destination pointer */
03286                         mov ecx, SrcLength      /* ECX = byte count */
03287                         shr ecx, 3      /* ECX = number of 8-byte blocks */
03288                         align 16                        /* align the loop entry */
03289 L12023:
03290                 movq mm0, [eax]         /* load 8 source bytes (two dwords) */
03291                 pslld mm0, N    /* shift both dwords left; count operand is N (see NOTE above) */
03292                         movq [edi], mm0         /* store 8 result bytes */
03293                         add eax, 8      /* next source block */
03294                         add edi, 8      /* next destination block */
03295                         dec              ecx            /* one block done */
03296                         jnz             L12023          /* loop while blocks remain */
03297                         emms                            /* leave MMX state */
03298                         popa
03299         }
03300 #else
03301         asm volatile
03302                 ("pusha              \n\t"
03303                 "mov          %1, %%eax \n\t"   /* EAX = Src1 */
03304                 "mov          %0, %%edi \n\t"   /* EDI = Dest */
03305                 "mov          %2, %%ecx \n\t"   /* ECX = SrcLength */
03306                 "shr          $3, %%ecx \n\t"   /* ECX = number of 8-byte blocks */
03307                 ".align 16              \n\t"   /* align the loop entry */
03308                 "1: movq (%%eax), %%mm0 \n\t"   /* load 8 source bytes (two dwords) */
03309                 "pslld   %3, %%mm0 \n\t"        /* shift both dwords left; count operand is N in memory (see NOTE above) */
03310                 "movq    %%mm0, (%%edi) \n\t"   /* store 8 result bytes */
03311                 "add          $8, %%eax \n\t"   /* next source block */
03312                 "add          $8, %%edi \n\t"   /* next destination block */
03313                 "dec              %%ecx \n\t"   /* one block done */
03314                 "jnz                 1b \n\t"   /* loop while blocks remain */
03315                 "emms                   \n\t"   /* leave MMX state */
03316                 "popa                   \n\t":"=m" (Dest)       /* %0 */
03317                 :"m"(Src1),             /* %1 */
03318                 "m"(SrcLength),         /* %2 */
03319                 "m"(N)                  /* %3 */
03320                 );
03321 #endif
03322         return (0);
03323 #else
03324         return (-1);
03325 #endif
03326 }
03327 
/*!
 * \brief Shift left treating the data as 32-bit unsigned words: D = S << N.
 *
 * When MMX is detected and length > 7, the MMX kernel is invoked on the buffer and
 * the C loop below only handles what is left of the (length & 7) tail. Otherwise
 * the C loop handles the whole buffer.
 *
 * The C loop works on complete 4-byte words only; up to 3 bytes at the very end of
 * the buffer that do not form a complete word are left unmodified in Dest. It
 * assumes a 32-bit unsigned int and uses the native byte order.
 *
 * \param Src1   Pointer to the source bytes (S).
 * \param Dest   Pointer to the destination bytes (D).
 * \param length Number of bytes to process.
 * \param N      Number of bit positions to shift left; valid range 0..32.
 *               N == 32 clears every word processed by the C loop.
 *
 * \return 0 on success (including length == 0), -1 if Src1 or Dest is NULL or N > 32.
 */
int SDL_imageFilterShiftLeftUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
        unsigned int i, istart;
        unsigned char *cursrc1, *curdest;
        unsigned int word;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        if (N > 32) {
                return (-1);
        }

        /*
         * Special case: a shift by 0 is the identity, so the result is a plain copy
         * of the source. The destination is the FIRST argument of the copy routine;
         * memmove keeps in-place calls (Src1 == Dest) and overlapping buffers well
         * defined.
         */
        if (N == 0) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {

                SDL_imageFilterShiftLeftUintMMX(Src1, Dest, length, N);

                /* Check for bytes left over after the 8-byte blocks */
                if ((length & 7) > 0) {
                        /* Continue with the C loop at the first unprocessed byte */
                        istart = length & 0xfffffff8;
                        cursrc1 = &Src1[istart];
                        curdest = &Dest[istart];
                } else {
                        /* Length was a multiple of 8: nothing left to do */
                        return (0);
                }
        } else {
                /* No MMX (or too short): process everything in C */
                istart = 0;
                cursrc1 = Src1;
                curdest = Dest;
        }

        /*
         * C fallback / tail. "length - i >= 4" includes the last complete word
         * (the previous "(i + 4) < length" test skipped it) and cannot overflow.
         * Words are moved with memcpy so that the byte buffers need no particular
         * alignment.
         */
        for (i = istart; (length - i) >= 4; i += 4) {
                memcpy(&word, cursrc1, 4);
                /* Shifting a 32-bit value by 32 is undefined in C; the result is 0 */
                word = (N < 32) ? (word << N) : 0;
                memcpy(curdest, &word, 4);
                /* Advance pointers */
                cursrc1 += 4;
                curdest += 4;
        }

        return (0);
}
03397 
/*!
 * \brief MMX kernel: per-byte left shift, D = (S << N), packed back with packuswb.
 *
 * Each group of 8 source bytes is zero-extended to words, shifted left by N and
 * packed to bytes again. There are two loop bodies: a short one, and a second one
 * that first replaces every word x by (x XOR m) - m, where m is x arithmetically
 * shifted right by 15 (the absolute-value idiom), before packing.
 *
 * \param Src1      Source bytes (read through EAX).
 * \param Dest      Destination bytes (written through EDI).
 * \param SrcLength Byte count. Only SrcLength >> 3 whole 8-byte blocks are processed;
 *                  the remaining (SrcLength & 7) bytes are not touched.
 * \param N         Shift count, loaded into MM7.
 *
 * \return 0 when compiled with USE_MMX, -1 otherwise (nothing is processed then).
 *
 * No argument validation is done here. Both block loops decrement and test ECX only
 * after the first iteration, so SrcLength must be at least 8.
 *
 * NOTE(review): the loop selection "cmp al, 7 / jg" runs after EAX has been reloaded
 * with Src1, so AL holds the low byte of the source address, not N. The choice of
 * loop therefore depends on the buffer address - this looks unintended; confirm and
 * compare against N before EAX is overwritten.
 * NOTE(review): the GCC asm statement lists Dest as its only output and declares no
 * clobbers - confirm this is acceptable for the compilers in use.
 */
03408 int SDL_imageFilterShiftLeftMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
03409 {
03410 #ifdef USE_MMX
03411 #if !defined(GCC__)
03412         __asm
03413         {
03414                 pusha
03415                         xor eax, eax    /* clear EAX */
03416                         mov al, N       /* EAX = N */
03417                         movd mm7, eax           /* MM7 = shift count */
03418                         pxor mm0, mm0           /* MM0 = 0, used to zero-extend bytes */
03419                         mov eax, Src1           /* EAX = source pointer (N in AL is overwritten) */
03420                         mov edi, Dest           /* EDI = destination pointer */
03421                         mov ecx, SrcLength      /* ECX = byte count */
03422                         shr ecx, 3      /* ECX = number of 8-byte blocks */
03423                         cmp al, 7       /* see NOTE above: AL is the low byte of Src1 here */
03424                         jg             L10281
03425                         align 16                        /* align the loop entry */
03426 L10280:
03427                 movq mm3, [eax]         /* load 8 source bytes */
03428                 movq mm4, mm3           /* copy for the high half */
03429                         punpcklbw mm3, mm0      /* low 4 bytes -> 4 words */
03430                         punpckhbw mm4, mm0      /* high 4 bytes -> 4 words */
03431                         psllw mm3, mm7          /* shift words left by N */
03432                         psllw mm4, mm7          /* shift words left by N */
03433                         packuswb mm3, mm4       /* pack 8 words back into 8 bytes */
03434                         movq [edi], mm3         /* store 8 result bytes */
03435                         add eax, 8      /* next source block */
03436                         add edi, 8      /* next destination block */
03437                         dec              ecx            /* one block done */
03438                         jnz            L10280           /* loop while blocks remain */
03439                         jmp            L10282
03440                         align 16                        /* align the loop entry */
03441 L10281:
03442                 movq mm3, [eax]         /* load 8 source bytes */
03443                 movq mm4, mm3           /* copy for the high half */
03444                         punpcklbw mm3, mm0      /* low 4 bytes -> 4 words */
03445                         punpckhbw mm4, mm0      /* high 4 bytes -> 4 words */
03446                         psllw mm3, mm7          /* shift words left by N */
03447                         psllw mm4, mm7          /* shift words left by N */
03448                         /* Replace each word x by (x XOR m) - m, m = x >> 15 (arithmetic) */
03449                         movq mm5, mm3           /* copy low words */
03450                         movq mm6, mm4           /* copy high words */
03451                         psraw mm5, 15           /* m for the low words */
03452                         psraw mm6, 15           /* m for the high words */
03453                         pxor mm3, mm5           /* x XOR m */
03454                         pxor mm4, mm6           /* x XOR m */
03455                         psubsw mm3, mm5         /* minus m, signed saturating */
03456                         psubsw mm4, mm6         /* minus m, signed saturating */
03457                         packuswb mm3, mm4       /* pack 8 words back into 8 bytes */
03458                         movq [edi], mm3         /* store 8 result bytes */
03459                         add eax, 8      /* next source block */
03460                         add edi, 8      /* next destination block */
03461                         dec              ecx            /* one block done */
03462                         jnz            L10281           /* loop while blocks remain */
03463 L10282:
03464                 emms                            /* leave MMX state */
03465                         popa
03466         }
03467 #else
03468         asm volatile
03469                 ("pusha              \n\t" "xor       %%eax, %%eax \n\t"        /* clear EAX */
03470                 "mov           %3, %%al \n\t"   /* EAX = N */
03471                 "movd      %%eax, %%mm7 \n\t"   /* MM7 = shift count */
03472                 "pxor      %%mm0, %%mm0 \n\t"   /* MM0 = 0, used to zero-extend bytes */
03473                 "mov         %1, %%eax  \n\t"   /* EAX = Src1 (N in AL is overwritten) */
03474                 "mov         %0, %%edi  \n\t"   /* EDI = Dest */
03475                 "mov         %2, %%ecx  \n\t"   /* ECX = SrcLength */
03476                 "shr         $3, %%ecx  \n\t"   /* ECX = number of 8-byte blocks */
03477                 "cmp           $7, %%al \n\t"   /* see NOTE above: AL is the low byte of Src1 here */
03478                 "jg                  2f \n\t" ".align 16              \n\t"     /* pick loop 2, else fall into loop 1 */
03479                 "1: movq (%%eax), %%mm3 \n\t"   /* load 8 source bytes */
03480                 "movq      %%mm3, %%mm4 \n\t"   /* copy for the high half */
03481                 "punpcklbw %%mm0, %%mm3 \n\t"   /* low 4 bytes -> 4 words */
03482                 "punpckhbw %%mm0, %%mm4 \n\t"   /* high 4 bytes -> 4 words */
03483                 "psllw     %%mm7, %%mm3 \n\t"   /* shift words left by N */
03484                 "psllw     %%mm7, %%mm4 \n\t"   /* shift words left by N */
03485                 "packuswb  %%mm4, %%mm3 \n\t"   /* pack 8 words back into 8 bytes */
03486                 "movq    %%mm3, (%%edi) \n\t"   /* store 8 result bytes */
03487                 "add          $8, %%eax \n\t"   /* next source block */
03488                 "add          $8, %%edi \n\t"   /* next destination block */
03489                 "dec              %%ecx \n\t"   /* one block done */
03490                 "jnz                 1b \n\t"   /* loop while blocks remain */
03491                 "jmp                 3f \n\t" ".align 16              \n\t"     /* skip loop 2 */
03492                 "2: movq (%%eax), %%mm3 \n\t"   /* load 8 source bytes */
03493                 "movq      %%mm3, %%mm4 \n\t"   /* copy for the high half */
03494                 "punpcklbw %%mm0, %%mm3 \n\t"   /* low 4 bytes -> 4 words */
03495                 "punpckhbw %%mm0, %%mm4 \n\t"   /* high 4 bytes -> 4 words */
03496                 "psllw     %%mm7, %%mm3 \n\t"   /* shift words left by N */
03497                 "psllw     %%mm7, %%mm4 \n\t"   /* shift words left by N */
03498                 /* Replace each word x by (x XOR m) - m, m = x >> 15 (arithmetic) */
03499                 "movq      %%mm3, %%mm5 \n\t"   /* copy low words */
03500                 "movq      %%mm4, %%mm6 \n\t"   /* copy high words */
03501                 "psraw       $15, %%mm5 \n\t"   /* m for the low words */
03502                 "psraw       $15, %%mm6 \n\t"   /* m for the high words */
03503                 "pxor      %%mm5, %%mm3 \n\t"   /* x XOR m */
03504                 "pxor      %%mm6, %%mm4 \n\t"   /* x XOR m */
03505                 "psubsw    %%mm5, %%mm3 \n\t"   /* minus m, signed saturating */
03506                 "psubsw    %%mm6, %%mm4 \n\t"   /* minus m, signed saturating */
03507                 "packuswb  %%mm4, %%mm3 \n\t"   /* pack 8 words back into 8 bytes */
03508                 "movq    %%mm3, (%%edi) \n\t"   /* store 8 result bytes */
03509                 "add          $8, %%eax \n\t"   /* next source block */
03510                 "add          $8, %%edi \n\t"   /* next destination block */
03511                 "dec              %%ecx \n\t"   /* one block done */
03512                 "jnz                 2b \n\t"   /* loop while blocks remain */
03513                 "3: emms                \n\t"   /* leave MMX state */
03514                 "popa                   \n\t":"=m" (Dest)       /* %0 */
03515                 :"m"(Src1),             /* %1 */
03516                 "m"(SrcLength),         /* %2 */
03517                 "m"(N)                  /* %3 */
03518                 );
03519 #endif
03520         return (0);
03521 #else
03522         return (-1);
03523 #endif
03524 }
03525 
/*!
 * \brief Shift every byte left by N bits with saturation: D = min(S << N, 255).
 *
 * When MMX is detected and length > 7, the MMX kernel is invoked on the buffer and
 * the C loop below only handles the (length & 7) bytes at the end. Otherwise the C
 * loop handles the whole buffer.
 *
 * \param Src1   Pointer to the source bytes (S).
 * \param Dest   Pointer to the destination bytes (D).
 * \param length Number of bytes to process.
 * \param N      Number of bit positions to shift left; valid range 0..8.
 *
 * \return 0 on success (including length == 0), -1 if Src1 or Dest is NULL or N > 8.
 */
int SDL_imageFilterShiftLeft(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
        unsigned int i, istart;
        unsigned char *cursrc1, *curdest;
        int result;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        if (N > 8) {
                return (-1);
        }

        /*
         * Special case: a shift by 0 is the identity, so the result is a plain copy
         * of the source. The destination is the FIRST argument of the copy routine;
         * memmove keeps in-place calls (Src1 == Dest) and overlapping buffers well
         * defined.
         */
        if (N == 0) {
                memmove(Dest, Src1, length);
                return (0);
        }

        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {

                SDL_imageFilterShiftLeftMMX(Src1, Dest, length, N);

                /* Check for bytes left over after the 8-byte blocks */
                if ((length & 7) > 0) {
                        /* Continue with the C loop at the first unprocessed byte */
                        istart = length & 0xfffffff8;
                        cursrc1 = &Src1[istart];
                        curdest = &Dest[istart];
                } else {
                        /* Length was a multiple of 8: nothing left to do */
                        return (0);
                }
        } else {
                /* No MMX (or too short): process everything in C */
                istart = 0;
                cursrc1 = Src1;
                curdest = Dest;
        }

        /* C fallback / tail: shift in int and clamp to 255 */
        for (i = istart; i < length; i++) {
                result = (int) *cursrc1 << N;
                if (result > 255)
                        result = 255;
                *curdest = (unsigned char) result;
                /* Advance pointers */
                cursrc1++;
                curdest++;
        }

        return (0);
}
03592 
/*!
 * \brief MMX kernel: binarize by threshold, D = (S >= T) ? 255 : 0.
 *
 * T is replicated into all 8 bytes of MM3 and MM2 is set to 255 - T per byte. For
 * every block the source bytes are added to MM2 with unsigned saturation and the
 * sum is compared for equality with 0xFF, so a byte becomes 0xFF exactly when the
 * saturating sum reached 255 and 0 otherwise.
 *
 * \param Src1      Source bytes (read through EAX).
 * \param Dest      Destination bytes (written through EDI).
 * \param SrcLength Byte count. Only SrcLength >> 3 whole 8-byte blocks are processed;
 *                  the remaining (SrcLength & 7) bytes are not touched.
 * \param T         Threshold.
 *
 * \return 0 when compiled with USE_MMX, -1 otherwise (nothing is processed then).
 *
 * No argument validation is done here. The block loop decrements and tests ECX only
 * after the first iteration, so SrcLength must be at least 8.
 *
 * NOTE(review): the GCC asm statement lists Dest as its only output and declares no
 * clobbers - confirm this is acceptable for the compilers in use.
 */
03603 int SDL_imageFilterBinarizeUsingThresholdMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char T)
03604 {
03605 #ifdef USE_MMX
03606 #if !defined(GCC__)
03607         __asm
03608         {
03609                 pusha
03610                         /* Build the constants: MM1 = 0xFF bytes, MM2 = 255 - T per byte */
03611                         pcmpeqb mm1, mm1        /* MM1 = all ones (comparison value) */
03612                         pcmpeqb mm2, mm2        /* MM2 = all ones */
03613                         mov al, T       /* AL = T */
03614                         mov ah, al      /* AX = T:T */
03615                         mov bx, ax      /* BX = T:T */
03616                         shl eax, 16     /* move T:T into the upper word of EAX */
03617                         mov ax, bx      /* EAX = T in all four bytes */
03618                         movd mm3, eax           /* low dword of MM3 = EAX */
03619                         movd mm4, eax           /* low dword of MM4 = EAX */
03620                         punpckldq mm3, mm4      /* MM3 = T in all eight bytes */
03621                         psubusb mm2, mm3        /* MM2 = 255 - T per byte */
03622                         mov eax, Src1           /* EAX = source pointer */
03623                         mov edi, Dest           /* EDI = destination pointer */
03624                         mov ecx, SrcLength      /* ECX = byte count */
03625                         shr ecx, 3      /* ECX = number of 8-byte blocks */
03626                         align 16                        /* align the loop entry */
03627 L1029:
03628                 movq mm0, [eax]         /* load 8 source bytes */
03629                 paddusb mm0, mm2        /* add 255 - T with unsigned saturation */
03630                         pcmpeqb mm0, mm1        /* 0xFF where the sum is 255, else 0 */
03631                         movq [edi], mm0         /* store 8 result bytes */
03632                         add eax, 8      /* next source block */
03633                         add edi, 8      /* next destination block */
03634                         dec              ecx            /* one block done */
03635                         jnz             L1029           /* loop while blocks remain */
03636                         emms                            /* leave MMX state */
03637                         popa
03638         }
03639 #else
03640         asm volatile
03641                 ("pusha              \n\t"
03642                 /* Build the constants: MM1 = 0xFF bytes, MM2 = 255 - T per byte */
03643                 "pcmpeqb   %%mm1, %%mm1 \n\t"   /* MM1 = all ones (comparison value) */
03644                 "pcmpeqb   %%mm2, %%mm2 \n\t"   /* MM2 = all ones */
03645                 "mov           %3, %%al \n\t"   /* AL = T */
03646                 "mov         %%al, %%ah \n\t"   /* AX = T:T */
03647                 "mov         %%ax, %%bx \n\t"   /* BX = T:T */
03648                 "shl         $16, %%eax \n\t"   /* move T:T into the upper word of EAX */
03649                 "mov         %%bx, %%ax \n\t"   /* EAX = T in all four bytes */
03650                 "movd      %%eax, %%mm3 \n\t"   /* low dword of MM3 = EAX */
03651                 "movd      %%eax, %%mm4 \n\t"   /* low dword of MM4 = EAX */
03652                 "punpckldq %%mm4, %%mm3 \n\t"   /* MM3 = T in all eight bytes */
03653                 "psubusb   %%mm3, %%mm2 \n\t"   /* MM2 = 255 - T per byte */
03654                 "mov          %1, %%eax \n\t"   /* EAX = Src1 */
03655                 "mov          %0, %%edi \n\t"   /* EDI = Dest */
03656                 "mov          %2, %%ecx \n\t"   /* ECX = SrcLength */
03657                 "shr          $3, %%ecx \n\t"   /* ECX = number of 8-byte blocks */
03658                 ".align 16              \n\t"   /* align the loop entry */
03659                 "1:                     \n\t" 
03660                 "movq    (%%eax), %%mm0 \n\t"   /* load 8 source bytes */
03661                 "paddusb   %%mm2, %%mm0 \n\t"   /* add 255 - T with unsigned saturation */
03662                 "pcmpeqb   %%mm1, %%mm0 \n\t"   /* 0xFF where the sum is 255, else 0 */
03663                 "movq    %%mm0, (%%edi) \n\t"   /* store 8 result bytes */
03664                 "add          $8, %%eax \n\t"   /* next source block */
03665                 "add          $8, %%edi \n\t"   /* next destination block */
03666                 "dec              %%ecx \n\t"   /* one block done */
03667                 "jnz                 1b \n\t"   /* loop while blocks remain */
03668                 "emms                   \n\t"   /* leave MMX state */
03669                 "popa                   \n\t":"=m" (Dest)       /* %0 */
03670                 :"m"(Src1),             /* %1 */
03671                 "m"(SrcLength),         /* %2 */
03672                 "m"(T)                  /* %3 */
03673                 );
03674 #endif
03675         return (0);
03676 #else
03677         return (-1);
03678 #endif
03679 }
03680 
/*!
 * \brief Binarize by threshold: D = (S >= T) ? 255 : 0.
 *
 * When MMX is detected and length > 7, the MMX kernel is invoked on the buffer and
 * the C loop below only handles the (length & 7) bytes at the end. Otherwise the C
 * loop handles the whole buffer.
 *
 * \param Src1   Pointer to the source bytes (S).
 * \param Dest   Pointer to the destination bytes (D).
 * \param length Number of bytes to process.
 * \param T      Threshold; with T == 0 every destination byte becomes 255.
 *
 * \return 0 on success (including length == 0), -1 if Src1 or Dest is NULL.
 */
int SDL_imageFilterBinarizeUsingThreshold(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char T)
{
        unsigned int pos = 0;   /* first byte the scalar loop still has to handle */

        /* Reject missing buffers; an empty buffer is trivially done */
        if (Src1 == NULL || Dest == NULL) {
                return -1;
        }
        if (length == 0) {
                return 0;
        }

        /* Every byte is >= 0, so a zero threshold turns the whole output white */
        if (T == 0) {
                memset(Dest, 255, length);
                return 0;
        }

        /* Let the MMX kernel take the 8-byte blocks; the scalar loop resumes after them */
        if (SDL_imageFilterMMXdetect() && length > 7) {
                SDL_imageFilterBinarizeUsingThresholdMMX(Src1, Dest, length, T);
                pos = length & 0xfffffff8;
        }

        /* Scalar pass over whatever is left (nothing when length was a multiple of 8) */
        while (pos < length) {
                if (Src1[pos] >= T) {
                        Dest[pos] = 255;
                } else {
                        Dest[pos] = 0;
                }
                pos++;
        }

        return 0;
}
03740 
03752 int SDL_imageFilterClipToRangeMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char Tmin,
03753                                                                   unsigned char Tmax)
03754 {
03755 #ifdef USE_MMX
03756 #if !defined(GCC__)
03757         __asm
03758         {
03759                 pusha
03760                         pcmpeqb mm1, mm1        
03761                         
03762                         mov al, Tmax    
03763                         mov ah, al      
03764                         mov bx, ax      
03765                         shl eax, 16     
03766                         mov ax, bx      
03767                         movd mm3, eax           
03768                         movd mm4, eax           
03769                         punpckldq mm3, mm4      
03770                         psubusb mm1, mm3        
03771                         
03772                         mov al, Tmin    
03773                         mov ah, al      
03774                         mov bx, ax      
03775                         shl eax, 16     
03776                         mov ax, bx      
03777                         movd mm5, eax           
03778                         movd mm4, eax           
03779                         punpckldq mm5, mm4      
03780                         movq mm7, mm5           
03781                         paddusb mm7, mm1        
03782                         mov eax, Src1           
03783                         mov edi, Dest           
03784                         mov ecx, SrcLength      
03785                         shr ecx, 3      
03786                         align 16                        
03787 L1030:
03788                 movq mm0, [eax]         
03789                 paddusb mm0, mm1        
03790                         psubusb mm0, mm7        
03791                         paddusb mm0, mm5        
03792                         movq [edi], mm0         
03793                         add eax, 8      
03794                         add edi, 8      
03795                         dec              ecx            
03796                         jnz             L1030           
03797                         emms                            
03798                         popa
03799         }
03800 #else
03801         asm volatile
03802                 ("pusha              \n\t" "pcmpeqb   %%mm1, %%mm1 \n\t"        
03803                 
03804                 "mov           %4, %%al \n\t"   
03805                 "mov         %%al, %%ah \n\t"   
03806                 "mov         %%ax, %%bx \n\t"   
03807                 "shl         $16, %%eax \n\t"   
03808                 "mov         %%bx, %%ax \n\t"   
03809                 "movd      %%eax, %%mm3 \n\t"   
03810                 "movd      %%eax, %%mm4 \n\t"   
03811                 "punpckldq %%mm4, %%mm3 \n\t"   
03812                 "psubusb   %%mm3, %%mm1 \n\t"   
03813                 
03814                 "mov           %3, %%al \n\t"   
03815                 "mov         %%al, %%ah \n\t"   
03816                 "mov         %%ax, %%bx \n\t"   
03817                 "shl         $16, %%eax \n\t"   
03818                 "mov         %%bx, %%ax \n\t"   
03819                 "movd      %%eax, %%mm5 \n\t"   
03820                 "movd      %%eax, %%mm4 \n\t"   
03821                 "punpckldq %%mm4, %%mm5 \n\t"   
03822                 "movq      %%mm5, %%mm7 \n\t"   
03823                 "paddusb   %%mm1, %%mm7 \n\t"   
03824                 "mov          %1, %%eax \n\t"   
03825                 "mov          %0, %%edi \n\t"   
03826                 "mov          %2, %%ecx \n\t"   
03827                 "shr          $3, %%ecx \n\t"   
03828                 ".align 16              \n\t"   
03829                 "1:                     \n\t" 
03830                 "movq    (%%eax), %%mm0 \n\t"   
03831                 "paddusb   %%mm1, %%mm0 \n\t"   
03832                 "psubusb   %%mm7, %%mm0 \n\t"   
03833                 "paddusb   %%mm5, %%mm0 \n\t"   
03834                 "movq    %%mm0, (%%edi) \n\t"   
03835                 "add          $8, %%eax \n\t"   
03836                 "add          $8, %%edi \n\t"   
03837                 "dec              %%ecx \n\t"   
03838                 "jnz                 1b \n\t"   
03839                 "emms                   \n\t"   
03840                 "popa                   \n\t":"=m" (Dest)       
03841                 :"m"(Src1),             
03842                 "m"(SrcLength),         
03843                 "m"(Tmin),              
03844                 "m"(Tmax)                       
03845                 );
03846 #endif
03847         return (0);
03848 #else
03849         return (-1);
03850 #endif
03851 }
03852 
/*!
\brief Filter using ClipToRange: D = Tmin if S < Tmin, Tmax if S > Tmax, otherwise S.

When SDL_imageFilterMMXdetect() reports MMX support and more than 7 bytes are
to be processed, the leading part of the array (length rounded down to a
multiple of 8) is handed to SDL_imageFilterClipToRangeMMX() and only the
remaining (length & 7) bytes are clipped by the C loop below. Otherwise the
C loop processes the whole array.

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes to process.
\param Tmin Lower boundary; source bytes below it are replaced by Tmin.
\param Tmax Upper boundary; source bytes above it are replaced by Tmax.

\return Returns 0 for success (including length == 0) or -1 if Src1 or Dest is NULL.
*/
int SDL_imageFilterClipToRange(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char Tmin,
                               unsigned char Tmax)
{
        unsigned int i, istart;

        /* Validate input parameters */
        if ((Src1 == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /*
         * Special case Tmin == 0 && Tmax == 255: the range covers every
         * possible byte value, so nothing is clipped and the source is simply
         * copied to the destination. memmove() is used so that in-place or
         * overlapping use stays well defined.
         */
        if ((Tmin == 0) && (Tmax == 255)) {
                if (Dest != Src1)
                        memmove(Dest, Src1, length);
                return (0);
        }

        /* Let the MMX routine handle all complete groups of 8 bytes */
        istart = 0;
        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
                SDL_imageFilterClipToRangeMMX(Src1, Dest, length, Tmin, Tmax);
                /* Index of the first byte the MMX routine did not process */
                istart = length & 0xfffffff8;
        }

        /* C routine: clip whatever is left (everything when MMX was not used) */
        for (i = istart; i < length; i++) {
                unsigned char value = Src1[i];

                if (value < Tmin) {
                        Dest[i] = Tmin;
                } else if (value > Tmax) {
                        Dest[i] = Tmax;
                } else {
                        Dest[i] = value;
                }
        }

        return (0);
}
03920 
/*!
\brief Internal MMX implementation of NormalizeLinear: D = factor * (S - Cmin) + Nmin, with factor = (Nmax - Nmin) / (Cmax - Cmin).

Notes derived from the assembly below:
- Only the low 16 bits of Cmin, Cmax, Nmin and Nmax are used (16-bit register loads).
- The factor is computed once with an unsigned 16-bit DIV; when Cmax - Cmin is 0
  the factor is set to 255 instead of dividing.
- Data is processed in groups of 8 bytes, SrcLength >> 3 groups in total; the
  trailing (SrcLength & 7) bytes are neither read nor written.
- The loop counter is tested only after the loop body has run, so SrcLength
  must be at least 8. No pointer or length validation is done here.

\param Src1 Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source array (must be >= 8, see above).
\param Cmin Lower bound of the source range.
\param Cmax Upper bound of the source range.
\param Nmin Lower bound of the target range.
\param Nmax Upper bound of the target range.

\return Returns 0 when compiled with USE_MMX, -1 otherwise (nothing is processed then).
*/
03934 int SDL_imageFilterNormalizeLinearMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, int Cmin, int Cmax,
03935                                                                           int Nmin, int Nmax)
03936 {
03937 #ifdef USE_MMX
03938 #if !defined(GCC__)
        /* Non-GCC compilers: MSVC-style inline assembly (Intel syntax) */
03939         __asm
03940         {
03941                 pusha
03942                         mov ax, WORD PTR Nmax           /* AX = Nmax (low 16 bits) */
03943                         mov bx, WORD PTR Cmax           /* BX = Cmax (low 16 bits) */
03944                         sub ax, WORD PTR Nmin           /* AX = Nmax - Nmin */
03945                         sub bx, WORD PTR Cmin           /* BX = Cmax - Cmin, sets ZF */
03946                         jz             L10311           /* Cmax - Cmin == 0: skip the division */
03947                         xor dx, dx      /* clear DX, DIV divides DX:AX */
03948                         div               bx            /* AX = (Nmax - Nmin) / (Cmax - Cmin), unsigned */
03949                         jmp            L10312
03950 L10311:
03951                 mov ax, 255     /* division by zero case: use factor 255 */
03952 L10312:                         /* ** replicate the factor into the 4 words of MM0 ** */
03953                 mov bx, ax      /* keep a copy of the factor in BX */
03954                         shl eax, 16     /* move the factor to the upper half of EAX */
03955                         mov ax, bx      /* EAX = factor:factor */
03956                         movd mm0, eax           /* low dword of MM0 = EAX */
03957                         movd mm1, eax           /* low dword of MM1 = EAX */
03958                         punpckldq mm0, mm1      /* MM0 = factor in all 4 words */
03959                         /* ** replicate Cmin into the 4 words of MM1 ** */
03960                         mov ax, WORD PTR Cmin           /* AX = Cmin (low 16 bits) */
03961                         mov bx, ax      
03962                         shl eax, 16     
03963                         mov ax, bx      /* EAX = Cmin:Cmin */
03964                         movd mm1, eax           
03965                         movd mm2, eax           
03966                         punpckldq mm1, mm2      /* MM1 = Cmin in all 4 words */
03967                         /* ** replicate Nmin into the 4 words of MM2 ** */
03968                         mov ax, WORD PTR Nmin           /* AX = Nmin (low 16 bits) */
03969                         mov bx, ax      
03970                         shl eax, 16     
03971                         mov ax, bx      /* EAX = Nmin:Nmin */
03972                         movd mm2, eax           
03973                         movd mm3, eax           
03974                         punpckldq mm2, mm3      /* MM2 = Nmin in all 4 words */
03975                         pxor mm7, mm7           /* MM7 = 0, used to zero-extend bytes to words */
03976                         mov eax, Src1           /* EAX = source pointer */
03977                         mov edi, Dest           /* EDI = destination pointer */
03978                         mov ecx, SrcLength      /* ECX = byte count */
03979                         shr ecx, 3      /* ECX = number of 8-byte groups */
03980                         align 16                        /* align the loop entry */
03981 L1031:
03982                 movq mm3, [eax]         /* load 8 source bytes */
03983                 movq mm4, mm3           /* second copy for the high half */
03984                         punpcklbw mm3, mm7      /* MM3 = low 4 bytes as words */
03985                         punpckhbw mm4, mm7      /* MM4 = high 4 bytes as words */
03986                         psubusb mm3, mm1        /* S - Cmin (byte-wise unsigned saturating subtract) */
03987                         psubusb mm4, mm1        
03988                         pmullw mm3, mm0         /* factor * (S - Cmin), low 16 bits of each product */
03989                         pmullw mm4, mm0         
03990                         paddusb mm3, mm2        /* + Nmin (byte-wise unsigned saturating add) */
03991                         paddusb mm4, mm2        
03992                         /* ** take the absolute value of each signed word ** */
03993                         movq mm5, mm3           
03994                         movq mm6, mm4           
03995                         psraw mm5, 15           /* MM5 = 0 or -1 per word (sign mask) */
03996                         psraw mm6, 15           
03997                         pxor mm3, mm5           /* invert the negative words */
03998                         pxor mm4, mm6           
03999                         psubsw mm3, mm5         /* subtract the mask (-1) to finish the negation */
04000                         psubsw mm4, mm6         
04001                         packuswb mm3, mm4       /* pack 8 words to 8 bytes, saturating to 0..255 */
04002                         movq [edi], mm3         /* store 8 result bytes */
04003                         add eax, 8      /* advance source pointer */
04004                         add edi, 8      /* advance destination pointer */
04005                         dec              ecx            /* one group done */
04006                         jnz             L1031           /* loop until the group counter reaches 0 */
04007                         emms                            /* leave MMX state */
04008                         popa
04009         }
04010 #else
        /*
         * GCC: same algorithm in AT&T syntax. Operands: %0 = Dest, %1 = Src1,
         * %2 = SrcLength, %3 = Cmin, %4 = Cmax, %5 = Nmin, %6 = Nmax.
         * Labels "1:" / "2:" are local numeric labels ("1f"/"2f" = next one
         * forward, "1b" = nearest one backward).
         */
04011         asm volatile
04012                 ("pusha              \n\t" "mov           %6, %%ax \n\t"        /* save registers; AX = Nmax */
04013                 "mov           %4, %%bx \n\t"   /* BX = Cmax */
04014                 "sub           %5, %%ax \n\t"   /* AX = Nmax - Nmin */
04015                 "sub           %3, %%bx \n\t"   /* BX = Cmax - Cmin, sets ZF */
04016                 "jz                  1f \n\t"   /* Cmax - Cmin == 0: skip the division */
04017                 "xor         %%dx, %%dx \n\t"   /* clear DX, DIV divides DX:AX */
04018                 "div               %%bx \n\t"   /* AX = (Nmax - Nmin) / (Cmax - Cmin), unsigned */
04019                 "jmp                 2f \n\t" "1:                     \n\t" "mov         $255, %%ax \n\t"       /* division by zero case: factor = 255 */
04020                 "2:                    \n\t"    /* ** replicate the factor into the 4 words of MM0 ** */
04021                 "mov         %%ax, %%bx \n\t"   
04022                 "shl         $16, %%eax \n\t"   
04023                 "mov         %%bx, %%ax \n\t"   /* EAX = factor:factor */
04024                 "movd      %%eax, %%mm0 \n\t"   
04025                 "movd      %%eax, %%mm1 \n\t"   
04026                 "punpckldq %%mm1, %%mm0 \n\t"   /* MM0 = factor in all 4 words */
04027                 /* ** replicate Cmin into the 4 words of MM1 ** */
04028                 "mov           %3, %%ax \n\t"   /* AX = Cmin */
04029                 "mov         %%ax, %%bx \n\t"   
04030                 "shl         $16, %%eax \n\t"   
04031                 "mov         %%bx, %%ax \n\t"   
04032                 "movd      %%eax, %%mm1 \n\t"   
04033                 "movd      %%eax, %%mm2 \n\t"   
04034                 "punpckldq %%mm2, %%mm1 \n\t"   /* MM1 = Cmin in all 4 words */
04035                 /* ** replicate Nmin into the 4 words of MM2 ** */
04036                 "mov           %5, %%ax \n\t"   /* AX = Nmin */
04037                 "mov         %%ax, %%bx \n\t"   
04038                 "shl         $16, %%eax \n\t"   
04039                 "mov         %%bx, %%ax \n\t"   
04040                 "movd      %%eax, %%mm2 \n\t"   
04041                 "movd      %%eax, %%mm3 \n\t"   
04042                 "punpckldq %%mm3, %%mm2 \n\t"   /* MM2 = Nmin in all 4 words */
04043                 "pxor      %%mm7, %%mm7 \n\t"   /* MM7 = 0, used to zero-extend bytes to words */
04044                 "mov          %1, %%eax \n\t"   /* EAX = source pointer */
04045                 "mov          %0, %%edi \n\t"   /* EDI = destination pointer */
04046                 "mov          %2, %%ecx \n\t"   /* ECX = byte count */
04047                 "shr          $3, %%ecx \n\t"   /* ECX = number of 8-byte groups */
04048                 ".align 16              \n\t"   /* align the loop entry */
04049                 "1:                     \n\t" 
04050                 "movq    (%%eax), %%mm3 \n\t"   /* load 8 source bytes */
04051                 "movq      %%mm3, %%mm4 \n\t"   /* second copy for the high half */
04052                 "punpcklbw %%mm7, %%mm3 \n\t"   /* MM3 = low 4 bytes as words */
04053                 "punpckhbw %%mm7, %%mm4 \n\t"   /* MM4 = high 4 bytes as words */
04054                 "psubusb   %%mm1, %%mm3 \n\t"   /* S - Cmin (byte-wise unsigned saturating subtract) */
04055                 "psubusb   %%mm1, %%mm4 \n\t"   
04056                 "pmullw    %%mm0, %%mm3 \n\t"   /* factor * (S - Cmin), low 16 bits of each product */
04057                 "pmullw    %%mm0, %%mm4 \n\t"   
04058                 "paddusb   %%mm2, %%mm3 \n\t"   /* + Nmin (byte-wise unsigned saturating add) */
04059                 "paddusb   %%mm2, %%mm4 \n\t"   
04060                 /* ** take the absolute value of each signed word ** */
04061                 "movq      %%mm3, %%mm5 \n\t"   
04062                 "movq      %%mm4, %%mm6 \n\t"   
04063                 "psraw       $15, %%mm5 \n\t"   /* sign mask: 0 or -1 per word */
04064                 "psraw       $15, %%mm6 \n\t"   
04065                 "pxor      %%mm5, %%mm3 \n\t"   /* invert the negative words */
04066                 "pxor      %%mm6, %%mm4 \n\t"   
04067                 "psubsw    %%mm5, %%mm3 \n\t"   /* subtract the mask (-1) to finish the negation */
04068                 "psubsw    %%mm6, %%mm4 \n\t"   
04069                 "packuswb  %%mm4, %%mm3 \n\t"   /* pack 8 words to 8 bytes, saturating to 0..255 */
04070                 "movq    %%mm3, (%%edi) \n\t"   /* store 8 result bytes */
04071                 "add          $8, %%eax \n\t"   /* advance source pointer */
04072                 "add          $8, %%edi \n\t"   /* advance destination pointer */
04073                 "dec              %%ecx \n\t"   /* one group done */
04074                 "jnz                 1b \n\t"   /* loop until the group counter reaches 0 */
04075                 "emms                   \n\t"   /* leave MMX state */
04076                 "popa                   \n\t":"=m" (Dest)       /* %0 */
04077                 :"m"(Src1),             /* %1 */
04078                 "m"(SrcLength),         /* %2 */
04079                 "m"(Cmin),              /* %3 */
04080                 "m"(Cmax),              /* %4 */
04081                 "m"(Nmin),              /* %5 */
04082                 "m"(Nmax)                       /* %6 */
04083                 );
04084 #endif
04085         return (0);
04086 #else
        /* Built without USE_MMX: no MMX code available */
04087         return (-1);
04088 #endif
04089 }
04090 
/*!
\brief Filter using NormalizeLinear: D = saturate((Nmax - Nmin) / (Cmax - Cmin) * (S - Cmin) + Nmin)

The scale factor (Nmax - Nmin) / (Cmax - Cmin) is an integer division. When
SDL_imageFilterMMXdetect() reports MMX support and more than 7 bytes are to be
processed, the leading part of the array (length rounded down to a multiple
of 8) is handed to SDL_imageFilterNormalizeLinearMMX() and only the remaining
(length & 7) bytes are computed by the C loop below.

In the C loop the result is clamped to the range 0..255 before it is stored.
If Cmax == Cmin the C loop is skipped and the bytes it would have produced
are left untouched.

\param Src Pointer to the start of the source byte array (S).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes to process.
\param Cmin Lower bound of the source range.
\param Cmax Upper bound of the source range.
\param Nmin Lower bound of the target range.
\param Nmax Upper bound of the target range.

\return Returns 0 for success (including length == 0) or -1 if Src or Dest is NULL.
*/
int SDL_imageFilterNormalizeLinear(unsigned char *Src, unsigned char *Dest, unsigned int length, int Cmin, int Cmax, int Nmin,
                                   int Nmax)
{
        unsigned int i, istart;
        int dN, dC, factor;
        int result;

        /* Validate input parameters */
        if ((Src == NULL) || (Dest == NULL))
                return (-1);
        if (length == 0)
                return (0);

        /* Let the MMX routine handle all complete groups of 8 bytes */
        istart = 0;
        if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
                SDL_imageFilterNormalizeLinearMMX(Src, Dest, length, Cmin, Cmax, Nmin, Nmax);
                /* Index of the first byte the MMX routine did not process */
                istart = length & 0xfffffff8;
                if (istart == length)
                        return (0);
        }

        /* C routine: process whatever is left (everything when MMX was not used) */
        dC = Cmax - Cmin;
        if (dC == 0)
                return (0);     /* avoid division by zero */
        dN = Nmax - Nmin;
        factor = dN / dC;
        for (i = istart; i < length; i++) {
                result = factor * ((int) Src[i] - Cmin) + Nmin;
                /*
                 * Saturate at both ends: without the lower bound a negative
                 * result would wrap around when converted to unsigned char.
                 */
                if (result > 255) {
                        result = 255;
                } else if (result < 0) {
                        result = 0;
                }
                Dest[i] = (unsigned char) result;
        }

        return (0);
}
04158 
04159 
04160 
/*!
\brief Filter using ConvolveKernel3x3Divide: Dij = saturation0and255( sum(kernel * source neighbourhood) / Divisor ).

MMX-only implementation; there is no C fallback in this function.

Notes derived from the assembly below:
- Kernel is read as three 8-byte rows (three MOVQ loads at byte offsets 0, 8
  and 16), i.e. 3 rows of 4 signed 16-bit values. All 4 values of a row take
  part in the sum, so the 4th value of each row is presumably expected to be
  0 for a true 3x3 kernel - TODO confirm against callers.
- For every output pixel 8 bytes are loaded from each of the 3 source rows,
  although only the low 4 bytes are used.
- Output starts at Dest + columns + 1 and covers (rows - 2) x (columns - 2)
  pixels; the outermost rows and columns of Dest are not written.
- The sum is divided by Divisor with a signed 16-bit IDIV and the quotient is
  saturated to 0..255 when it is packed to a byte.

\param Src Pointer to the start of the source byte array (rows x columns bytes, row-major as addressed below).
\param Dest Pointer to the start of the destination byte array.
\param rows Number of rows; must be >= 3.
\param columns Number of columns; must be >= 3.
\param Kernel Pointer to the kernel values (signed 16-bit, layout as described above).
\param Divisor Value the sum is divided by; must not be 0.

\return Returns 0 when SDL_imageFilterMMXdetect() reports MMX support, or -1 for
invalid arguments (NULL pointer, rows/columns < 3, Divisor == 0) or missing MMX support.
*/
04175 int SDL_imageFilterConvolveKernel3x3Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns,
04176                                                                                    signed short *Kernel, unsigned char Divisor)
04177 {
04178         /* Validate input parameters */
04179         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
04180                 return(-1);
04181 
04182         if ((columns < 3) || (rows < 3) || (Divisor == 0))
04183                 return (-1);
04184 
04185         if ((SDL_imageFilterMMXdetect())) {
04186 #ifdef USE_MMX
04187 #if !defined(GCC__)
                /* Non-GCC compilers: MSVC-style inline assembly (Intel syntax) */
04188                 __asm
04189                 {
04190                         pusha
04191                                 pxor mm0, mm0           /* MM0 = 0, used for unpacking and packing */
04192                                 xor ebx, ebx    /* clear EBX */
04193                                 mov bl, Divisor         /* BX = Divisor (zero-extended) */
04194                                 mov edx, Kernel         /* EDX = kernel pointer */
04195                                 movq mm5, [edx]         /* MM5 = kernel row 0 (4 words) */
04196                         add edx, 8      
04197                                 movq mm6, [edx]         /* MM6 = kernel row 1 (4 words) */
04198                         add edx, 8      
04199                                 movq mm7, [edx]         /* MM7 = kernel row 2 (4 words) */
04200                         /* ** set up pointers and counters ** */
04201                         mov eax, columns        /* EAX = columns (row stride in bytes) */
04202                                 mov esi, Src    /* ESI = source pointer (top-left of the window) */
04203                                 mov edi, Dest           /* EDI = destination pointer */
04204                                 add edi, eax    /* skip the first destination row ... */
04205                                 inc              edi            /* ... and the first column: EDI = Dest + columns + 1 */
04206                                 mov edx, rows           
04207                                 sub edx, 2      /* EDX = rows - 2 output rows */
04208                                 /* ** outer loop: one output row per iteration ** */
04209 L10320:
04210                         mov ecx, eax    
04211                                 sub ecx, 2      /* ECX = columns - 2 output pixels per row */
04212                                 align 16                        /* align the inner loop entry */
04213 L10322:
04214                         /* ** inner loop: one output pixel per iteration ** */
04215                         movq mm1, [esi]         /* 8 bytes of window row 0 */
04216                         add esi, eax    /* down one source row */
04217                                 movq mm2, [esi]         /* 8 bytes of window row 1 */
04218                         add esi, eax    /* down one source row */
04219                                 movq mm3, [esi]         /* 8 bytes of window row 2 */
04220                         punpcklbw mm1, mm0      /* low 4 bytes of each row -> 4 words */
04221                                 punpcklbw mm2, mm0      
04222                                 punpcklbw mm3, mm0      
04223                                 pmullw mm1, mm5         /* multiply by the matching kernel row */
04224                                 pmullw mm2, mm6         
04225                                 pmullw mm3, mm7         
04226                                 paddsw mm1, mm2         /* add the three rows (signed saturating) */
04227                                 paddsw mm1, mm3         
04228                                 movq mm2, mm1           /* horizontal add: fold the upper dword ... */
04229                                 psrlq mm1, 32           
04230                                 paddsw mm1, mm2         
04231                                 movq mm3, mm1           /* ... then the upper word */
04232                                 psrlq mm1, 16           
04233                                 paddsw mm1, mm3         /* low word of MM1 = sum of all products */
04234                                 /* ** divide the sum by Divisor ** */
04235                                 movd mm2, eax           /* save EAX (columns) in MM2 */
04236                                 movd mm3, edx           /* save EDX (row counter) in MM3 */
04237                                 movd eax, mm1           /* AX = sum */
04238                                 psraw mm1, 15           /* replicate the sign bit of each word */
04239                                 movd edx, mm1           /* DX = sign extension of the sum */
04240                                 idiv bx         /* AX = DX:AX / Divisor (signed) */
04241                                 movd mm1, eax           
04242                                 packuswb mm1, mm0       /* saturate the quotient to 0..255 */
04243                                 movd eax, mm1           
04244                                 mov [edi], al           /* store the result byte */
04245                                 movd edx, mm3           /* restore EDX (row counter) */
04246                                 movd eax, mm2           /* restore EAX (columns) */
04247                                 /* ** move the window one pixel to the right ** */
04248                                 sub esi, eax    /* back up the two rows added above */
04249                                 sub esi, eax    
04250                                 inc              esi            /* next source column */
04251                                 inc              edi            /* next destination pixel */
04252                                 
04253                                 dec              ecx            /* one pixel done */
04254                                 jnz            L10322           /* loop over the pixels of this row */
04255                                 add esi, 2      /* skip the 2 columns not visited: start of next source row */
04256                                 add edi, 2      /* skip the right and left border pixels of Dest */
04257                                 dec              edx            /* one row done */
04258                                 jnz            L10320           /* loop over the output rows */
04259                                 
04260                                 emms                            /* leave MMX state */
04261                                 popa
04262                 }
04263 #else
                /*
                 * GCC: same algorithm in AT&T syntax. Operands: %0 = Dest,
                 * %1 = Src, %2 = rows, %3 = columns, %4 = Kernel, %5 = Divisor.
                 */
04264                 asm volatile
04265                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        /* save registers; MM0 = 0 */
04266                         "xor       %%ebx, %%ebx \n\t"   /* clear EBX */
04267                         "mov           %5, %%bl \n\t"   /* BX = Divisor (zero-extended) */
04268                         "mov          %4, %%edx \n\t"   /* EDX = kernel pointer */
04269                         "movq    (%%edx), %%mm5 \n\t"   /* MM5 = kernel row 0 (4 words) */
04270                         "add          $8, %%edx \n\t"   
04271                         "movq    (%%edx), %%mm6 \n\t"   /* MM6 = kernel row 1 (4 words) */
04272                         "add          $8, %%edx \n\t"   
04273                         "movq    (%%edx), %%mm7 \n\t"   /* MM7 = kernel row 2 (4 words) */
04274                         /* ** set up pointers and counters ** */
04275                         "mov          %3, %%eax \n\t"   /* EAX = columns (row stride in bytes) */
04276                         "mov          %1, %%esi \n\t"   /* ESI = source pointer (top-left of the window) */
04277                         "mov          %0, %%edi \n\t"   /* EDI = destination pointer */
04278                         "add       %%eax, %%edi \n\t"   /* skip the first destination row ... */
04279                         "inc              %%edi \n\t"   /* ... and the first column */
04280                         "mov          %2, %%edx \n\t"   
04281                         "sub          $2, %%edx \n\t"   /* EDX = rows - 2 output rows */
04282                         /* ** outer loop: one output row per iteration ** */
04283                         ".L10320:               \n\t" "mov       %%eax, %%ecx \n\t"     
04284                         "sub          $2, %%ecx \n\t"   /* ECX = columns - 2 output pixels per row */
04285                         ".align 16              \n\t"   /* align the inner loop entry */
04286                         ".L10322:               \n\t"
04287                         /* ** inner loop: one output pixel per iteration ** */
04288                         "movq    (%%esi), %%mm1 \n\t"   /* 8 bytes of window row 0 */
04289                         "add       %%eax, %%esi \n\t"   /* down one source row */
04290                         "movq    (%%esi), %%mm2 \n\t"   /* 8 bytes of window row 1 */
04291                         "add       %%eax, %%esi \n\t"   /* down one source row */
04292                         "movq    (%%esi), %%mm3 \n\t"   /* 8 bytes of window row 2 */
04293                         "punpcklbw %%mm0, %%mm1 \n\t"   /* low 4 bytes of each row -> 4 words */
04294                         "punpcklbw %%mm0, %%mm2 \n\t"   
04295                         "punpcklbw %%mm0, %%mm3 \n\t"   
04296                         "pmullw    %%mm5, %%mm1 \n\t"   /* multiply by the matching kernel row */
04297                         "pmullw    %%mm6, %%mm2 \n\t"   
04298                         "pmullw    %%mm7, %%mm3 \n\t"   
04299                         "paddsw    %%mm2, %%mm1 \n\t"   /* add the three rows (signed saturating) */
04300                         "paddsw    %%mm3, %%mm1 \n\t"   
04301                         "movq      %%mm1, %%mm2 \n\t"   /* horizontal add: fold the upper dword ... */
04302                         "psrlq       $32, %%mm1 \n\t"   
04303                         "paddsw    %%mm2, %%mm1 \n\t"   
04304                         "movq      %%mm1, %%mm3 \n\t"   /* ... then the upper word */
04305                         "psrlq       $16, %%mm1 \n\t"   
04306                         "paddsw    %%mm3, %%mm1 \n\t"   /* low word of MM1 = sum of all products */
04307                         /* ** divide the sum by Divisor ** */
04308                         "movd      %%eax, %%mm2 \n\t"   /* save EAX (columns) in MM2 */
04309                         "movd      %%edx, %%mm3 \n\t"   /* save EDX (row counter) in MM3 */
04310                         "movd      %%mm1, %%eax \n\t"   /* AX = sum */
04311                         "psraw       $15, %%mm1 \n\t"   /* replicate the sign bit of each word */
04312                         "movd      %%mm1, %%edx \n\t"   /* DX = sign extension of the sum */
04313                         "idivw             %%bx \n\t"   /* AX = DX:AX / Divisor (signed) */
04314                         "movd      %%eax, %%mm1 \n\t"   
04315                         "packuswb  %%mm0, %%mm1 \n\t"   /* saturate the quotient to 0..255 */
04316                         "movd      %%mm1, %%eax \n\t"   
04317                         "mov      %%al, (%%edi) \n\t"   /* store the result byte */
04318                         "movd      %%mm3, %%edx \n\t"   /* restore EDX (row counter) */
04319                         "movd      %%mm2, %%eax \n\t"   /* restore EAX (columns) */
04320                         /* ** move the window one pixel to the right ** */
04321                         "sub       %%eax, %%esi \n\t"   /* back up the two rows added above */
04322                         "sub       %%eax, %%esi \n\t"   
04323                         "inc              %%esi \n\t"   /* next source column */
04324                         "inc              %%edi \n\t"   /* next destination pixel */
04325                         
04326                         "dec              %%ecx \n\t"   /* one pixel done */
04327                         "jnz            .L10322 \n\t"   /* loop over the pixels of this row */
04328                         "add          $2, %%esi \n\t"   /* skip the 2 columns not visited: start of next source row */
04329                         "add          $2, %%edi \n\t"   /* skip the right and left border pixels of Dest */
04330                         "dec              %%edx \n\t"   /* one row done */
04331                         "jnz            .L10320 \n\t"   /* loop over the output rows */
04332                         
04333                         "emms                   \n\t"   /* leave MMX state */
04334                         "popa                   \n\t":"=m" (Dest)       /* %0 */
04335                         :"m"(Src),              /* %1 */
04336                         "m"(rows),              /* %2 */
04337                         "m"(columns),           /* %3 */
04338                         "m"(Kernel),            /* %4 */
04339                         "m"(Divisor)            /* %5 */
04340                         );
04341 #endif
04342 #endif
04343                 return (0);
04344         } else {
04345                 /* No MMX support detected and no C implementation available */
04346                 return (-1);
04347         }
04348 }
04349 
04364 int SDL_imageFilterConvolveKernel5x5Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns,
04365                                                                                    signed short *Kernel, unsigned char Divisor)
04366 {
04367         
04368         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
04369                 return(-1);
04370 
04371         if ((columns < 5) || (rows < 5) || (Divisor == 0))
04372                 return (-1);
04373 
04374         if ((SDL_imageFilterMMXdetect())) {
04375 #ifdef USE_MMX
04376 #if !defined(GCC__)
04377                 __asm
04378                 {
04379                         pusha
04380                                 pxor mm0, mm0           
04381                                 xor ebx, ebx    
04382                                 mov bl, Divisor         
04383                                 movd mm5, ebx           
04384                                 mov edx, Kernel         
04385                                 mov esi, Src    
04386                                 mov edi, Dest           
04387                                 add edi, 2      
04388                                 mov eax, columns        
04389                                 shl eax, 1      
04390                                 add edi, eax    
04391                                 shr eax, 1      
04392                                 mov ebx, rows           
04393                                 sub ebx, 4      
04394                                 
04395 L10330:
04396                         mov ecx, eax    
04397                                 sub ecx, 4      
04398                                 align 16                        
04399 L10332:
04400                         pxor mm7, mm7           
04401                                 movd mm6, esi           
04402                                 
04403                                 movq mm1, [esi]         
04404                         movq mm2, mm1           
04405                                 add esi, eax    
04406                                 movq mm3, [edx]         
04407                         add edx, 8      
04408                                 movq mm4, [edx]         
04409                         add edx, 8      
04410                                 punpcklbw mm1, mm0      
04411                                 punpckhbw mm2, mm0      
04412                                 pmullw mm1, mm3         
04413                                 pmullw mm2, mm4         
04414                                 paddsw mm1, mm2         
04415                                 paddsw mm7, mm1         
04416                                 
04417                                 movq mm1, [esi]         
04418                         movq mm2, mm1           
04419                                 add esi, eax    
04420                                 movq mm3, [edx]         
04421                         add edx, 8      
04422                                 movq mm4, [edx]         
04423                         add edx, 8      
04424                                 punpcklbw mm1, mm0      
04425                                 punpckhbw mm2, mm0      
04426                                 pmullw mm1, mm3         
04427                                 pmullw mm2, mm4         
04428                                 paddsw mm1, mm2         
04429                                 paddsw mm7, mm1         
04430                                 
04431                                 movq mm1, [esi]         
04432                         movq mm2, mm1           
04433                                 add esi, eax    
04434                                 movq mm3, [edx]         
04435                         add edx, 8      
04436                                 movq mm4, [edx]         
04437                         add edx, 8      
04438                                 punpcklbw mm1, mm0      
04439                                 punpckhbw mm2, mm0      
04440                                 pmullw mm1, mm3         
04441                                 pmullw mm2, mm4         
04442                                 paddsw mm1, mm2         
04443                                 paddsw mm7, mm1         
04444                                 
04445                                 movq mm1, [esi]         
04446                         movq mm2, mm1           
04447                                 add esi, eax    
04448                                 movq mm3, [edx]         
04449                         add edx, 8      
04450                                 movq mm4, [edx]         
04451                         add edx, 8      
04452                                 punpcklbw mm1, mm0      
04453                                 punpckhbw mm2, mm0      
04454                                 pmullw mm1, mm3         
04455                                 pmullw mm2, mm4         
04456                                 paddsw mm1, mm2         
04457                                 paddsw mm7, mm1         
04458                                 
04459                                 movq mm1, [esi]         
04460                         movq mm2, mm1           
04461                                 movq mm3, [edx]         
04462                         add edx, 8      
04463                                 movq mm4, [edx]         
04464                         punpcklbw mm1, mm0      
04465                                 punpckhbw mm2, mm0      
04466                                 pmullw mm1, mm3         
04467                                 pmullw mm2, mm4         
04468                                 paddsw mm1, mm2         
04469                                 paddsw mm7, mm1         
04470                                 
04471                                 movq mm3, mm7           
04472                                 psrlq mm7, 32           
04473                                 paddsw mm7, mm3         
04474                                 movq mm2, mm7           
04475                                 psrlq mm7, 16           
04476                                 paddsw mm7, mm2         
04477                                 
04478                                 movd mm1, eax           
04479                                 movd mm2, ebx           
04480                                 movd mm3, edx           
04481                                 movd eax, mm7           
04482                                 psraw mm7, 15           
04483                                 movd ebx, mm5           
04484                                 movd edx, mm7           
04485                                 idiv bx         
04486                                 movd mm7, eax           
04487                                 packuswb mm7, mm0       
04488                                 movd eax, mm7           
04489                                 mov [edi], al           
04490                                 movd edx, mm3           
04491                                 movd ebx, mm2           
04492                                 movd eax, mm1           
04493                                 
04494                                 movd esi, mm6           
04495                                 sub edx, 72     
04496                                 inc              esi            
04497                                 inc              edi            
04498                                 
04499                                 dec              ecx            
04500                                 jnz            L10332           
04501                                 add esi, 4      
04502                                 add edi, 4      
04503                                 dec              ebx            
04504                                 jnz            L10330           
04505                                 
04506                                 emms                            
04507                                 popa
04508                 }
04509 #else
04510                 asm volatile
04511                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        
04512                         "xor       %%ebx, %%ebx \n\t"   
04513                         "mov           %5, %%bl \n\t"   
04514                         "movd      %%ebx, %%mm5 \n\t"   
04515                         "mov          %4, %%edx \n\t"   
04516                         "mov          %1, %%esi \n\t"   
04517                         "mov          %0, %%edi \n\t"   
04518                         "add          $2, %%edi \n\t"   
04519                         "mov          %3, %%eax \n\t"   
04520                         "shl          $1, %%eax \n\t"   
04521                         "add       %%eax, %%edi \n\t"   
04522                         "shr          $1, %%eax \n\t"   
04523                         "mov          %2, %%ebx \n\t"   
04524                         "sub          $4, %%ebx \n\t"   
04525                         
04526                         ".L10330:               \n\t" "mov       %%eax, %%ecx \n\t"     
04527                         "sub          $4, %%ecx \n\t"   
04528                         ".align 16              \n\t"   
04529                         ".L10332:               \n\t" "pxor      %%mm7, %%mm7 \n\t"     
04530                         "movd      %%esi, %%mm6 \n\t"   
04531                         
04532                         "movq    (%%esi), %%mm1 \n\t"   
04533                         "movq      %%mm1, %%mm2 \n\t"   
04534                         "add       %%eax, %%esi \n\t"   
04535                         "movq    (%%edx), %%mm3 \n\t"   
04536                         "add          $8, %%edx \n\t"   
04537                         "movq    (%%edx), %%mm4 \n\t"   
04538                         "add          $8, %%edx \n\t"   
04539                         "punpcklbw %%mm0, %%mm1 \n\t"   
04540                         "punpckhbw %%mm0, %%mm2 \n\t"   
04541                         "pmullw    %%mm3, %%mm1 \n\t"   
04542                         "pmullw    %%mm4, %%mm2 \n\t"   
04543                         "paddsw    %%mm2, %%mm1 \n\t"   
04544                         "paddsw    %%mm1, %%mm7 \n\t"   
04545                         
04546                         "movq    (%%esi), %%mm1 \n\t"   
04547                         "movq      %%mm1, %%mm2 \n\t"   
04548                         "add       %%eax, %%esi \n\t"   
04549                         "movq    (%%edx), %%mm3 \n\t"   
04550                         "add          $8, %%edx \n\t"   
04551                         "movq    (%%edx), %%mm4 \n\t"   
04552                         "add          $8, %%edx \n\t"   
04553                         "punpcklbw %%mm0, %%mm1 \n\t"   
04554                         "punpckhbw %%mm0, %%mm2 \n\t"   
04555                         "pmullw    %%mm3, %%mm1 \n\t"   
04556                         "pmullw    %%mm4, %%mm2 \n\t"   
04557                         "paddsw    %%mm2, %%mm1 \n\t"   
04558                         "paddsw    %%mm1, %%mm7 \n\t"   
04559                         
04560                         "movq    (%%esi), %%mm1 \n\t"   
04561                         "movq      %%mm1, %%mm2 \n\t"   
04562                         "add       %%eax, %%esi \n\t"   
04563                         "movq    (%%edx), %%mm3 \n\t"   
04564                         "add          $8, %%edx \n\t"   
04565                         "movq    (%%edx), %%mm4 \n\t"   
04566                         "add          $8, %%edx \n\t"   
04567                         "punpcklbw %%mm0, %%mm1 \n\t"   
04568                         "punpckhbw %%mm0, %%mm2 \n\t"   
04569                         "pmullw    %%mm3, %%mm1 \n\t"   
04570                         "pmullw    %%mm4, %%mm2 \n\t"   
04571                         "paddsw    %%mm2, %%mm1 \n\t"   
04572                         "paddsw    %%mm1, %%mm7 \n\t"   
04573                         
04574                         "movq    (%%esi), %%mm1 \n\t"   
04575                         "movq      %%mm1, %%mm2 \n\t"   
04576                         "add       %%eax, %%esi \n\t"   
04577                         "movq    (%%edx), %%mm3 \n\t"   
04578                         "add          $8, %%edx \n\t"   
04579                         "movq    (%%edx), %%mm4 \n\t"   
04580                         "add          $8, %%edx \n\t"   
04581                         "punpcklbw %%mm0, %%mm1 \n\t"   
04582                         "punpckhbw %%mm0, %%mm2 \n\t"   
04583                         "pmullw    %%mm3, %%mm1 \n\t"   
04584                         "pmullw    %%mm4, %%mm2 \n\t"   
04585                         "paddsw    %%mm2, %%mm1 \n\t"   
04586                         "paddsw    %%mm1, %%mm7 \n\t"   
04587                         
04588                         "movq    (%%esi), %%mm1 \n\t"   
04589                         "movq      %%mm1, %%mm2 \n\t"   
04590                         "movq    (%%edx), %%mm3 \n\t"   
04591                         "add          $8, %%edx \n\t"   
04592                         "movq    (%%edx), %%mm4 \n\t"   
04593                         "punpcklbw %%mm0, %%mm1 \n\t"   
04594                         "punpckhbw %%mm0, %%mm2 \n\t"   
04595                         "pmullw    %%mm3, %%mm1 \n\t"   
04596                         "pmullw    %%mm4, %%mm2 \n\t"   
04597                         "paddsw    %%mm2, %%mm1 \n\t"   
04598                         "paddsw    %%mm1, %%mm7 \n\t"   
04599                         
04600                         "movq      %%mm7, %%mm3 \n\t"   
04601                         "psrlq       $32, %%mm7 \n\t"   
04602                         "paddsw    %%mm3, %%mm7 \n\t"   
04603                         "movq      %%mm7, %%mm2 \n\t"   
04604                         "psrlq       $16, %%mm7 \n\t"   
04605                         "paddsw    %%mm2, %%mm7 \n\t"   
04606                         
04607                         "movd      %%eax, %%mm1 \n\t"   
04608                         "movd      %%ebx, %%mm2 \n\t"   
04609                         "movd      %%edx, %%mm3 \n\t"   
04610                         "movd      %%mm7, %%eax \n\t"   
04611                         "psraw       $15, %%mm7 \n\t"   
04612                         "movd      %%mm5, %%ebx \n\t"   
04613                         "movd      %%mm7, %%edx \n\t"   
04614                         "idivw             %%bx \n\t"   
04615                         "movd      %%eax, %%mm7 \n\t"   
04616                         "packuswb  %%mm0, %%mm7 \n\t"   
04617                         "movd      %%mm7, %%eax \n\t"   
04618                         "mov      %%al, (%%edi) \n\t"   
04619                         "movd      %%mm3, %%edx \n\t"   
04620                         "movd      %%mm2, %%ebx \n\t"   
04621                         "movd      %%mm1, %%eax \n\t"   
04622                         
04623                         "movd      %%mm6, %%esi \n\t"   
04624                         "sub         $72, %%edx \n\t"   
04625                         "inc              %%esi \n\t"   
04626                         "inc              %%edi \n\t"   
04627                         
04628                         "dec              %%ecx \n\t"   
04629                         "jnz            .L10332 \n\t"   
04630                         "add          $4, %%esi \n\t"   
04631                         "add          $4, %%edi \n\t"   
04632                         "dec              %%ebx \n\t"   
04633                         "jnz            .L10330 \n\t"   
04634                         
04635                         "emms                   \n\t"   
04636                         "popa                   \n\t":"=m" (Dest)       
04637                         :"m"(Src),              
04638                         "m"(rows),              
04639                         "m"(columns),           
04640                         "m"(Kernel),            
04641                         "m"(Divisor)            
04642                         );
04643 #endif
04644 #endif
04645                 return (0);
04646         } else {
04647                 
04648                 return (-1);
04649         }
04650 }
04651 
/**
 * 7x7 convolution of a byte image followed by a division of each sum by Divisor.
 *
 * Only an MMX implementation exists: the work is done by the inline assembly below
 * (MSVC-style when GCC__ is not defined, GCC extended asm otherwise), and that
 * assembly is compiled only when USE_MMX is defined. There is no C fallback.
 *
 * The assembly uses `columns` as the row stride in bytes for both Src and Dest.
 * For every 7x7 window it multiplies source bytes by 16-bit kernel coefficients
 * (pmullw, low 16 bits kept), accumulates with signed saturation (paddsw), divides
 * the 16-bit sum by Divisor with a signed 16-bit idiv, saturates the quotient to an
 * unsigned byte (packuswb) and stores it at the window centre. Output therefore
 * starts at Dest + 3*columns + 3 and covers (rows-6) x (columns-6) pixels; the
 * 3-pixel border of Dest is not written.
 *
 * Kernel is read as 7 rows of 8 signed shorts (two 8-byte loads per row, 112 bytes
 * in total), and 8 source bytes are loaded per window row.
 * NOTE(review): the 8th coefficient of each kernel row is presumably expected to
 * be 0 so that the extra source byte does not contribute - confirm with callers.
 * NOTE(review): the 8-byte source load for the last window of the last processed
 * row extends one byte past Src + rows*columns - confirm that Src is padded.
 *
 * @param Src     Source image bytes; must not be NULL.
 * @param Dest    Destination image bytes; must not be NULL.
 * @param rows    Number of image rows; must be >= 7.
 * @param columns Number of bytes per image row; must be >= 7.
 * @param Kernel  Convolution coefficients (layout described above); must not be NULL.
 * @param Divisor Value each sum is divided by; must not be 0.
 *
 * @return 0 when SDL_imageFilterMMXdetect() reports MMX; -1 on invalid arguments
 *         or when MMX is not detected.
 */
04666 int SDL_imageFilterConvolveKernel7x7Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns,
04667                                                                                    signed short *Kernel, unsigned char Divisor)
04668 {
04669         /* Reject missing buffers. */
04670         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
04671                 return(-1);
04672 /* The 7x7 window must fit in the image and the divisor must be non-zero. */
04673         if ((columns < 7) || (rows < 7) || (Divisor == 0))
04674                 return (-1);
04675 /* MMX path only; without MMX the function reports failure. */
04676         if ((SDL_imageFilterMMXdetect())) {
04677 #ifdef USE_MMX
04678 #if !defined(GCC__)
04679                 __asm
04680                 {
04681                         pusha
04682                                 pxor mm0, mm0           /* mm0 = 0, used to zero-extend bytes to words */
04683                                 xor ebx, ebx    /* clear ebx */
04684                                 mov bl, Divisor         /* ebx = Divisor, zero-extended */
04685                                 movd mm5, ebx           /* keep the divisor in mm5 */
04686                                 mov edx, Kernel         /* edx = kernel read pointer */
04687                                 mov esi, Src    /* esi = source read pointer */
04688                                 mov edi, Dest           /* edi = destination write pointer */
04689                                 add edi, 3      /* skip 3 columns ... */
04690                                 mov eax, columns        /* eax = row stride in bytes */
04691                                 add edi, eax    /* ... and 3 rows: edi = Dest + 3*columns + 3 */
04692                                 add edi, eax
04693                                 add edi, eax
04694                                 mov ebx, rows           /* ebx = output row counter */
04695                                 sub ebx, 6      /* rows - 6 output rows */
04696                                 /* ---- outer loop: one output row per pass ---- */
04697 L10340:
04698                         mov ecx, eax    /* ecx = output column counter */
04699                                 sub ecx, 6      /* columns - 6 output pixels per row */
04700                                 align 16                        
04701 L10342:
04702                         pxor mm7, mm7           /* mm7 = accumulator (4 signed words) */
04703                                 movd mm6, esi           /* remember the window's top-left address */
04704                                 /* ---- window row 1 ---- */
04705                                 movq mm1, [esi]         /* load 8 source bytes */
04706                         movq mm2, mm1           /* copy them for the high half */
04707                                 add esi, eax    /* advance to the next source row */
04708                                 movq mm3, [edx]         /* kernel words 0..3 of this row */
04709                         add edx, 8      
04710                                 movq mm4, [edx]         /* kernel words 4..7 of this row */
04711                         add edx, 8      
04712                                 punpcklbw mm1, mm0      /* source bytes 0..3 -> words */
04713                                 punpckhbw mm2, mm0      /* source bytes 4..7 -> words */
04714                                 pmullw mm1, mm3         /* multiply, keeping the low 16 bits of each product */
04715                                 pmullw mm2, mm4         
04716                                 paddsw mm1, mm2         /* add both halves with signed saturation */
04717                                 paddsw mm7, mm1         /* accumulate into mm7 */
04718                                 /* ---- window row 2 (same sequence) ---- */
04719                                 movq mm1, [esi]         
04720                         movq mm2, mm1           
04721                                 add esi, eax    
04722                                 movq mm3, [edx]         
04723                         add edx, 8      
04724                                 movq mm4, [edx]         
04725                         add edx, 8      
04726                                 punpcklbw mm1, mm0      
04727                                 punpckhbw mm2, mm0      
04728                                 pmullw mm1, mm3         
04729                                 pmullw mm2, mm4         
04730                                 paddsw mm1, mm2         
04731                                 paddsw mm7, mm1         
04732                                 /* ---- window row 3 ---- */
04733                                 movq mm1, [esi]         
04734                         movq mm2, mm1           
04735                                 add esi, eax    
04736                                 movq mm3, [edx]         
04737                         add edx, 8      
04738                                 movq mm4, [edx]         
04739                         add edx, 8      
04740                                 punpcklbw mm1, mm0      
04741                                 punpckhbw mm2, mm0      
04742                                 pmullw mm1, mm3         
04743                                 pmullw mm2, mm4         
04744                                 paddsw mm1, mm2         
04745                                 paddsw mm7, mm1         
04746                                 /* ---- window row 4 ---- */
04747                                 movq mm1, [esi]         
04748                         movq mm2, mm1           
04749                                 add esi, eax    
04750                                 movq mm3, [edx]         
04751                         add edx, 8      
04752                                 movq mm4, [edx]         
04753                         add edx, 8      
04754                                 punpcklbw mm1, mm0      
04755                                 punpckhbw mm2, mm0      
04756                                 pmullw mm1, mm3         
04757                                 pmullw mm2, mm4         
04758                                 paddsw mm1, mm2         
04759                                 paddsw mm7, mm1         
04760                                 /* ---- window row 5 ---- */
04761                                 movq mm1, [esi]         
04762                         movq mm2, mm1           
04763                                 add esi, eax    
04764                                 movq mm3, [edx]         
04765                         add edx, 8      
04766                                 movq mm4, [edx]         
04767                         add edx, 8      
04768                                 punpcklbw mm1, mm0      
04769                                 punpckhbw mm2, mm0      
04770                                 pmullw mm1, mm3         
04771                                 pmullw mm2, mm4         
04772                                 paddsw mm1, mm2         
04773                                 paddsw mm7, mm1         
04774                                 /* ---- window row 6 ---- */
04775                                 movq mm1, [esi]         
04776                         movq mm2, mm1           
04777                                 add esi, eax    
04778                                 movq mm3, [edx]         
04779                         add edx, 8      
04780                                 movq mm4, [edx]         
04781                         add edx, 8      
04782                                 punpcklbw mm1, mm0      
04783                                 punpckhbw mm2, mm0      
04784                                 pmullw mm1, mm3         
04785                                 pmullw mm2, mm4         
04786                                 paddsw mm1, mm2         
04787                                 paddsw mm7, mm1         
04788                                 /* ---- window row 7 (last): esi is not advanced, edx moves only 8 bytes ---- */
04789                                 movq mm1, [esi]         
04790                         movq mm2, mm1           
04791                                 movq mm3, [edx]         
04792                         add edx, 8      
04793                                 movq mm4, [edx]         
04794                         punpcklbw mm1, mm0      
04795                                 punpckhbw mm2, mm0      
04796                                 pmullw mm1, mm3         
04797                                 pmullw mm2, mm4         
04798                                 paddsw mm1, mm2         
04799                                 paddsw mm7, mm1         
04800                                 /* ---- fold the 4 partial sums into the low word of mm7 ---- */
04801                                 movq mm3, mm7           
04802                                 psrlq mm7, 32           
04803                                 paddsw mm7, mm3         
04804                                 movq mm2, mm7           
04805                                 psrlq mm7, 16           
04806                                 paddsw mm7, mm2         
04807                                 /* ---- divide: park eax/ebx/edx in mm1..mm3, idiv, then restore them ---- */
04808                                 movd mm1, eax           
04809                                 movd mm2, ebx           
04810                                 movd mm3, edx           
04811                                 movd eax, mm7           /* ax = 16-bit sum */
04812                                 psraw mm7, 15           /* every word becomes its sign (0 or -1) */
04813                                 movd ebx, mm5           /* bx = divisor */
04814                                 movd edx, mm7           /* dx = sign extension of ax */
04815                                 idiv bx         /* ax = dx:ax / bx (signed) */
04816                                 movd mm7, eax           
04817                                 packuswb mm7, mm0       /* saturate words to unsigned bytes */
04818                                 movd eax, mm7           
04819                                 mov [edi], al           /* store the result byte */
04820                                 movd edx, mm3           
04821                                 movd ebx, mm2           
04822                                 movd eax, mm1           
04823                                 /* ---- step to the next output pixel ---- */
04824                                 movd esi, mm6           /* back to the window's top-left address */
04825                                 sub edx, 104    /* rewind the kernel pointer (6*16 + 8 bytes) */
04826                                 inc              esi            
04827                                 inc              edi            
04828                                 
04829                                 dec              ecx            
04830                                 jnz            L10342           
04831                                 add esi, 6      /* skip the 6 columns that have no full window */
04832                                 add edi, 6      
04833                                 dec              ebx            
04834                                 jnz            L10340           
04835                                 
04836                                 emms                            /* empty the MMX state */
04837                                 popa
04838                 }
04839 #else /* GCC__: same algorithm as above in GCC extended asm (AT&T operand order) */
04840                 asm volatile
04841                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        /* mm0 = 0 */
04842                         "xor       %%ebx, %%ebx \n\t"   
04843                         "mov           %5, %%bl \n\t"   /* ebx = Divisor, zero-extended */
04844                         "movd      %%ebx, %%mm5 \n\t"   /* keep the divisor in mm5 */
04845                         "mov          %4, %%edx \n\t"   /* edx = Kernel */
04846                         "mov          %1, %%esi \n\t"   /* esi = Src */
04847                         "mov          %0, %%edi \n\t"   /* edi = Dest */
04848                         "add          $3, %%edi \n\t"   /* skip 3 columns ... */
04849                         "mov          %3, %%eax \n\t"   /* eax = columns (row stride) */
04850                         "add       %%eax, %%edi \n\t"   /* ... and 3 rows */
04851                         "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "mov          %2, %%ebx \n\t"       /* edi = Dest + 3*columns + 3; ebx = rows */
04852                         "sub          $6, %%ebx \n\t"   /* rows - 6 output rows */
04853                         /* ---- outer loop: one output row per pass ---- */
04854                         ".L10340:               \n\t" "mov       %%eax, %%ecx \n\t"     /* ecx = output column counter */
04855                         "sub          $6, %%ecx \n\t"   /* columns - 6 output pixels per row */
04856                         ".align 16              \n\t"   
04857                         ".L10342:               \n\t" "pxor      %%mm7, %%mm7 \n\t"     /* mm7 = accumulator */
04858                         "movd      %%esi, %%mm6 \n\t"   /* remember the window's top-left address */
04859                         /* ---- window row 1 ---- */
04860                         "movq    (%%esi), %%mm1 \n\t"   /* load 8 source bytes */
04861                         "movq      %%mm1, %%mm2 \n\t"   /* copy them for the high half */
04862                         "add       %%eax, %%esi \n\t"   /* advance to the next source row */
04863                         "movq    (%%edx), %%mm3 \n\t"   /* kernel words 0..3 of this row */
04864                         "add          $8, %%edx \n\t"   
04865                         "movq    (%%edx), %%mm4 \n\t"   /* kernel words 4..7 of this row */
04866                         "add          $8, %%edx \n\t"   
04867                         "punpcklbw %%mm0, %%mm1 \n\t"   /* source bytes 0..3 -> words */
04868                         "punpckhbw %%mm0, %%mm2 \n\t"   /* source bytes 4..7 -> words */
04869                         "pmullw    %%mm3, %%mm1 \n\t"   /* multiply, keeping the low 16 bits of each product */
04870                         "pmullw    %%mm4, %%mm2 \n\t"   
04871                         "paddsw    %%mm2, %%mm1 \n\t"   /* add both halves with signed saturation */
04872                         "paddsw    %%mm1, %%mm7 \n\t"   /* accumulate into mm7 */
04873                         /* ---- window row 2 (same sequence) ---- */
04874                         "movq    (%%esi), %%mm1 \n\t"   
04875                         "movq      %%mm1, %%mm2 \n\t"   
04876                         "add       %%eax, %%esi \n\t"   
04877                         "movq    (%%edx), %%mm3 \n\t"   
04878                         "add          $8, %%edx \n\t"   
04879                         "movq    (%%edx), %%mm4 \n\t"   
04880                         "add          $8, %%edx \n\t"   
04881                         "punpcklbw %%mm0, %%mm1 \n\t"   
04882                         "punpckhbw %%mm0, %%mm2 \n\t"   
04883                         "pmullw    %%mm3, %%mm1 \n\t"   
04884                         "pmullw    %%mm4, %%mm2 \n\t"   
04885                         "paddsw    %%mm2, %%mm1 \n\t"   
04886                         "paddsw    %%mm1, %%mm7 \n\t"   
04887                         /* ---- window row 3 ---- */
04888                         "movq    (%%esi), %%mm1 \n\t"   
04889                         "movq      %%mm1, %%mm2 \n\t"   
04890                         "add       %%eax, %%esi \n\t"   
04891                         "movq    (%%edx), %%mm3 \n\t"   
04892                         "add          $8, %%edx \n\t"   
04893                         "movq    (%%edx), %%mm4 \n\t"   
04894                         "add          $8, %%edx \n\t"   
04895                         "punpcklbw %%mm0, %%mm1 \n\t"   
04896                         "punpckhbw %%mm0, %%mm2 \n\t"   
04897                         "pmullw    %%mm3, %%mm1 \n\t"   
04898                         "pmullw    %%mm4, %%mm2 \n\t"   
04899                         "paddsw    %%mm2, %%mm1 \n\t"   
04900                         "paddsw    %%mm1, %%mm7 \n\t"   
04901                         /* ---- window row 4 ---- */
04902                         "movq    (%%esi), %%mm1 \n\t"   
04903                         "movq      %%mm1, %%mm2 \n\t"   
04904                         "add       %%eax, %%esi \n\t"   
04905                         "movq    (%%edx), %%mm3 \n\t"   
04906                         "add          $8, %%edx \n\t"   
04907                         "movq    (%%edx), %%mm4 \n\t"   
04908                         "add          $8, %%edx \n\t"   
04909                         "punpcklbw %%mm0, %%mm1 \n\t"   
04910                         "punpckhbw %%mm0, %%mm2 \n\t"   
04911                         "pmullw    %%mm3, %%mm1 \n\t"   
04912                         "pmullw    %%mm4, %%mm2 \n\t"   
04913                         "paddsw    %%mm2, %%mm1 \n\t"   
04914                         "paddsw    %%mm1, %%mm7 \n\t"   
04915                         /* ---- window row 5 ---- */
04916                         "movq    (%%esi), %%mm1 \n\t"   
04917                         "movq      %%mm1, %%mm2 \n\t"   
04918                         "add       %%eax, %%esi \n\t"   
04919                         "movq    (%%edx), %%mm3 \n\t"   
04920                         "add          $8, %%edx \n\t"   
04921                         "movq    (%%edx), %%mm4 \n\t"   
04922                         "add          $8, %%edx \n\t"   
04923                         "punpcklbw %%mm0, %%mm1 \n\t"   
04924                         "punpckhbw %%mm0, %%mm2 \n\t"   
04925                         "pmullw    %%mm3, %%mm1 \n\t"   
04926                         "pmullw    %%mm4, %%mm2 \n\t"   
04927                         "paddsw    %%mm2, %%mm1 \n\t"   
04928                         "paddsw    %%mm1, %%mm7 \n\t"   
04929                         /* ---- window row 6 ---- */
04930                         "movq    (%%esi), %%mm1 \n\t"   
04931                         "movq      %%mm1, %%mm2 \n\t"   
04932                         "add       %%eax, %%esi \n\t"   
04933                         "movq    (%%edx), %%mm3 \n\t"   
04934                         "add          $8, %%edx \n\t"   
04935                         "movq    (%%edx), %%mm4 \n\t"   
04936                         "add          $8, %%edx \n\t"   
04937                         "punpcklbw %%mm0, %%mm1 \n\t"   
04938                         "punpckhbw %%mm0, %%mm2 \n\t"   
04939                         "pmullw    %%mm3, %%mm1 \n\t"   
04940                         "pmullw    %%mm4, %%mm2 \n\t"   
04941                         "paddsw    %%mm2, %%mm1 \n\t"   
04942                         "paddsw    %%mm1, %%mm7 \n\t"   
04943                         /* ---- window row 7 (last): esi is not advanced, edx moves only 8 bytes ---- */
04944                         "movq    (%%esi), %%mm1 \n\t"   
04945                         "movq      %%mm1, %%mm2 \n\t"   
04946                         "movq    (%%edx), %%mm3 \n\t"   
04947                         "add          $8, %%edx \n\t"   
04948                         "movq    (%%edx), %%mm4 \n\t"   
04949                         "punpcklbw %%mm0, %%mm1 \n\t"   
04950                         "punpckhbw %%mm0, %%mm2 \n\t"   
04951                         "pmullw    %%mm3, %%mm1 \n\t"   
04952                         "pmullw    %%mm4, %%mm2 \n\t"   
04953                         "paddsw    %%mm2, %%mm1 \n\t"   
04954                         "paddsw    %%mm1, %%mm7 \n\t"   
04955                         /* ---- fold the 4 partial sums into the low word of mm7 ---- */
04956                         "movq      %%mm7, %%mm3 \n\t"   
04957                         "psrlq       $32, %%mm7 \n\t"   
04958                         "paddsw    %%mm3, %%mm7 \n\t"   
04959                         "movq      %%mm7, %%mm2 \n\t"   
04960                         "psrlq       $16, %%mm7 \n\t"   
04961                         "paddsw    %%mm2, %%mm7 \n\t"   
04962                         /* ---- divide: park eax/ebx/edx in mm1..mm3, idiv, then restore them ---- */
04963                         "movd      %%eax, %%mm1 \n\t"   
04964                         "movd      %%ebx, %%mm2 \n\t"   
04965                         "movd      %%edx, %%mm3 \n\t"   
04966                         "movd      %%mm7, %%eax \n\t"   /* ax = 16-bit sum */
04967                         "psraw       $15, %%mm7 \n\t"   /* every word becomes its sign (0 or -1) */
04968                         "movd      %%mm5, %%ebx \n\t"   /* bx = divisor */
04969                         "movd      %%mm7, %%edx \n\t"   /* dx = sign extension of ax */
04970                         "idivw             %%bx \n\t"   /* ax = dx:ax / bx (signed) */
04971                         "movd      %%eax, %%mm7 \n\t"   
04972                         "packuswb  %%mm0, %%mm7 \n\t"   /* saturate words to unsigned bytes */
04973                         "movd      %%mm7, %%eax \n\t"   
04974                         "mov      %%al, (%%edi) \n\t"   /* store the result byte */
04975                         "movd      %%mm3, %%edx \n\t"   
04976                         "movd      %%mm2, %%ebx \n\t"   
04977                         "movd      %%mm1, %%eax \n\t"   
04978                         /* ---- step to the next output pixel ---- */
04979                         "movd      %%mm6, %%esi \n\t"   /* back to the window's top-left address */
04980                         "sub        $104, %%edx \n\t"   /* rewind the kernel pointer (6*16 + 8 bytes) */
04981                         "inc              %%esi \n\t"   
04982                         "inc              %%edi \n\t"   
04983                         
04984                         "dec              %%ecx \n\t"   
04985                         "jnz            .L10342 \n\t"   
04986                         "add          $6, %%esi \n\t"   /* skip the 6 columns that have no full window */
04987                         "add          $6, %%edi \n\t"   
04988                         "dec              %%ebx \n\t"   
04989                         "jnz            .L10340 \n\t"   
04990                         
04991                         "emms                   \n\t"   /* empty the MMX state */
04992                         "popa                   \n\t":"=m" (Dest)       /* %0 - NOTE(review): declared as an output but only read by the asm */
04993                         :"m"(Src),              /* %1 */
04994                         "m"(rows),              /* %2 */
04995                         "m"(columns),           /* %3 */
04996                         "m"(Kernel),            /* %4 */
04997                         "m"(Divisor)            /* %5 */
04998                         );
04999 #endif
05000 #endif
05001                 return (0);
05002         } else {
05003                 /* MMX not detected; this function has no C implementation. */
05004                 return (-1);
05005         }
05006 }
05007 
05022 int SDL_imageFilterConvolveKernel9x9Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns,
05023                                                                                    signed short *Kernel, unsigned char Divisor)
05024 {
05025         
05026         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
05027                 return(-1);
05028 
05029         if ((columns < 9) || (rows < 9) || (Divisor == 0))
05030                 return (-1);
05031 
05032         if ((SDL_imageFilterMMXdetect())) {
05033 #ifdef USE_MMX
05034 #if !defined(GCC__)
05035                 __asm
05036                 {
05037                         pusha
05038                                 pxor mm0, mm0           
05039                                 xor ebx, ebx    
05040                                 mov bl, Divisor         
05041                                 movd mm5, ebx           
05042                                 mov edx, Kernel         
05043                                 mov esi, Src    
05044                                 mov edi, Dest           
05045                                 add edi, 4      
05046                                 mov eax, columns        
05047                                 add edi, eax    
05048                                 add edi, eax
05049                                 add edi, eax
05050                                 add edi, eax
05051                                 mov ebx, rows           
05052                                 sub ebx, 8      
05053                                 
05054 L10350:
05055                         mov ecx, eax    
05056                                 sub ecx, 8      
05057                                 align 16                        
05058 L10352:
05059                         pxor mm7, mm7           
05060                                 movd mm6, esi           
05061                                 
05062                                 movq mm1, [esi]         
05063                         movq mm2, mm1           
05064                                 inc              esi            
05065                                 movq mm3, [edx]         
05066                         add edx, 8      
05067                                 movq mm4, [edx]         
05068                         add edx, 8      
05069                                 punpcklbw mm1, mm0      
05070                                 punpckhbw mm2, mm0      
05071                                 pmullw mm1, mm3         
05072                                 pmullw mm2, mm4         
05073                                 paddsw mm1, mm2         
05074                                 paddsw mm7, mm1         
05075                                 movq mm1, [esi]         
05076                         dec              esi
05077                                 add esi, eax    
05078                                 movq mm3, [edx]         
05079                         add edx, 8      
05080                                 punpcklbw mm1, mm0      
05081                                 pmullw mm1, mm3         
05082                                 paddsw mm7, mm1         
05083                                 
05084                                 movq mm1, [esi]         
05085                         movq mm2, mm1           
05086                                 inc              esi            
05087                                 movq mm3, [edx]         
05088                         add edx, 8      
05089                                 movq mm4, [edx]         
05090                         add edx, 8      
05091                                 punpcklbw mm1, mm0      
05092                                 punpckhbw mm2, mm0      
05093                                 pmullw mm1, mm3         
05094                                 pmullw mm2, mm4         
05095                                 paddsw mm1, mm2         
05096                                 paddsw mm7, mm1         
05097                                 movq mm1, [esi]         
05098                         dec              esi
05099                                 add esi, eax    
05100                                 movq mm3, [edx]         
05101                         add edx, 8      
05102                                 punpcklbw mm1, mm0      
05103                                 pmullw mm1, mm3         
05104                                 paddsw mm7, mm1         
05105                                 
05106                                 movq mm1, [esi]         
05107                         movq mm2, mm1           
05108                                 inc              esi            
05109                                 movq mm3, [edx]         
05110                         add edx, 8      
05111                                 movq mm4, [edx]         
05112                         add edx, 8      
05113                                 punpcklbw mm1, mm0      
05114                                 punpckhbw mm2, mm0      
05115                                 pmullw mm1, mm3         
05116                                 pmullw mm2, mm4         
05117                                 paddsw mm1, mm2         
05118                                 paddsw mm7, mm1         
05119                                 movq mm1, [esi]         
05120                         dec              esi
05121                                 add esi, eax    
05122                                 movq mm3, [edx]         
05123                         add edx, 8      
05124                                 punpcklbw mm1, mm0      
05125                                 pmullw mm1, mm3         
05126                                 paddsw mm7, mm1         
05127                                 
05128                                 movq mm1, [esi]         
05129                         movq mm2, mm1           
05130                                 inc              esi            
05131                                 movq mm3, [edx]         
05132                         add edx, 8      
05133                                 movq mm4, [edx]         
05134                         add edx, 8      
05135                                 punpcklbw mm1, mm0      
05136                                 punpckhbw mm2, mm0      
05137                                 pmullw mm1, mm3         
05138                                 pmullw mm2, mm4         
05139                                 paddsw mm1, mm2         
05140                                 paddsw mm7, mm1         
05141                                 movq mm1, [esi]         
05142                         dec              esi
05143                                 add esi, eax    
05144                                 movq mm3, [edx]         
05145                         add edx, 8      
05146                                 punpcklbw mm1, mm0      
05147                                 pmullw mm1, mm3         
05148                                 paddsw mm7, mm1         
05149                                 
05150                                 movq mm1, [esi]         
05151                         movq mm2, mm1           
05152                                 inc              esi            
05153                                 movq mm3, [edx]         
05154                         add edx, 8      
05155                                 movq mm4, [edx]         
05156                         add edx, 8      
05157                                 punpcklbw mm1, mm0      
05158                                 punpckhbw mm2, mm0      
05159                                 pmullw mm1, mm3         
05160                                 pmullw mm2, mm4         
05161                                 paddsw mm1, mm2         
05162                                 paddsw mm7, mm1         
05163                                 movq mm1, [esi]         
05164                         dec              esi
05165                                 add esi, eax    
05166                                 movq mm3, [edx]         
05167                         add edx, 8      
05168                                 punpcklbw mm1, mm0      
05169                                 pmullw mm1, mm3         
05170                                 paddsw mm7, mm1         
05171                                 
05172                                 movq mm1, [esi]         
05173                         movq mm2, mm1           
05174                                 inc              esi            
05175                                 movq mm3, [edx]         
05176                         add edx, 8      
05177                                 movq mm4, [edx]         
05178                         add edx, 8      
05179                                 punpcklbw mm1, mm0      
05180                                 punpckhbw mm2, mm0      
05181                                 pmullw mm1, mm3         
05182                                 pmullw mm2, mm4         
05183                                 paddsw mm1, mm2         
05184                                 paddsw mm7, mm1         
05185                                 movq mm1, [esi]         
05186                         dec              esi
05187                                 add esi, eax    
05188                                 movq mm3, [edx]         
05189                         add edx, 8      
05190                                 punpcklbw mm1, mm0      
05191                                 pmullw mm1, mm3         
05192                                 paddsw mm7, mm1         
05193                                 
05194                                 movq mm1, [esi]         
05195                         movq mm2, mm1           
05196                                 inc              esi            
05197                                 movq mm3, [edx]         
05198                         add edx, 8      
05199                                 movq mm4, [edx]         
05200                         add edx, 8      
05201                                 punpcklbw mm1, mm0      
05202                                 punpckhbw mm2, mm0      
05203                                 pmullw mm1, mm3         
05204                                 pmullw mm2, mm4         
05205                                 paddsw mm1, mm2         
05206                                 paddsw mm7, mm1         
05207                                 movq mm1, [esi]         
05208                         dec              esi
05209                                 add esi, eax    
05210                                 movq mm3, [edx]         
05211                         add edx, 8      
05212                                 punpcklbw mm1, mm0      
05213                                 pmullw mm1, mm3         
05214                                 paddsw mm7, mm1         
05215                                 
05216                                 movq mm1, [esi]         
05217                         movq mm2, mm1           
05218                                 inc              esi            
05219                                 movq mm3, [edx]         
05220                         add edx, 8      
05221                                 movq mm4, [edx]         
05222                         add edx, 8      
05223                                 punpcklbw mm1, mm0      
05224                                 punpckhbw mm2, mm0      
05225                                 pmullw mm1, mm3         
05226                                 pmullw mm2, mm4         
05227                                 paddsw mm1, mm2         
05228                                 paddsw mm7, mm1         
05229                                 movq mm1, [esi]         
05230                         dec              esi
05231                                 add esi, eax    
05232                                 movq mm3, [edx]         
05233                         add edx, 8      
05234                                 punpcklbw mm1, mm0      
05235                                 pmullw mm1, mm3         
05236                                 paddsw mm7, mm1         
05237                                 
05238                                 movq mm1, [esi]         
05239                         movq mm2, mm1           
05240                                 inc              esi            
05241                                 movq mm3, [edx]         
05242                         add edx, 8      
05243                                 movq mm4, [edx]         
05244                         add edx, 8      
05245                                 punpcklbw mm1, mm0      
05246                                 punpckhbw mm2, mm0      
05247                                 pmullw mm1, mm3         
05248                                 pmullw mm2, mm4         
05249                                 paddsw mm1, mm2         
05250                                 paddsw mm7, mm1         
05251                                 movq mm1, [esi]         
05252                         movq mm3, [edx]         
05253                         punpcklbw mm1, mm0      
05254                                 pmullw mm1, mm3         
05255                                 paddsw mm7, mm1         
05256                                 
05257                                 movq mm3, mm7           
05258                                 psrlq mm7, 32           
05259                                 paddsw mm7, mm3         
05260                                 movq mm2, mm7           
05261                                 psrlq mm7, 16           
05262                                 paddsw mm7, mm2         
05263                                 
05264                                 movd mm1, eax           
05265                                 movd mm2, ebx           
05266                                 movd mm3, edx           
05267                                 movd eax, mm7           
05268                                 psraw mm7, 15           
05269                                 movd ebx, mm5           
05270                                 movd edx, mm7           
05271                                 idiv bx         
05272                                 movd mm7, eax           
05273                                 packuswb mm7, mm0       
05274                                 movd eax, mm7           
05275                                 mov [edi], al           
05276                                 movd edx, mm3           
05277                                 movd ebx, mm2           
05278                                 movd eax, mm1           
05279                                 
05280                                 movd esi, mm6           
05281                                 sub edx, 208    
05282                                 inc              esi            
05283                                 inc              edi            
05284                                 
05285                                 dec              ecx            
05286                                 jnz            L10352           
05287                                 add esi, 8      
05288                                 add edi, 8      
05289                                 dec              ebx            
05290                                 jnz            L10350           
05291                                 
05292                                 emms                            
05293                                 popa
05294                 }
05295 #else
05296                 asm volatile
05297                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        
05298                         "xor       %%ebx, %%ebx \n\t"   
05299                         "mov           %5, %%bl \n\t"   
05300                         "movd      %%ebx, %%mm5 \n\t"   
05301                         "mov          %4, %%edx \n\t"   
05302                         "mov          %1, %%esi \n\t"   
05303                         "mov          %0, %%edi \n\t"   
05304                         "add          $4, %%edi \n\t"   
05305                         "mov          %3, %%eax \n\t"   
05306                         "add       %%eax, %%edi \n\t"   
05307                         "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "mov          %2, %%ebx \n\t" 
05308                         "sub          $8, %%ebx \n\t"   
05309                         
05310                         ".L10350:               \n\t" "mov       %%eax, %%ecx \n\t"     
05311                         "sub          $8, %%ecx \n\t"   
05312                         ".align 16              \n\t"   
05313                         ".L10352:               \n\t" "pxor      %%mm7, %%mm7 \n\t"     
05314                         "movd      %%esi, %%mm6 \n\t"   
05315                         
05316                         "movq    (%%esi), %%mm1 \n\t"   
05317                         "movq      %%mm1, %%mm2 \n\t"   
05318                         "inc              %%esi \n\t"   
05319                         "movq    (%%edx), %%mm3 \n\t"   
05320                         "add          $8, %%edx \n\t"   
05321                         "movq    (%%edx), %%mm4 \n\t"   
05322                         "add          $8, %%edx \n\t"   
05323                         "punpcklbw %%mm0, %%mm1 \n\t"   
05324                         "punpckhbw %%mm0, %%mm2 \n\t"   
05325                         "pmullw    %%mm3, %%mm1 \n\t"   
05326                         "pmullw    %%mm4, %%mm2 \n\t"   
05327                         "paddsw    %%mm2, %%mm1 \n\t"   
05328                         "paddsw    %%mm1, %%mm7 \n\t"   
05329                         "movq    (%%esi), %%mm1 \n\t"   
05330                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
05331                         "movq    (%%edx), %%mm3 \n\t"   
05332                         "add          $8, %%edx \n\t"   
05333                         "punpcklbw %%mm0, %%mm1 \n\t"   
05334                         "pmullw    %%mm3, %%mm1 \n\t"   
05335                         "paddsw    %%mm1, %%mm7 \n\t"   
05336                         
05337                         "movq    (%%esi), %%mm1 \n\t"   
05338                         "movq      %%mm1, %%mm2 \n\t"   
05339                         "inc              %%esi \n\t"   
05340                         "movq    (%%edx), %%mm3 \n\t"   
05341                         "add          $8, %%edx \n\t"   
05342                         "movq    (%%edx), %%mm4 \n\t"   
05343                         "add          $8, %%edx \n\t"   
05344                         "punpcklbw %%mm0, %%mm1 \n\t"   
05345                         "punpckhbw %%mm0, %%mm2 \n\t"   
05346                         "pmullw    %%mm3, %%mm1 \n\t"   
05347                         "pmullw    %%mm4, %%mm2 \n\t"   
05348                         "paddsw    %%mm2, %%mm1 \n\t"   
05349                         "paddsw    %%mm1, %%mm7 \n\t"   
05350                         "movq    (%%esi), %%mm1 \n\t"   
05351                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
05352                         "movq    (%%edx), %%mm3 \n\t"   
05353                         "add          $8, %%edx \n\t"   
05354                         "punpcklbw %%mm0, %%mm1 \n\t"   
05355                         "pmullw    %%mm3, %%mm1 \n\t"   
05356                         "paddsw    %%mm1, %%mm7 \n\t"   
05357                         
05358                         "movq    (%%esi), %%mm1 \n\t"   
05359                         "movq      %%mm1, %%mm2 \n\t"   
05360                         "inc              %%esi \n\t"   
05361                         "movq    (%%edx), %%mm3 \n\t"   
05362                         "add          $8, %%edx \n\t"   
05363                         "movq    (%%edx), %%mm4 \n\t"   
05364                         "add          $8, %%edx \n\t"   
05365                         "punpcklbw %%mm0, %%mm1 \n\t"   
05366                         "punpckhbw %%mm0, %%mm2 \n\t"   
05367                         "pmullw    %%mm3, %%mm1 \n\t"   
05368                         "pmullw    %%mm4, %%mm2 \n\t"   
05369                         "paddsw    %%mm2, %%mm1 \n\t"   
05370                         "paddsw    %%mm1, %%mm7 \n\t"   
05371                         "movq    (%%esi), %%mm1 \n\t"   
05372                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
05373                         "movq    (%%edx), %%mm3 \n\t"   
05374                         "add          $8, %%edx \n\t"   
05375                         "punpcklbw %%mm0, %%mm1 \n\t"   
05376                         "pmullw    %%mm3, %%mm1 \n\t"   
05377                         "paddsw    %%mm1, %%mm7 \n\t"   
05378                         
05379                         "movq    (%%esi), %%mm1 \n\t"   
05380                         "movq      %%mm1, %%mm2 \n\t"   
05381                         "inc              %%esi \n\t"   
05382                         "movq    (%%edx), %%mm3 \n\t"   
05383                         "add          $8, %%edx \n\t"   
05384                         "movq    (%%edx), %%mm4 \n\t"   
05385                         "add          $8, %%edx \n\t"   
05386                         "punpcklbw %%mm0, %%mm1 \n\t"   
05387                         "punpckhbw %%mm0, %%mm2 \n\t"   
05388                         "pmullw    %%mm3, %%mm1 \n\t"   
05389                         "pmullw    %%mm4, %%mm2 \n\t"   
05390                         "paddsw    %%mm2, %%mm1 \n\t"   
05391                         "paddsw    %%mm1, %%mm7 \n\t"   
05392                         "movq    (%%esi), %%mm1 \n\t"   
05393                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
05394                         "movq    (%%edx), %%mm3 \n\t"   
05395                         "add          $8, %%edx \n\t"   
05396                         "punpcklbw %%mm0, %%mm1 \n\t"   
05397                         "pmullw    %%mm3, %%mm1 \n\t"   
05398                         "paddsw    %%mm1, %%mm7 \n\t"   
05399                         
05400                         "movq    (%%esi), %%mm1 \n\t"   
05401                         "movq      %%mm1, %%mm2 \n\t"   
05402                         "inc              %%esi \n\t"   
05403                         "movq    (%%edx), %%mm3 \n\t"   
05404                         "add          $8, %%edx \n\t"   
05405                         "movq    (%%edx), %%mm4 \n\t"   
05406                         "add          $8, %%edx \n\t"   
05407                         "punpcklbw %%mm0, %%mm1 \n\t"   
05408                         "punpckhbw %%mm0, %%mm2 \n\t"   
05409                         "pmullw    %%mm3, %%mm1 \n\t"   
05410                         "pmullw    %%mm4, %%mm2 \n\t"   
05411                         "paddsw    %%mm2, %%mm1 \n\t"   
05412                         "paddsw    %%mm1, %%mm7 \n\t"   
05413                         "movq    (%%esi), %%mm1 \n\t"   
05414                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
05415                         "movq    (%%edx), %%mm3 \n\t"   
05416                         "add          $8, %%edx \n\t"   
05417                         "punpcklbw %%mm0, %%mm1 \n\t"   
05418                         "pmullw    %%mm3, %%mm1 \n\t"   
05419                         "paddsw    %%mm1, %%mm7 \n\t"   
05420                         
05421                         "movq    (%%esi), %%mm1 \n\t"   
05422                         "movq      %%mm1, %%mm2 \n\t"   
05423                         "inc              %%esi \n\t"   
05424                         "movq    (%%edx), %%mm3 \n\t"   
05425                         "add          $8, %%edx \n\t"   
05426                         "movq    (%%edx), %%mm4 \n\t"   
05427                         "add          $8, %%edx \n\t"   
05428                         "punpcklbw %%mm0, %%mm1 \n\t"   
05429                         "punpckhbw %%mm0, %%mm2 \n\t"   
05430                         "pmullw    %%mm3, %%mm1 \n\t"   
05431                         "pmullw    %%mm4, %%mm2 \n\t"   
05432                         "paddsw    %%mm2, %%mm1 \n\t"   
05433                         "paddsw    %%mm1, %%mm7 \n\t"   
05434                         "movq    (%%esi), %%mm1 \n\t"   
05435                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
05436                         "movq    (%%edx), %%mm3 \n\t"   
05437                         "add          $8, %%edx \n\t"   
05438                         "punpcklbw %%mm0, %%mm1 \n\t"   
05439                         "pmullw    %%mm3, %%mm1 \n\t"   
05440                         "paddsw    %%mm1, %%mm7 \n\t"   
05441                         
05442                         "movq    (%%esi), %%mm1 \n\t"   
05443                         "movq      %%mm1, %%mm2 \n\t"   
05444                         "inc              %%esi \n\t"   
05445                         "movq    (%%edx), %%mm3 \n\t"   
05446                         "add          $8, %%edx \n\t"   
05447                         "movq    (%%edx), %%mm4 \n\t"   
05448                         "add          $8, %%edx \n\t"   
05449                         "punpcklbw %%mm0, %%mm1 \n\t"   
05450                         "punpckhbw %%mm0, %%mm2 \n\t"   
05451                         "pmullw    %%mm3, %%mm1 \n\t"   
05452                         "pmullw    %%mm4, %%mm2 \n\t"   
05453                         "paddsw    %%mm2, %%mm1 \n\t"   
05454                         "paddsw    %%mm1, %%mm7 \n\t"   
05455                         "movq    (%%esi), %%mm1 \n\t"   
05456                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
05457                         "movq    (%%edx), %%mm3 \n\t"   
05458                         "add          $8, %%edx \n\t"   
05459                         "punpcklbw %%mm0, %%mm1 \n\t"   
05460                         "pmullw    %%mm3, %%mm1 \n\t"   
05461                         "paddsw    %%mm1, %%mm7 \n\t"   
05462                         
05463                         "movq    (%%esi), %%mm1 \n\t"   
05464                         "movq      %%mm1, %%mm2 \n\t"   
05465                         "inc              %%esi \n\t"   
05466                         "movq    (%%edx), %%mm3 \n\t"   
05467                         "add          $8, %%edx \n\t"   
05468                         "movq    (%%edx), %%mm4 \n\t"   
05469                         "add          $8, %%edx \n\t"   
05470                         "punpcklbw %%mm0, %%mm1 \n\t"   
05471                         "punpckhbw %%mm0, %%mm2 \n\t"   
05472                         "pmullw    %%mm3, %%mm1 \n\t"   
05473                         "pmullw    %%mm4, %%mm2 \n\t"   
05474                         "paddsw    %%mm2, %%mm1 \n\t"   
05475                         "paddsw    %%mm1, %%mm7 \n\t"   
05476                         "movq    (%%esi), %%mm1 \n\t"   
05477                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
05478                         "movq    (%%edx), %%mm3 \n\t"   
05479                         "add          $8, %%edx \n\t"   
05480                         "punpcklbw %%mm0, %%mm1 \n\t"   
05481                         "pmullw    %%mm3, %%mm1 \n\t"   
05482                         "paddsw    %%mm1, %%mm7 \n\t"   
05483                         
05484                         "movq    (%%esi), %%mm1 \n\t"   
05485                         "movq      %%mm1, %%mm2 \n\t"   
05486                         "inc              %%esi \n\t"   
05487                         "movq    (%%edx), %%mm3 \n\t"   
05488                         "add          $8, %%edx \n\t"   
05489                         "movq    (%%edx), %%mm4 \n\t"   
05490                         "add          $8, %%edx \n\t"   
05491                         "punpcklbw %%mm0, %%mm1 \n\t"   
05492                         "punpckhbw %%mm0, %%mm2 \n\t"   
05493                         "pmullw    %%mm3, %%mm1 \n\t"   
05494                         "pmullw    %%mm4, %%mm2 \n\t"   
05495                         "paddsw    %%mm2, %%mm1 \n\t"   
05496                         "paddsw    %%mm1, %%mm7 \n\t"   
05497                         "movq    (%%esi), %%mm1 \n\t"   
05498                         "movq    (%%edx), %%mm3 \n\t"   
05499                         "punpcklbw %%mm0, %%mm1 \n\t"   
05500                         "pmullw    %%mm3, %%mm1 \n\t"   
05501                         "paddsw    %%mm1, %%mm7 \n\t"   
05502                         
05503                         "movq      %%mm7, %%mm3 \n\t"   
05504                         "psrlq       $32, %%mm7 \n\t"   
05505                         "paddsw    %%mm3, %%mm7 \n\t"   
05506                         "movq      %%mm7, %%mm2 \n\t"   
05507                         "psrlq       $16, %%mm7 \n\t"   
05508                         "paddsw    %%mm2, %%mm7 \n\t"   
05509                         
05510                         "movd      %%eax, %%mm1 \n\t"   
05511                         "movd      %%ebx, %%mm2 \n\t"   
05512                         "movd      %%edx, %%mm3 \n\t"   
05513                         "movd      %%mm7, %%eax \n\t"   
05514                         "psraw       $15, %%mm7 \n\t"   
05515                         "movd      %%mm5, %%ebx \n\t"   
05516                         "movd      %%mm7, %%edx \n\t"   
05517                         "idivw             %%bx \n\t"   
05518                         "movd      %%eax, %%mm7 \n\t"   
05519                         "packuswb  %%mm0, %%mm7 \n\t"   
05520                         "movd      %%mm7, %%eax \n\t"   
05521                         "mov      %%al, (%%edi) \n\t"   
05522                         "movd      %%mm3, %%edx \n\t"   
05523                         "movd      %%mm2, %%ebx \n\t"   
05524                         "movd      %%mm1, %%eax \n\t"   
05525                         
05526                         "movd      %%mm6, %%esi \n\t"   
05527                         "sub        $208, %%edx \n\t"   
05528                         "inc              %%esi \n\t"   
05529                         "inc              %%edi \n\t"   
05530                         
05531                         "dec              %%ecx \n\t"   
05532                         "jnz            .L10352 \n\t"   
05533                         "add          $8, %%esi \n\t"   
05534                         "add          $8, %%edi \n\t"   
05535                         "dec              %%ebx \n\t"   
05536                         "jnz            .L10350 \n\t"   
05537                         
05538                         "emms                   \n\t"   
05539                         "popa                   \n\t":"=m" (Dest)       
05540                         :"m"(Src),              
05541                         "m"(rows),              
05542                         "m"(columns),           
05543                         "m"(Kernel),            
05544                         "m"(Divisor)            
05545                         );
05546 #endif
05547 #endif
05548                 return (0);
05549         } else {
05550                 
05551                 return (-1);
05552         }
05553 }
05554 
05569 int SDL_imageFilterConvolveKernel3x3ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
05570                                                                                            signed short *Kernel, unsigned char NRightShift)
05571 {
05572         
05573         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
05574                 return(-1);
05575 
05576         if ((columns < 3) || (rows < 3) || (NRightShift > 7))
05577                 return (-1);
05578 
05579         if ((SDL_imageFilterMMXdetect())) {
05580 #ifdef USE_MMX
05581 #if !defined(GCC__)
05582                 __asm
05583                 {
05584                         pusha
05585                                 pxor mm0, mm0           
05586                                 xor ebx, ebx    
05587                                 mov bl, NRightShift     
05588                                 movd mm4, ebx           
05589                                 mov edx, Kernel         
05590                                 movq mm5, [edx]         
05591                         add edx, 8      
05592                                 movq mm6, [edx]         
05593                         add edx, 8      
05594                                 movq mm7, [edx]         
05595                         
05596                         mov eax, columns        
05597                                 mov esi, Src    
05598                                 mov edi, Dest           
05599                                 add edi, eax    
05600                                 inc              edi            
05601                                 mov edx, rows           
05602                                 sub edx, 2      
05603                                 
05604 L10360:
05605                         mov ecx, eax    
05606                                 sub ecx, 2      
05607                                 align 16                        
05608 L10362:
05609                         
05610                         movq mm1, [esi]         
05611                         add esi, eax    
05612                                 movq mm2, [esi]         
05613                         add esi, eax    
05614                                 movq mm3, [esi]         
05615                         punpcklbw mm1, mm0      
05616                                 punpcklbw mm2, mm0      
05617                                 punpcklbw mm3, mm0      
05618                                 psrlw mm1, mm4          
05619                                 psrlw mm2, mm4          
05620                                 psrlw mm3, mm4          
05621                                 pmullw mm1, mm5         
05622                                 pmullw mm2, mm6         
05623                                 pmullw mm3, mm7         
05624                                 paddsw mm1, mm2         
05625                                 paddsw mm1, mm3         
05626                                 movq mm2, mm1           
05627                                 psrlq mm1, 32           
05628                                 paddsw mm1, mm2         
05629                                 movq mm3, mm1           
05630                                 psrlq mm1, 16           
05631                                 paddsw mm1, mm3         
05632                                 packuswb mm1, mm0       
05633                                 movd ebx, mm1           
05634                                 mov [edi], bl           
05635                                 
05636                                 sub esi, eax    
05637                                 sub esi, eax
05638                                 inc              esi            
05639                                 inc              edi            
05640                                 
05641                                 dec              ecx            
05642                                 jnz            L10362           
05643                                 add esi, 2      
05644                                 add edi, 2      
05645                                 dec              edx            
05646                                 jnz            L10360           
05647                                 
05648                                 emms                            
05649                                 popa
05650                 }
05651 #else
05652                 asm volatile
05653                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        
05654                         "xor       %%ebx, %%ebx \n\t"   
05655                         "mov           %5, %%bl \n\t"   
05656                         "movd      %%ebx, %%mm4 \n\t"   
05657                         "mov          %4, %%edx \n\t"   
05658                         "movq    (%%edx), %%mm5 \n\t"   
05659                         "add          $8, %%edx \n\t"   
05660                         "movq    (%%edx), %%mm6 \n\t"   
05661                         "add          $8, %%edx \n\t"   
05662                         "movq    (%%edx), %%mm7 \n\t"   
05663                         
05664                         "mov          %3, %%eax \n\t"   
05665                         "mov          %1, %%esi \n\t"   
05666                         "mov          %0, %%edi \n\t"   
05667                         "add       %%eax, %%edi \n\t"   
05668                         "inc              %%edi \n\t"   
05669                         "mov          %2, %%edx \n\t"   
05670                         "sub          $2, %%edx \n\t"   
05671                         
05672                         ".L10360:               \n\t" "mov       %%eax, %%ecx \n\t"     
05673                         "sub          $2, %%ecx \n\t"   
05674                         ".align 16              \n\t"   
05675                         ".L10362:               \n\t"
05676                         
05677                         "movq    (%%esi), %%mm1 \n\t"   
05678                         "add       %%eax, %%esi \n\t"   
05679                         "movq    (%%esi), %%mm2 \n\t"   
05680                         "add       %%eax, %%esi \n\t"   
05681                         "movq    (%%esi), %%mm3 \n\t"   
05682                         "punpcklbw %%mm0, %%mm1 \n\t"   
05683                         "punpcklbw %%mm0, %%mm2 \n\t"   
05684                         "punpcklbw %%mm0, %%mm3 \n\t"   
05685                         "psrlw     %%mm4, %%mm1 \n\t"   
05686                         "psrlw     %%mm4, %%mm2 \n\t"   
05687                         "psrlw     %%mm4, %%mm3 \n\t"   
05688                         "pmullw    %%mm5, %%mm1 \n\t"   
05689                         "pmullw    %%mm6, %%mm2 \n\t"   
05690                         "pmullw    %%mm7, %%mm3 \n\t"   
05691                         "paddsw    %%mm2, %%mm1 \n\t"   
05692                         "paddsw    %%mm3, %%mm1 \n\t"   
05693                         "movq      %%mm1, %%mm2 \n\t"   
05694                         "psrlq       $32, %%mm1 \n\t"   
05695                         "paddsw    %%mm2, %%mm1 \n\t"   
05696                         "movq      %%mm1, %%mm3 \n\t"   
05697                         "psrlq       $16, %%mm1 \n\t"   
05698                         "paddsw    %%mm3, %%mm1 \n\t"   
05699                         "packuswb  %%mm0, %%mm1 \n\t"   
05700                         "movd      %%mm1, %%ebx \n\t"   
05701                         "mov      %%bl, (%%edi) \n\t"   
05702                         
05703                         "sub       %%eax, %%esi \n\t"   
05704                         "sub       %%eax, %%esi \n\t" "inc              %%esi \n\t"     
05705                         "inc              %%edi \n\t"   
05706                         
05707                         "dec              %%ecx \n\t"   
05708                         "jnz            .L10362 \n\t"   
05709                         "add          $2, %%esi \n\t"   
05710                         "add          $2, %%edi \n\t"   
05711                         "dec              %%edx \n\t"   
05712                         "jnz            .L10360 \n\t"   
05713                         
05714                         "emms                   \n\t"   
05715                         "popa                   \n\t":"=m" (Dest)       
05716                         :"m"(Src),              
05717                         "m"(rows),              
05718                         "m"(columns),           
05719                         "m"(Kernel),            
05720                         "m"(NRightShift)        
05721                         );
05722 #endif
05723 #endif
05724                 return (0);
05725         } else {
05726                 
05727                 return (-1);
05728         }
05729 }
05730 
/*!
\brief 5x5 convolution of a byte image using MMX, with every source pixel shifted right before it is multiplied.

For each 5x5 window the code shifts each source byte right by NRightShift, multiplies it
by the matching signed 16-bit kernel word, sums the products with signed saturation and
stores the sum, saturated to 0..255, at the pixel under the window centre.

Only the interior of Dest is written: the first store goes to Dest + 2*columns + 2 and
each of the (rows - 4) output rows receives (columns - 4) bytes. The two-pixel border of
Dest is never touched by this function.

There is no C fallback: when SDL_imageFilterMMXdetect() reports no MMX the function
returns -1 without writing anything.

NOTE(review): the assembly reads the kernel as 8 words (16 bytes) per row, i.e. 40 words
in total, not 25. Words 5..7 of each kernel row are multiplied with the three source
pixels to the right of the window, so presumably they must be zero - confirm with callers.

NOTE(review): every source row is fetched with an 8-byte load starting at the left edge
of the window. Assuming Src holds exactly rows*columns bytes, the last window of the last
row reads 3 bytes past the end of the buffer - confirm buffer padding with callers.

NOTE(review): the pointers are held in 32-bit registers and pusha/popa are used, so this
looks like 32-bit x86 only code - confirm build configuration.

\param Src Source byte image, read as rows lines of columns bytes each.
\param Dest Destination byte image with the same row stride (columns) as Src.
\param rows Number of image rows; must be at least 5.
\param columns Number of image columns (also used as the row stride in bytes); must be at least 5.
\param Kernel Signed 16-bit kernel coefficients (see the layout note above).
\param NRightShift Right shift applied to each source pixel before multiplication; must be 0..7.

\return 0 if the MMX branch was taken, -1 on a NULL pointer, an out-of-range argument or when MMX is not available.
*/
05745 int SDL_imageFilterConvolveKernel5x5ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
05746                                                                                            signed short *Kernel, unsigned char NRightShift)
05747 {
05748         /* Reject missing buffers. */
05749         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
05750                 return(-1);
05751 
        /* The image must hold at least one full 5x5 window; shifts above 7 are rejected. */
05752         if ((columns < 5) || (rows < 5) || (NRightShift > 7))
05753                 return (-1);
05754 
        /*
         * MMX path. If USE_MMX is not defined the assembly below is compiled out and this
         * branch would return 0 without writing Dest; in that configuration _cpuFlags()
         * leaves its flags at 0, so the detection call is expected to fail first.
         */
05755         if ((SDL_imageFilterMMXdetect())) {
05756 #ifdef USE_MMX
05757 #if !defined(GCC__)
                /* Non-GCC (Intel syntax) variant: destination operand first. */
05758                 __asm
05759                 {
05760                         pusha   /* save all general purpose registers */
05761                                 pxor mm0, mm0           /* mm0 = 0, used for byte->word unpack and for packing */
05762                                 xor ebx, ebx    /* clear ebx so only the low byte carries the shift */
05763                                 mov bl, NRightShift     /* ebx = NRightShift */
05764                                 movd mm5, ebx           /* mm5 = shift count for psrlw */
05765                                 mov edx, Kernel         /* edx = current kernel read position */
05766                                 mov esi, Src    /* esi = top-left corner of the current window */
05767                                 mov edi, Dest           /* edi = current output position */
05768                                 add edi, 2      /* skip 2 columns ... */
05769                                 mov eax, columns        /* eax = row stride in bytes */
05770                                 shl eax, 1      /* eax = 2*columns */
05771                                 add edi, eax    /* ... and 2 rows: edi = Dest + 2*columns + 2 */
05772                                 shr eax, 1      /* restore eax = columns */
05773                                 mov ebx, rows           /* ebx = outer (row) counter */
05774                                 sub ebx, 4      /* rows - 4 output rows */
05775                                 
                                /* ---- outer loop: one output row per iteration ---- */
05776 L10370:
05777                         mov ecx, eax    /* ecx = inner (column) counter */
05778                                 sub ecx, 4      /* columns - 4 output pixels per row */
05779                                 align 16                        /* align the inner loop entry */
                                /* ---- inner loop: one output pixel per iteration ---- */
05780 L10372:
05781                         pxor mm7, mm7           /* mm7 = 0, accumulator of four partial sums */
05782                                 movd mm6, esi           /* remember the window origin in mm6 */
05783                                 /* --- window row 0 --- */
05784                                 movq mm1, [esi]         /* load 8 source bytes of this row */
05785                         movq mm2, mm1           /* second copy for the upper 4 bytes */
05786                                 add esi, eax    /* advance to the next source row */
05787                                 movq mm3, [edx]         /* kernel words 0..3 of this row */
05788                         add edx, 8      /* step to kernel words 4..7 */
05789                                 movq mm4, [edx]         /* kernel words 4..7 of this row */
05790                         add edx, 8      /* step to the next kernel row */
05791                                 punpcklbw mm1, mm0      /* low 4 bytes -> 4 zero-extended words */
05792                                 punpckhbw mm2, mm0      /* high 4 bytes -> 4 zero-extended words */
05793                                 psrlw mm1, mm5          /* pixel >> NRightShift (before multiplying) */
05794                                 psrlw mm2, mm5          /* pixel >> NRightShift (before multiplying) */
05795                                 pmullw mm1, mm3         /* low 16 bits of pixel * kernel, words 0..3 */
05796                                 pmullw mm2, mm4         /* low 16 bits of pixel * kernel, words 4..7 */
05797                                 paddsw mm1, mm2         /* fold both halves, signed saturation */
05798                                 paddsw mm7, mm1         /* accumulate into mm7 */
05799                                 /* --- window row 1: same steps as row 0 --- */
05800                                 movq mm1, [esi]         
05801                         movq mm2, mm1           
05802                                 add esi, eax    
05803                                 movq mm3, [edx]         
05804                         add edx, 8      
05805                                 movq mm4, [edx]         
05806                         add edx, 8      
05807                                 punpcklbw mm1, mm0      
05808                                 punpckhbw mm2, mm0      
05809                                 psrlw mm1, mm5          
05810                                 psrlw mm2, mm5          
05811                                 pmullw mm1, mm3         
05812                                 pmullw mm2, mm4         
05813                                 paddsw mm1, mm2         
05814                                 paddsw mm7, mm1         
05815                                 /* --- window row 2: same steps as row 0 --- */
05816                                 movq mm1, [esi]         
05817                         movq mm2, mm1           
05818                                 add esi, eax    
05819                                 movq mm3, [edx]         
05820                         add edx, 8      
05821                                 movq mm4, [edx]         
05822                         add edx, 8      
05823                                 punpcklbw mm1, mm0      
05824                                 punpckhbw mm2, mm0      
05825                                 psrlw mm1, mm5          
05826                                 psrlw mm2, mm5          
05827                                 pmullw mm1, mm3         
05828                                 pmullw mm2, mm4         
05829                                 paddsw mm1, mm2         
05830                                 paddsw mm7, mm1         
05831                                 /* --- window row 3: same steps as row 0 --- */
05832                                 movq mm1, [esi]         
05833                         movq mm2, mm1           
05834                                 add esi, eax    
05835                                 movq mm3, [edx]         
05836                         add edx, 8      
05837                                 movq mm4, [edx]         
05838                         add edx, 8      
05839                                 punpcklbw mm1, mm0      
05840                                 punpckhbw mm2, mm0      
05841                                 psrlw mm1, mm5          
05842                                 psrlw mm2, mm5          
05843                                 pmullw mm1, mm3         
05844                                 pmullw mm2, mm4         
05845                                 paddsw mm1, mm2         
05846                                 paddsw mm7, mm1         
05847                                 /* --- window row 4 (last): esi is not advanced, edx is advanced only once --- */
05848                                 movq mm1, [esi]         
05849                         movq mm2, mm1           
05850                                 movq mm3, [edx]         
05851                         add edx, 8      
05852                                 movq mm4, [edx]         
05853                         punpcklbw mm1, mm0      
05854                                 punpckhbw mm2, mm0      
05855                                 psrlw mm1, mm5          
05856                                 psrlw mm2, mm5          
05857                                 pmullw mm1, mm3         
05858                                 pmullw mm2, mm4         
05859                                 paddsw mm1, mm2         
05860                                 paddsw mm7, mm1         
05861                                 /* --- horizontal sum of the four words in mm7 into its low word --- */
05862                                 movq mm3, mm7           /* keep a copy of the four partial sums */
05863                                 psrlq mm7, 32           /* bring words 2,3 down to positions 0,1 */
05864                                 paddsw mm7, mm3         /* word0 = w0+w2, word1 = w1+w3 */
05865                                 movq mm2, mm7           /* keep a copy of the pair sums */
05866                                 psrlq mm7, 16           /* bring word 1 down to position 0 */
05867                                 paddsw mm7, mm2         /* word0 = w0+w1+w2+w3 */
05868                                 movd mm1, eax           /* park eax (columns) in mm1; eax is needed as scratch */
05869                                 packuswb mm7, mm0       /* saturate the words to unsigned bytes 0..255 */
05870                                 movd eax, mm7           /* low dword of the packed result */
05871                                 mov [edi], al           /* store one output byte */
05872                                 movd eax, mm1           /* restore eax = columns */
05873                                 /* --- step the window one pixel to the right --- */
05874                                 movd esi, mm6           /* back to the window origin */
05875                                 sub edx, 72     /* rewind kernel pointer: 4 rows * 16 bytes + 8 bytes */
05876                                 inc              esi            /* next source column */
05877                                 inc              edi            /* next destination column */
05878                                 
05879                                 dec              ecx            /* inner counter */
05880                                 jnz            L10372           /* more pixels in this row */
05881                                 add esi, 4      /* skip the 4 columns without a full window -> start of next row */
05882                                 add edi, 4      /* skip right and left border (2 + 2) in Dest */
05883                                 dec              ebx            /* outer counter */
05884                                 jnz            L10370           /* more rows */
05885                                 
05886                                 emms                            /* leave MMX state so the FPU is usable again */
05887                                 popa    /* restore all general purpose registers */
05888                 }
05889 #else
                /*
                 * GCC (AT&T syntax) variant of the same algorithm: source operand first.
                 * Operands: %0 = Dest, %1 = Src, %2 = rows, %3 = columns, %4 = Kernel, %5 = NRightShift.
                 */
05890                 asm volatile
05891                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        /* save registers; mm0 = 0 */
05892                         "xor       %%ebx, %%ebx \n\t"   /* clear ebx */
05893                         "mov           %5, %%bl \n\t"   /* ebx = NRightShift */
05894                         "movd      %%ebx, %%mm5 \n\t"   /* mm5 = shift count for psrlw */
05895                         "mov          %4, %%edx \n\t"   /* edx = current kernel read position */
05896                         "mov          %1, %%esi \n\t"   /* esi = top-left corner of the current window */
05897                         "mov          %0, %%edi \n\t"   /* edi = current output position */
05898                         "add          $2, %%edi \n\t"   /* skip 2 columns ... */
05899                         "mov          %3, %%eax \n\t"   /* eax = row stride in bytes */
05900                         "shl          $1, %%eax \n\t"   /* eax = 2*columns */
05901                         "add       %%eax, %%edi \n\t"   /* ... and 2 rows: edi = Dest + 2*columns + 2 */
05902                         "shr          $1, %%eax \n\t"   /* restore eax = columns */
05903                         "mov          %2, %%ebx \n\t"   /* ebx = outer (row) counter */
05904                         "sub          $4, %%ebx \n\t"   /* rows - 4 output rows */
05905                         /* ---- outer loop: one output row per iteration ---- */
05906                         ".L10370:               \n\t" "mov       %%eax, %%ecx \n\t"     /* ecx = inner (column) counter */
05907                         "sub          $4, %%ecx \n\t"   /* columns - 4 output pixels per row */
05908                         ".align 16              \n\t"   /* align the inner loop entry */
                        /* ---- inner loop: one output pixel per iteration ---- */
05909                         ".L10372:               \n\t" "pxor      %%mm7, %%mm7 \n\t"     /* mm7 = 0, accumulator */
05910                         "movd      %%esi, %%mm6 \n\t"   /* remember the window origin in mm6 */
05911                         /* --- window row 0 --- */
05912                         "movq    (%%esi), %%mm1 \n\t"   /* load 8 source bytes of this row */
05913                         "movq      %%mm1, %%mm2 \n\t"   /* second copy for the upper 4 bytes */
05914                         "add       %%eax, %%esi \n\t"   /* advance to the next source row */
05915                         "movq    (%%edx), %%mm3 \n\t"   /* kernel words 0..3 of this row */
05916                         "add          $8, %%edx \n\t"   /* step to kernel words 4..7 */
05917                         "movq    (%%edx), %%mm4 \n\t"   /* kernel words 4..7 of this row */
05918                         "add          $8, %%edx \n\t"   /* step to the next kernel row */
05919                         "punpcklbw %%mm0, %%mm1 \n\t"   /* low 4 bytes -> 4 zero-extended words */
05920                         "punpckhbw %%mm0, %%mm2 \n\t"   /* high 4 bytes -> 4 zero-extended words */
05921                         "psrlw     %%mm5, %%mm1 \n\t"   /* pixel >> NRightShift (before multiplying) */
05922                         "psrlw     %%mm5, %%mm2 \n\t"   /* pixel >> NRightShift (before multiplying) */
05923                         "pmullw    %%mm3, %%mm1 \n\t"   /* low 16 bits of pixel * kernel, words 0..3 */
05924                         "pmullw    %%mm4, %%mm2 \n\t"   /* low 16 bits of pixel * kernel, words 4..7 */
05925                         "paddsw    %%mm2, %%mm1 \n\t"   /* fold both halves, signed saturation */
05926                         "paddsw    %%mm1, %%mm7 \n\t"   /* accumulate into mm7 */
05927                         /* --- window row 1: same steps as row 0 --- */
05928                         "movq    (%%esi), %%mm1 \n\t"   
05929                         "movq      %%mm1, %%mm2 \n\t"   
05930                         "add       %%eax, %%esi \n\t"   
05931                         "movq    (%%edx), %%mm3 \n\t"   
05932                         "add          $8, %%edx \n\t"   
05933                         "movq    (%%edx), %%mm4 \n\t"   
05934                         "add          $8, %%edx \n\t"   
05935                         "punpcklbw %%mm0, %%mm1 \n\t"   
05936                         "punpckhbw %%mm0, %%mm2 \n\t"   
05937                         "psrlw     %%mm5, %%mm1 \n\t"   
05938                         "psrlw     %%mm5, %%mm2 \n\t"   
05939                         "pmullw    %%mm3, %%mm1 \n\t"   
05940                         "pmullw    %%mm4, %%mm2 \n\t"   
05941                         "paddsw    %%mm2, %%mm1 \n\t"   
05942                         "paddsw    %%mm1, %%mm7 \n\t"   
05943                         /* --- window row 2: same steps as row 0 --- */
05944                         "movq    (%%esi), %%mm1 \n\t"   
05945                         "movq      %%mm1, %%mm2 \n\t"   
05946                         "add       %%eax, %%esi \n\t"   
05947                         "movq    (%%edx), %%mm3 \n\t"   
05948                         "add          $8, %%edx \n\t"   
05949                         "movq    (%%edx), %%mm4 \n\t"   
05950                         "add          $8, %%edx \n\t"   
05951                         "punpcklbw %%mm0, %%mm1 \n\t"   
05952                         "punpckhbw %%mm0, %%mm2 \n\t"   
05953                         "psrlw     %%mm5, %%mm1 \n\t"   
05954                         "psrlw     %%mm5, %%mm2 \n\t"   
05955                         "pmullw    %%mm3, %%mm1 \n\t"   
05956                         "pmullw    %%mm4, %%mm2 \n\t"   
05957                         "paddsw    %%mm2, %%mm1 \n\t"   
05958                         "paddsw    %%mm1, %%mm7 \n\t"   
05959                         /* --- window row 3: same steps as row 0 --- */
05960                         "movq    (%%esi), %%mm1 \n\t"   
05961                         "movq      %%mm1, %%mm2 \n\t"   
05962                         "add       %%eax, %%esi \n\t"   
05963                         "movq    (%%edx), %%mm3 \n\t"   
05964                         "add          $8, %%edx \n\t"   
05965                         "movq    (%%edx), %%mm4 \n\t"   
05966                         "add          $8, %%edx \n\t"   
05967                         "punpcklbw %%mm0, %%mm1 \n\t"   
05968                         "punpckhbw %%mm0, %%mm2 \n\t"   
05969                         "psrlw     %%mm5, %%mm1 \n\t"   
05970                         "psrlw     %%mm5, %%mm2 \n\t"   
05971                         "pmullw    %%mm3, %%mm1 \n\t"   
05972                         "pmullw    %%mm4, %%mm2 \n\t"   
05973                         "paddsw    %%mm2, %%mm1 \n\t"   
05974                         "paddsw    %%mm1, %%mm7 \n\t"   
05975                         /* --- window row 4 (last): esi is not advanced, edx is advanced only once --- */
05976                         "movq    (%%esi), %%mm1 \n\t"   
05977                         "movq      %%mm1, %%mm2 \n\t"   
05978                         "movq    (%%edx), %%mm3 \n\t"   
05979                         "add          $8, %%edx \n\t"   
05980                         "movq    (%%edx), %%mm4 \n\t"   
05981                         "punpcklbw %%mm0, %%mm1 \n\t"   
05982                         "punpckhbw %%mm0, %%mm2 \n\t"   
05983                         "psrlw     %%mm5, %%mm1 \n\t"   
05984                         "psrlw     %%mm5, %%mm2 \n\t"   
05985                         "pmullw    %%mm3, %%mm1 \n\t"   
05986                         "pmullw    %%mm4, %%mm2 \n\t"   
05987                         "paddsw    %%mm2, %%mm1 \n\t"   
05988                         "paddsw    %%mm1, %%mm7 \n\t"   
05989                         /* --- horizontal sum of the four words in mm7 into its low word --- */
05990                         "movq      %%mm7, %%mm3 \n\t"   /* keep a copy of the four partial sums */
05991                         "psrlq       $32, %%mm7 \n\t"   /* bring words 2,3 down to positions 0,1 */
05992                         "paddsw    %%mm3, %%mm7 \n\t"   /* word0 = w0+w2, word1 = w1+w3 */
05993                         "movq      %%mm7, %%mm2 \n\t"   /* keep a copy of the pair sums */
05994                         "psrlq       $16, %%mm7 \n\t"   /* bring word 1 down to position 0 */
05995                         "paddsw    %%mm2, %%mm7 \n\t"   /* word0 = w0+w1+w2+w3 */
05996                         "movd      %%eax, %%mm1 \n\t"   /* park eax (columns) in mm1; eax is needed as scratch */
05997                         "packuswb  %%mm0, %%mm7 \n\t"   /* saturate the words to unsigned bytes 0..255 */
05998                         "movd      %%mm7, %%eax \n\t"   /* low dword of the packed result */
05999                         "mov      %%al, (%%edi) \n\t"   /* store one output byte */
06000                         "movd      %%mm1, %%eax \n\t"   /* restore eax = columns */
06001                         /* --- step the window one pixel to the right --- */
06002                         "movd      %%mm6, %%esi \n\t"   /* back to the window origin */
06003                         "sub         $72, %%edx \n\t"   /* rewind kernel pointer: 4 rows * 16 bytes + 8 bytes */
06004                         "inc              %%esi \n\t"   /* next source column */
06005                         "inc              %%edi \n\t"   /* next destination column */
06006                         
06007                         "dec              %%ecx \n\t"   /* inner counter */
06008                         "jnz            .L10372 \n\t"   /* more pixels in this row */
06009                         "add          $4, %%esi \n\t"   /* skip the 4 columns without a full window -> start of next row */
06010                         "add          $4, %%edi \n\t"   /* skip right and left border (2 + 2) in Dest */
06011                         "dec              %%ebx \n\t"   /* outer counter */
06012                         "jnz            .L10370 \n\t"   /* more rows */
06013                         
06014                         "emms                   \n\t"   /* leave MMX state so the FPU is usable again */
                        /*
                         * NOTE(review): %0 (Dest) is declared write-only ("=m") although the template
                         * only reads the pointer value, and there is no clobber list even though the
                         * template stores through edi and uses mm0..mm7. Also, pusha moves esp, which
                         * would invalidate any esp-relative "m" operand. Confirm against the GCC
                         * extended-asm documentation before changing compiler flags.
                         */
06015                         "popa                   \n\t":"=m" (Dest)       /* %0: destination pointer; restore registers */
06016                         :"m"(Src),              /* %1: source pointer */
06017                         "m"(rows),              /* %2: number of rows */
06018                         "m"(columns),           /* %3: number of columns / row stride */
06019                         "m"(Kernel),            /* %4: kernel pointer */
06020                         "m"(NRightShift)        /* %5: per-pixel right shift */
06021                         );
06022 #endif
06023 #endif
06024                 return (0);
06025         } else {
06026                 /* No MMX available and no C implementation exists for this filter. */
06027                 return (-1);
06028         }
06029 }
06030 
06045 int SDL_imageFilterConvolveKernel7x7ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
06046                                                                                            signed short *Kernel, unsigned char NRightShift)
06047 {
06048         
06049         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
06050                 return(-1);
06051 
06052         if ((columns < 7) || (rows < 7) || (NRightShift > 7))
06053                 return (-1);
06054 
06055         if ((SDL_imageFilterMMXdetect())) {
06056 #ifdef USE_MMX
06057 #if !defined(GCC__)
06058                 __asm
06059                 {
06060                         pusha
06061                                 pxor mm0, mm0           
06062                                 xor ebx, ebx    
06063                                 mov bl, NRightShift     
06064                                 movd mm5, ebx           
06065                                 mov edx, Kernel         
06066                                 mov esi, Src    
06067                                 mov edi, Dest           
06068                                 add edi, 3      
06069                                 mov eax, columns        
06070                                 add edi, eax    
06071                                 add edi, eax
06072                                 add edi, eax
06073                                 mov ebx, rows           
06074                                 sub ebx, 6      
06075                                 
06076 L10380:
06077                         mov ecx, eax    
06078                                 sub ecx, 6      
06079                                 align 16                        
06080 L10382:
06081                         pxor mm7, mm7           
06082                                 movd mm6, esi           
06083                                 
06084                                 movq mm1, [esi]         
06085                         movq mm2, mm1           
06086                                 add esi, eax    
06087                                 movq mm3, [edx]         
06088                         add edx, 8      
06089                                 movq mm4, [edx]         
06090                         add edx, 8      
06091                                 punpcklbw mm1, mm0      
06092                                 punpckhbw mm2, mm0      
06093                                 psrlw mm1, mm5          
06094                                 psrlw mm2, mm5          
06095                                 pmullw mm1, mm3         
06096                                 pmullw mm2, mm4         
06097                                 paddsw mm1, mm2         
06098                                 paddsw mm7, mm1         
06099                                 
06100                                 movq mm1, [esi]         
06101                         movq mm2, mm1           
06102                                 add esi, eax    
06103                                 movq mm3, [edx]         
06104                         add edx, 8      
06105                                 movq mm4, [edx]         
06106                         add edx, 8      
06107                                 punpcklbw mm1, mm0      
06108                                 punpckhbw mm2, mm0      
06109                                 psrlw mm1, mm5          
06110                                 psrlw mm2, mm5          
06111                                 pmullw mm1, mm3         
06112                                 pmullw mm2, mm4         
06113                                 paddsw mm1, mm2         
06114                                 paddsw mm7, mm1         
06115                                 
06116                                 movq mm1, [esi]         
06117                         movq mm2, mm1           
06118                                 add esi, eax    
06119                                 movq mm3, [edx]         
06120                         add edx, 8      
06121                                 movq mm4, [edx]         
06122                         add edx, 8      
06123                                 punpcklbw mm1, mm0      
06124                                 punpckhbw mm2, mm0      
06125                                 psrlw mm1, mm5          
06126                                 psrlw mm2, mm5          
06127                                 pmullw mm1, mm3         
06128                                 pmullw mm2, mm4         
06129                                 paddsw mm1, mm2         
06130                                 paddsw mm7, mm1         
06131                                 
06132                                 movq mm1, [esi]         
06133                         movq mm2, mm1           
06134                                 add esi, eax    
06135                                 movq mm3, [edx]         
06136                         add edx, 8      
06137                                 movq mm4, [edx]         
06138                         add edx, 8      
06139                                 punpcklbw mm1, mm0      
06140                                 punpckhbw mm2, mm0      
06141                                 psrlw mm1, mm5          
06142                                 psrlw mm2, mm5          
06143                                 pmullw mm1, mm3         
06144                                 pmullw mm2, mm4         
06145                                 paddsw mm1, mm2         
06146                                 paddsw mm7, mm1         
06147                                 
06148                                 movq mm1, [esi]         
06149                         movq mm2, mm1           
06150                                 add esi, eax    
06151                                 movq mm3, [edx]         
06152                         add edx, 8      
06153                                 movq mm4, [edx]         
06154                         add edx, 8      
06155                                 punpcklbw mm1, mm0      
06156                                 punpckhbw mm2, mm0      
06157                                 psrlw mm1, mm5          
06158                                 psrlw mm2, mm5          
06159                                 pmullw mm1, mm3         
06160                                 pmullw mm2, mm4         
06161                                 paddsw mm1, mm2         
06162                                 paddsw mm7, mm1         
06163                                 
06164                                 movq mm1, [esi]         
06165                         movq mm2, mm1           
06166                                 add esi, eax    
06167                                 movq mm3, [edx]         
06168                         add edx, 8      
06169                                 movq mm4, [edx]         
06170                         add edx, 8      
06171                                 punpcklbw mm1, mm0      
06172                                 punpckhbw mm2, mm0      
06173                                 psrlw mm1, mm5          
06174                                 psrlw mm2, mm5          
06175                                 pmullw mm1, mm3         
06176                                 pmullw mm2, mm4         
06177                                 paddsw mm1, mm2         
06178                                 paddsw mm7, mm1         
06179                                 
06180                                 movq mm1, [esi]         
06181                         movq mm2, mm1           
06182                                 movq mm3, [edx]         
06183                         add edx, 8      
06184                                 movq mm4, [edx]         
06185                         punpcklbw mm1, mm0      
06186                                 punpckhbw mm2, mm0      
06187                                 psrlw mm1, mm5          
06188                                 psrlw mm2, mm5          
06189                                 pmullw mm1, mm3         
06190                                 pmullw mm2, mm4         
06191                                 paddsw mm1, mm2         
06192                                 paddsw mm7, mm1         
06193                                 
06194                                 movq mm3, mm7           
06195                                 psrlq mm7, 32           
06196                                 paddsw mm7, mm3         
06197                                 movq mm2, mm7           
06198                                 psrlq mm7, 16           
06199                                 paddsw mm7, mm2         
06200                                 movd mm1, eax           
06201                                 packuswb mm7, mm0       
06202                                 movd eax, mm7           
06203                                 mov [edi], al           
06204                                 movd eax, mm1           
06205                                 
06206                                 movd esi, mm6           
06207                                 sub edx, 104    
06208                                 inc              esi            
06209                                 inc              edi            
06210                                 
06211                                 dec              ecx            
06212                                 jnz            L10382           
06213                                 add esi, 6      
06214                                 add edi, 6      
06215                                 dec              ebx            
06216                                 jnz            L10380           
06217                                 
06218                                 emms                            
06219                                 popa
06220                 }
06221 #else
06222                 asm volatile
06223                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        
06224                         "xor       %%ebx, %%ebx \n\t"   
06225                         "mov           %5, %%bl \n\t"   
06226                         "movd      %%ebx, %%mm5 \n\t"   
06227                         "mov          %4, %%edx \n\t"   
06228                         "mov          %1, %%esi \n\t"   
06229                         "mov          %0, %%edi \n\t"   
06230                         "add          $3, %%edi \n\t"   
06231                         "mov          %3, %%eax \n\t"   
06232                         "add       %%eax, %%edi \n\t"   
06233                         "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "mov          %2, %%ebx \n\t"       
06234                         "sub          $6, %%ebx \n\t"   
06235                         
06236                         ".L10380:               \n\t" "mov       %%eax, %%ecx \n\t"     
06237                         "sub          $6, %%ecx \n\t"   
06238                         ".align 16              \n\t"   
06239                         ".L10382:               \n\t" "pxor      %%mm7, %%mm7 \n\t"     
06240                         "movd      %%esi, %%mm6 \n\t"   
06241                         
06242                         "movq    (%%esi), %%mm1 \n\t"   
06243                         "movq      %%mm1, %%mm2 \n\t"   
06244                         "add       %%eax, %%esi \n\t"   
06245                         "movq    (%%edx), %%mm3 \n\t"   
06246                         "add          $8, %%edx \n\t"   
06247                         "movq    (%%edx), %%mm4 \n\t"   
06248                         "add          $8, %%edx \n\t"   
06249                         "punpcklbw %%mm0, %%mm1 \n\t"   
06250                         "punpckhbw %%mm0, %%mm2 \n\t"   
06251                         "psrlw     %%mm5, %%mm1 \n\t"   
06252                         "psrlw     %%mm5, %%mm2 \n\t"   
06253                         "pmullw    %%mm3, %%mm1 \n\t"   
06254                         "pmullw    %%mm4, %%mm2 \n\t"   
06255                         "paddsw    %%mm2, %%mm1 \n\t"   
06256                         "paddsw    %%mm1, %%mm7 \n\t"   
06257                         
06258                         "movq    (%%esi), %%mm1 \n\t"   
06259                         "movq      %%mm1, %%mm2 \n\t"   
06260                         "add       %%eax, %%esi \n\t"   
06261                         "movq    (%%edx), %%mm3 \n\t"   
06262                         "add          $8, %%edx \n\t"   
06263                         "movq    (%%edx), %%mm4 \n\t"   
06264                         "add          $8, %%edx \n\t"   
06265                         "punpcklbw %%mm0, %%mm1 \n\t"   
06266                         "punpckhbw %%mm0, %%mm2 \n\t"   
06267                         "psrlw     %%mm5, %%mm1 \n\t"   
06268                         "psrlw     %%mm5, %%mm2 \n\t"   
06269                         "pmullw    %%mm3, %%mm1 \n\t"   
06270                         "pmullw    %%mm4, %%mm2 \n\t"   
06271                         "paddsw    %%mm2, %%mm1 \n\t"   
06272                         "paddsw    %%mm1, %%mm7 \n\t"   
06273                         
06274                         "movq    (%%esi), %%mm1 \n\t"   
06275                         "movq      %%mm1, %%mm2 \n\t"   
06276                         "add       %%eax, %%esi \n\t"   
06277                         "movq    (%%edx), %%mm3 \n\t"   
06278                         "add          $8, %%edx \n\t"   
06279                         "movq    (%%edx), %%mm4 \n\t"   
06280                         "add          $8, %%edx \n\t"   
06281                         "punpcklbw %%mm0, %%mm1 \n\t"   
06282                         "punpckhbw %%mm0, %%mm2 \n\t"   
06283                         "psrlw     %%mm5, %%mm1 \n\t"   
06284                         "psrlw     %%mm5, %%mm2 \n\t"   
06285                         "pmullw    %%mm3, %%mm1 \n\t"   
06286                         "pmullw    %%mm4, %%mm2 \n\t"   
06287                         "paddsw    %%mm2, %%mm1 \n\t"   
06288                         "paddsw    %%mm1, %%mm7 \n\t"   
06289                         
06290                         "movq    (%%esi), %%mm1 \n\t"   
06291                         "movq      %%mm1, %%mm2 \n\t"   
06292                         "add       %%eax, %%esi \n\t"   
06293                         "movq    (%%edx), %%mm3 \n\t"   
06294                         "add          $8, %%edx \n\t"   
06295                         "movq    (%%edx), %%mm4 \n\t"   
06296                         "add          $8, %%edx \n\t"   
06297                         "punpcklbw %%mm0, %%mm1 \n\t"   
06298                         "punpckhbw %%mm0, %%mm2 \n\t"   
06299                         "psrlw     %%mm5, %%mm1 \n\t"   
06300                         "psrlw     %%mm5, %%mm2 \n\t"   
06301                         "pmullw    %%mm3, %%mm1 \n\t"   
06302                         "pmullw    %%mm4, %%mm2 \n\t"   
06303                         "paddsw    %%mm2, %%mm1 \n\t"   
06304                         "paddsw    %%mm1, %%mm7 \n\t"   
06305                         
06306                         "movq    (%%esi), %%mm1 \n\t"   
06307                         "movq      %%mm1, %%mm2 \n\t"   
06308                         "add       %%eax, %%esi \n\t"   
06309                         "movq    (%%edx), %%mm3 \n\t"   
06310                         "add          $8, %%edx \n\t"   
06311                         "movq    (%%edx), %%mm4 \n\t"   
06312                         "add          $8, %%edx \n\t"   
06313                         "punpcklbw %%mm0, %%mm1 \n\t"   
06314                         "punpckhbw %%mm0, %%mm2 \n\t"   
06315                         "psrlw     %%mm5, %%mm1 \n\t"   
06316                         "psrlw     %%mm5, %%mm2 \n\t"   
06317                         "pmullw    %%mm3, %%mm1 \n\t"   
06318                         "pmullw    %%mm4, %%mm2 \n\t"   
06319                         "paddsw    %%mm2, %%mm1 \n\t"   
06320                         "paddsw    %%mm1, %%mm7 \n\t"   
06321                         
06322                         "movq    (%%esi), %%mm1 \n\t"   
06323                         "movq      %%mm1, %%mm2 \n\t"   
06324                         "add       %%eax, %%esi \n\t"   
06325                         "movq    (%%edx), %%mm3 \n\t"   
06326                         "add          $8, %%edx \n\t"   
06327                         "movq    (%%edx), %%mm4 \n\t"   
06328                         "add          $8, %%edx \n\t"   
06329                         "punpcklbw %%mm0, %%mm1 \n\t"   
06330                         "punpckhbw %%mm0, %%mm2 \n\t"   
06331                         "psrlw     %%mm5, %%mm1 \n\t"   
06332                         "psrlw     %%mm5, %%mm2 \n\t"   
06333                         "pmullw    %%mm3, %%mm1 \n\t"   
06334                         "pmullw    %%mm4, %%mm2 \n\t"   
06335                         "paddsw    %%mm2, %%mm1 \n\t"   
06336                         "paddsw    %%mm1, %%mm7 \n\t"   
06337                         
06338                         "movq    (%%esi), %%mm1 \n\t"   
06339                         "movq      %%mm1, %%mm2 \n\t"   
06340                         "movq    (%%edx), %%mm3 \n\t"   
06341                         "add          $8, %%edx \n\t"   
06342                         "movq    (%%edx), %%mm4 \n\t"   
06343                         "punpcklbw %%mm0, %%mm1 \n\t"   
06344                         "punpckhbw %%mm0, %%mm2 \n\t"   
06345                         "psrlw     %%mm5, %%mm1 \n\t"   
06346                         "psrlw     %%mm5, %%mm2 \n\t"   
06347                         "pmullw    %%mm3, %%mm1 \n\t"   
06348                         "pmullw    %%mm4, %%mm2 \n\t"   
06349                         "paddsw    %%mm2, %%mm1 \n\t"   
06350                         "paddsw    %%mm1, %%mm7 \n\t"   
06351                         
06352                         "movq      %%mm7, %%mm3 \n\t"   
06353                         "psrlq       $32, %%mm7 \n\t"   
06354                         "paddsw    %%mm3, %%mm7 \n\t"   
06355                         "movq      %%mm7, %%mm2 \n\t"   
06356                         "psrlq       $16, %%mm7 \n\t"   
06357                         "paddsw    %%mm2, %%mm7 \n\t"   
06358                         "movd      %%eax, %%mm1 \n\t"   
06359                         "packuswb  %%mm0, %%mm7 \n\t"   
06360                         "movd      %%mm7, %%eax \n\t"   
06361                         "mov      %%al, (%%edi) \n\t"   
06362                         "movd      %%mm1, %%eax \n\t"   
06363                         
06364                         "movd      %%mm6, %%esi \n\t"   
06365                         "sub        $104, %%edx \n\t"   
06366                         "inc              %%esi \n\t"   
06367                         "inc              %%edi \n\t"   
06368                         
06369                         "dec              %%ecx \n\t"   
06370                         "jnz            .L10382 \n\t"   
06371                         "add          $6, %%esi \n\t"   
06372                         "add          $6, %%edi \n\t"   
06373                         "dec              %%ebx \n\t"   
06374                         "jnz            .L10380 \n\t"   
06375                         
06376                         "emms                   \n\t"   
06377                         "popa                   \n\t":"=m" (Dest)       
06378                         :"m"(Src),              
06379                         "m"(rows),              
06380                         "m"(columns),           
06381                         "m"(Kernel),            
06382                         "m"(NRightShift)        
06383                         );
06384 #endif
06385 #endif
06386                 return (0);
06387         } else {
06388                 
06389                 return (-1);
06390         }
06391 }
06392 
06407 int SDL_imageFilterConvolveKernel9x9ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
06408                                                                                            signed short *Kernel, unsigned char NRightShift)
06409 {
06410         
06411         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
06412                 return(-1);
06413 
06414         if ((columns < 9) || (rows < 9) || (NRightShift > 7))
06415                 return (-1);
06416 
06417         if ((SDL_imageFilterMMXdetect())) {
06418 #ifdef USE_MMX
06419 #if !defined(GCC__)
06420                 __asm
06421                 {
06422                         pusha
06423                                 pxor mm0, mm0           
06424                                 xor ebx, ebx    
06425                                 mov bl, NRightShift     
06426                                 movd mm5, ebx           
06427                                 mov edx, Kernel         
06428                                 mov esi, Src    
06429                                 mov edi, Dest           
06430                                 add edi, 4      
06431                                 mov eax, columns        
06432                                 add edi, eax    
06433                                 add edi, eax
06434                                 add edi, eax
06435                                 add edi, eax
06436                                 mov ebx, rows           
06437                                 sub ebx, 8      
06438                                 
06439 L10390:
06440                         mov ecx, eax    
06441                                 sub ecx, 8      
06442                                 align 16                        
06443 L10392:
06444                         pxor mm7, mm7           
06445                                 movd mm6, esi           
06446                                 
06447                                 movq mm1, [esi]         
06448                         movq mm2, mm1           
06449                                 inc              esi            
06450                                 movq mm3, [edx]         
06451                         add edx, 8      
06452                                 movq mm4, [edx]         
06453                         add edx, 8      
06454                                 punpcklbw mm1, mm0      
06455                                 punpckhbw mm2, mm0      
06456                                 psrlw mm1, mm5          
06457                                 psrlw mm2, mm5          
06458                                 pmullw mm1, mm3         
06459                                 pmullw mm2, mm4         
06460                                 paddsw mm1, mm2         
06461                                 paddsw mm7, mm1         
06462                                 movq mm1, [esi]         
06463                         dec              esi
06464                                 add esi, eax    
06465                                 movq mm3, [edx]         
06466                         add edx, 8      
06467                                 punpcklbw mm1, mm0      
06468                                 psrlw mm1, mm5          
06469                                 pmullw mm1, mm3         
06470                                 paddsw mm7, mm1         
06471                                 
06472                                 movq mm1, [esi]         
06473                         movq mm2, mm1           
06474                                 inc              esi            
06475                                 movq mm3, [edx]         
06476                         add edx, 8      
06477                                 movq mm4, [edx]         
06478                         add edx, 8      
06479                                 punpcklbw mm1, mm0      
06480                                 punpckhbw mm2, mm0      
06481                                 psrlw mm1, mm5          
06482                                 psrlw mm2, mm5          
06483                                 pmullw mm1, mm3         
06484                                 pmullw mm2, mm4         
06485                                 paddsw mm1, mm2         
06486                                 paddsw mm7, mm1         
06487                                 movq mm1, [esi]         
06488                         dec              esi
06489                                 add esi, eax    
06490                                 movq mm3, [edx]         
06491                         add edx, 8      
06492                                 punpcklbw mm1, mm0      
06493                                 psrlw mm1, mm5          
06494                                 pmullw mm1, mm3         
06495                                 paddsw mm7, mm1         
06496                                 
06497                                 movq mm1, [esi]         
06498                         movq mm2, mm1           
06499                                 inc              esi            
06500                                 movq mm3, [edx]         
06501                         add edx, 8      
06502                                 movq mm4, [edx]         
06503                         add edx, 8      
06504                                 punpcklbw mm1, mm0      
06505                                 punpckhbw mm2, mm0      
06506                                 psrlw mm1, mm5          
06507                                 psrlw mm2, mm5          
06508                                 pmullw mm1, mm3         
06509                                 pmullw mm2, mm4         
06510                                 paddsw mm1, mm2         
06511                                 paddsw mm7, mm1         
06512                                 movq mm1, [esi]         
06513                         dec              esi
06514                                 add esi, eax    
06515                                 movq mm3, [edx]         
06516                         add edx, 8      
06517                                 punpcklbw mm1, mm0      
06518                                 psrlw mm1, mm5          
06519                                 pmullw mm1, mm3         
06520                                 paddsw mm7, mm1         
06521                                 
06522                                 movq mm1, [esi]         
06523                         movq mm2, mm1           
06524                                 inc              esi            
06525                                 movq mm3, [edx]         
06526                         add edx, 8      
06527                                 movq mm4, [edx]         
06528                         add edx, 8      
06529                                 punpcklbw mm1, mm0      
06530                                 punpckhbw mm2, mm0      
06531                                 psrlw mm1, mm5          
06532                                 psrlw mm2, mm5          
06533                                 pmullw mm1, mm3         
06534                                 pmullw mm2, mm4         
06535                                 paddsw mm1, mm2         
06536                                 paddsw mm7, mm1         
06537                                 movq mm1, [esi]         
06538                         dec              esi
06539                                 add esi, eax    
06540                                 movq mm3, [edx]         
06541                         add edx, 8      
06542                                 punpcklbw mm1, mm0      
06543                                 psrlw mm1, mm5          
06544                                 pmullw mm1, mm3         
06545                                 paddsw mm7, mm1         
06546                                 
06547                                 movq mm1, [esi]         
06548                         movq mm2, mm1           
06549                                 inc              esi            
06550                                 movq mm3, [edx]         
06551                         add edx, 8      
06552                                 movq mm4, [edx]         
06553                         add edx, 8      
06554                                 punpcklbw mm1, mm0      
06555                                 punpckhbw mm2, mm0      
06556                                 psrlw mm1, mm5          
06557                                 psrlw mm2, mm5          
06558                                 pmullw mm1, mm3         
06559                                 pmullw mm2, mm4         
06560                                 paddsw mm1, mm2         
06561                                 paddsw mm7, mm1         
06562                                 movq mm1, [esi]         
06563                         dec              esi
06564                                 add esi, eax    
06565                                 movq mm3, [edx]         
06566                         add edx, 8      
06567                                 punpcklbw mm1, mm0      
06568                                 psrlw mm1, mm5          
06569                                 pmullw mm1, mm3         
06570                                 paddsw mm7, mm1         
06571                                 
06572                                 movq mm1, [esi]         
06573                         movq mm2, mm1           
06574                                 inc              esi            
06575                                 movq mm3, [edx]         
06576                         add edx, 8      
06577                                 movq mm4, [edx]         
06578                         add edx, 8      
06579                                 punpcklbw mm1, mm0      
06580                                 punpckhbw mm2, mm0      
06581                                 psrlw mm1, mm5          
06582                                 psrlw mm2, mm5          
06583                                 pmullw mm1, mm3         
06584                                 pmullw mm2, mm4         
06585                                 paddsw mm1, mm2         
06586                                 paddsw mm7, mm1         
06587                                 movq mm1, [esi]         
06588                         dec              esi
06589                                 add esi, eax    
06590                                 movq mm3, [edx]         
06591                         add edx, 8      
06592                                 punpcklbw mm1, mm0      
06593                                 psrlw mm1, mm5          
06594                                 pmullw mm1, mm3         
06595                                 paddsw mm7, mm1         
06596                                 
06597                                 movq mm1, [esi]         
06598                         movq mm2, mm1           
06599                                 inc              esi            
06600                                 movq mm3, [edx]         
06601                         add edx, 8      
06602                                 movq mm4, [edx]         
06603                         add edx, 8      
06604                                 punpcklbw mm1, mm0      
06605                                 punpckhbw mm2, mm0      
06606                                 psrlw mm1, mm5          
06607                                 psrlw mm2, mm5          
06608                                 pmullw mm1, mm3         
06609                                 pmullw mm2, mm4         
06610                                 paddsw mm1, mm2         
06611                                 paddsw mm7, mm1         
06612                                 movq mm1, [esi]         
06613                         dec              esi
06614                                 add esi, eax    
06615                                 movq mm3, [edx]         
06616                         add edx, 8      
06617                                 punpcklbw mm1, mm0      
06618                                 psrlw mm1, mm5          
06619                                 pmullw mm1, mm3         
06620                                 paddsw mm7, mm1         
06621                                 
06622                                 movq mm1, [esi]         
06623                         movq mm2, mm1           
06624                                 inc              esi            
06625                                 movq mm3, [edx]         
06626                         add edx, 8      
06627                                 movq mm4, [edx]         
06628                         add edx, 8      
06629                                 punpcklbw mm1, mm0      
06630                                 punpckhbw mm2, mm0      
06631                                 psrlw mm1, mm5          
06632                                 psrlw mm2, mm5          
06633                                 pmullw mm1, mm3         
06634                                 pmullw mm2, mm4         
06635                                 paddsw mm1, mm2         
06636                                 paddsw mm7, mm1         
06637                                 movq mm1, [esi]         
06638                         dec              esi
06639                                 add esi, eax    
06640                                 movq mm3, [edx]         
06641                         add edx, 8      
06642                                 punpcklbw mm1, mm0      
06643                                 psrlw mm1, mm5          
06644                                 pmullw mm1, mm3         
06645                                 paddsw mm7, mm1         
06646                                 
06647                                 movq mm1, [esi]         
06648                         movq mm2, mm1           
06649                                 inc              esi            
06650                                 movq mm3, [edx]         
06651                         add edx, 8      
06652                                 movq mm4, [edx]         
06653                         add edx, 8      
06654                                 punpcklbw mm1, mm0      
06655                                 punpckhbw mm2, mm0      
06656                                 psrlw mm1, mm5          
06657                                 psrlw mm2, mm5          
06658                                 pmullw mm1, mm3         
06659                                 pmullw mm2, mm4         
06660                                 paddsw mm1, mm2         
06661                                 paddsw mm7, mm1         
06662                                 movq mm1, [esi]         
06663                         movq mm3, [edx]         
06664                         punpcklbw mm1, mm0      
06665                                 psrlw mm1, mm5          
06666                                 pmullw mm1, mm3         
06667                                 paddsw mm7, mm1         
06668                                 
06669                                 movq mm3, mm7           
06670                                 psrlq mm7, 32           
06671                                 paddsw mm7, mm3         
06672                                 movq mm2, mm7           
06673                                 psrlq mm7, 16           
06674                                 paddsw mm7, mm2         
06675                                 movd mm1, eax           
06676                                 packuswb mm7, mm0       
06677                                 movd eax, mm7           
06678                                 mov [edi], al           
06679                                 movd eax, mm1           
06680                                 
06681                                 movd esi, mm6           
06682                                 sub edx, 208    
06683                                 inc              esi            
06684                                 inc              edi            
06685                                 
06686                                 dec              ecx            
06687                                 jnz            L10392           
06688                                 add esi, 8      
06689                                 add edi, 8      
06690                                 dec              ebx            
06691                                 jnz            L10390           
06692                                 
06693                                 emms                            
06694                                 popa
06695                 }
06696 #else
06697                 asm volatile
06698                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        
06699                         "xor       %%ebx, %%ebx \n\t"   
06700                         "mov           %5, %%bl \n\t"   
06701                         "movd      %%ebx, %%mm5 \n\t"   
06702                         "mov          %4, %%edx \n\t"   
06703                         "mov          %1, %%esi \n\t"   
06704                         "mov          %0, %%edi \n\t"   
06705                         "add          $4, %%edi \n\t"   
06706                         "mov          %3, %%eax \n\t"   
06707                         "add       %%eax, %%edi \n\t"   
06708                         "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "mov          %2, %%ebx \n\t" 
06709                         "sub          $8, %%ebx \n\t"   
06710                         
06711                         ".L10390:               \n\t" "mov       %%eax, %%ecx \n\t"     
06712                         "sub          $8, %%ecx \n\t"   
06713                         ".align 16              \n\t"   
06714                         ".L10392:               \n\t" "pxor      %%mm7, %%mm7 \n\t"     
06715                         "movd      %%esi, %%mm6 \n\t"   
06716                         
06717                         "movq    (%%esi), %%mm1 \n\t"   
06718                         "movq      %%mm1, %%mm2 \n\t"   
06719                         "inc              %%esi \n\t"   
06720                         "movq    (%%edx), %%mm3 \n\t"   
06721                         "add          $8, %%edx \n\t"   
06722                         "movq    (%%edx), %%mm4 \n\t"   
06723                         "add          $8, %%edx \n\t"   
06724                         "punpcklbw %%mm0, %%mm1 \n\t"   
06725                         "punpckhbw %%mm0, %%mm2 \n\t"   
06726                         "psrlw     %%mm5, %%mm1 \n\t"   
06727                         "psrlw     %%mm5, %%mm2 \n\t"   
06728                         "pmullw    %%mm3, %%mm1 \n\t"   
06729                         "pmullw    %%mm4, %%mm2 \n\t"   
06730                         "paddsw    %%mm2, %%mm1 \n\t"   
06731                         "paddsw    %%mm1, %%mm7 \n\t"   
06732                         "movq    (%%esi), %%mm1 \n\t"   
06733                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
06734                         "movq    (%%edx), %%mm3 \n\t"   
06735                         "add          $8, %%edx \n\t"   
06736                         "punpcklbw %%mm0, %%mm1 \n\t"   
06737                         "psrlw     %%mm5, %%mm1 \n\t"   
06738                         "pmullw    %%mm3, %%mm1 \n\t"   
06739                         "paddsw    %%mm1, %%mm7 \n\t"   
06740                         
06741                         "movq    (%%esi), %%mm1 \n\t"   
06742                         "movq      %%mm1, %%mm2 \n\t"   
06743                         "inc              %%esi \n\t"   
06744                         "movq    (%%edx), %%mm3 \n\t"   
06745                         "add          $8, %%edx \n\t"   
06746                         "movq    (%%edx), %%mm4 \n\t"   
06747                         "add          $8, %%edx \n\t"   
06748                         "punpcklbw %%mm0, %%mm1 \n\t"   
06749                         "punpckhbw %%mm0, %%mm2 \n\t"   
06750                         "psrlw     %%mm5, %%mm1 \n\t"   
06751                         "psrlw     %%mm5, %%mm2 \n\t"   
06752                         "pmullw    %%mm3, %%mm1 \n\t"   
06753                         "pmullw    %%mm4, %%mm2 \n\t"   
06754                         "paddsw    %%mm2, %%mm1 \n\t"   
06755                         "paddsw    %%mm1, %%mm7 \n\t"   
06756                         "movq    (%%esi), %%mm1 \n\t"   
06757                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
06758                         "movq    (%%edx), %%mm3 \n\t"   
06759                         "add          $8, %%edx \n\t"   
06760                         "punpcklbw %%mm0, %%mm1 \n\t"   
06761                         "psrlw     %%mm5, %%mm1 \n\t"   
06762                         "pmullw    %%mm3, %%mm1 \n\t"   
06763                         "paddsw    %%mm1, %%mm7 \n\t"   
06764                         
06765                         "movq    (%%esi), %%mm1 \n\t"   
06766                         "movq      %%mm1, %%mm2 \n\t"   
06767                         "inc              %%esi \n\t"   
06768                         "movq    (%%edx), %%mm3 \n\t"   
06769                         "add          $8, %%edx \n\t"   
06770                         "movq    (%%edx), %%mm4 \n\t"   
06771                         "add          $8, %%edx \n\t"   
06772                         "punpcklbw %%mm0, %%mm1 \n\t"   
06773                         "punpckhbw %%mm0, %%mm2 \n\t"   
06774                         "psrlw     %%mm5, %%mm1 \n\t"   
06775                         "psrlw     %%mm5, %%mm2 \n\t"   
06776                         "pmullw    %%mm3, %%mm1 \n\t"   
06777                         "pmullw    %%mm4, %%mm2 \n\t"   
06778                         "paddsw    %%mm2, %%mm1 \n\t"   
06779                         "paddsw    %%mm1, %%mm7 \n\t"   
06780                         "movq    (%%esi), %%mm1 \n\t"   
06781                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
06782                         "movq    (%%edx), %%mm3 \n\t"   
06783                         "add          $8, %%edx \n\t"   
06784                         "punpcklbw %%mm0, %%mm1 \n\t"   
06785                         "psrlw     %%mm5, %%mm1 \n\t"   
06786                         "pmullw    %%mm3, %%mm1 \n\t"   
06787                         "paddsw    %%mm1, %%mm7 \n\t"   
06788                         
06789                         "movq    (%%esi), %%mm1 \n\t"   
06790                         "movq      %%mm1, %%mm2 \n\t"   
06791                         "inc              %%esi \n\t"   
06792                         "movq    (%%edx), %%mm3 \n\t"   
06793                         "add          $8, %%edx \n\t"   
06794                         "movq    (%%edx), %%mm4 \n\t"   
06795                         "add          $8, %%edx \n\t"   
06796                         "punpcklbw %%mm0, %%mm1 \n\t"   
06797                         "punpckhbw %%mm0, %%mm2 \n\t"   
06798                         "psrlw     %%mm5, %%mm1 \n\t"   
06799                         "psrlw     %%mm5, %%mm2 \n\t"   
06800                         "pmullw    %%mm3, %%mm1 \n\t"   
06801                         "pmullw    %%mm4, %%mm2 \n\t"   
06802                         "paddsw    %%mm2, %%mm1 \n\t"   
06803                         "paddsw    %%mm1, %%mm7 \n\t"   
06804                         "movq    (%%esi), %%mm1 \n\t"   
06805                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
06806                         "movq    (%%edx), %%mm3 \n\t"   
06807                         "add          $8, %%edx \n\t"   
06808                         "punpcklbw %%mm0, %%mm1 \n\t"   
06809                         "psrlw     %%mm5, %%mm1 \n\t"   
06810                         "pmullw    %%mm3, %%mm1 \n\t"   
06811                         "paddsw    %%mm1, %%mm7 \n\t"   
06812                         
06813                         "movq    (%%esi), %%mm1 \n\t"   
06814                         "movq      %%mm1, %%mm2 \n\t"   
06815                         "inc              %%esi \n\t"   
06816                         "movq    (%%edx), %%mm3 \n\t"   
06817                         "add          $8, %%edx \n\t"   
06818                         "movq    (%%edx), %%mm4 \n\t"   
06819                         "add          $8, %%edx \n\t"   
06820                         "punpcklbw %%mm0, %%mm1 \n\t"   
06821                         "punpckhbw %%mm0, %%mm2 \n\t"   
06822                         "psrlw     %%mm5, %%mm1 \n\t"   
06823                         "psrlw     %%mm5, %%mm2 \n\t"   
06824                         "pmullw    %%mm3, %%mm1 \n\t"   
06825                         "pmullw    %%mm4, %%mm2 \n\t"   
06826                         "paddsw    %%mm2, %%mm1 \n\t"   
06827                         "paddsw    %%mm1, %%mm7 \n\t"   
06828                         "movq    (%%esi), %%mm1 \n\t"   
06829                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
06830                         "movq    (%%edx), %%mm3 \n\t"   
06831                         "add          $8, %%edx \n\t"   
06832                         "punpcklbw %%mm0, %%mm1 \n\t"   
06833                         "psrlw     %%mm5, %%mm1 \n\t"   
06834                         "pmullw    %%mm3, %%mm1 \n\t"   
06835                         "paddsw    %%mm1, %%mm7 \n\t"   
06836                         
06837                         "movq    (%%esi), %%mm1 \n\t"   
06838                         "movq      %%mm1, %%mm2 \n\t"   
06839                         "inc              %%esi \n\t"   
06840                         "movq    (%%edx), %%mm3 \n\t"   
06841                         "add          $8, %%edx \n\t"   
06842                         "movq    (%%edx), %%mm4 \n\t"   
06843                         "add          $8, %%edx \n\t"   
06844                         "punpcklbw %%mm0, %%mm1 \n\t"   
06845                         "punpckhbw %%mm0, %%mm2 \n\t"   
06846                         "psrlw     %%mm5, %%mm1 \n\t"   
06847                         "psrlw     %%mm5, %%mm2 \n\t"   
06848                         "pmullw    %%mm3, %%mm1 \n\t"   
06849                         "pmullw    %%mm4, %%mm2 \n\t"   
06850                         "paddsw    %%mm2, %%mm1 \n\t"   
06851                         "paddsw    %%mm1, %%mm7 \n\t"   
06852                         "movq    (%%esi), %%mm1 \n\t"   
06853                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
06854                         "movq    (%%edx), %%mm3 \n\t"   
06855                         "add          $8, %%edx \n\t"   
06856                         "punpcklbw %%mm0, %%mm1 \n\t"   
06857                         "psrlw     %%mm5, %%mm1 \n\t"   
06858                         "pmullw    %%mm3, %%mm1 \n\t"   
06859                         "paddsw    %%mm1, %%mm7 \n\t"   
06860                         
06861                         "movq    (%%esi), %%mm1 \n\t"   
06862                         "movq      %%mm1, %%mm2 \n\t"   
06863                         "inc              %%esi \n\t"   
06864                         "movq    (%%edx), %%mm3 \n\t"   
06865                         "add          $8, %%edx \n\t"   
06866                         "movq    (%%edx), %%mm4 \n\t"   
06867                         "add          $8, %%edx \n\t"   
06868                         "punpcklbw %%mm0, %%mm1 \n\t"   
06869                         "punpckhbw %%mm0, %%mm2 \n\t"   
06870                         "psrlw     %%mm5, %%mm1 \n\t"   
06871                         "psrlw     %%mm5, %%mm2 \n\t"   
06872                         "pmullw    %%mm3, %%mm1 \n\t"   
06873                         "pmullw    %%mm4, %%mm2 \n\t"   
06874                         "paddsw    %%mm2, %%mm1 \n\t"   
06875                         "paddsw    %%mm1, %%mm7 \n\t"   
06876                         "movq    (%%esi), %%mm1 \n\t"   
06877                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
06878                         "movq    (%%edx), %%mm3 \n\t"   
06879                         "add          $8, %%edx \n\t"   
06880                         "punpcklbw %%mm0, %%mm1 \n\t"   
06881                         "psrlw     %%mm5, %%mm1 \n\t"   
06882                         "pmullw    %%mm3, %%mm1 \n\t"   
06883                         "paddsw    %%mm1, %%mm7 \n\t"   
06884                         
06885                         "movq    (%%esi), %%mm1 \n\t"   
06886                         "movq      %%mm1, %%mm2 \n\t"   
06887                         "inc              %%esi \n\t"   
06888                         "movq    (%%edx), %%mm3 \n\t"   
06889                         "add          $8, %%edx \n\t"   
06890                         "movq    (%%edx), %%mm4 \n\t"   
06891                         "add          $8, %%edx \n\t"   
06892                         "punpcklbw %%mm0, %%mm1 \n\t"   
06893                         "punpckhbw %%mm0, %%mm2 \n\t"   
06894                         "psrlw     %%mm5, %%mm1 \n\t"   
06895                         "psrlw     %%mm5, %%mm2 \n\t"   
06896                         "pmullw    %%mm3, %%mm1 \n\t"   
06897                         "pmullw    %%mm4, %%mm2 \n\t"   
06898                         "paddsw    %%mm2, %%mm1 \n\t"   
06899                         "paddsw    %%mm1, %%mm7 \n\t"   
06900                         "movq    (%%esi), %%mm1 \n\t"   
06901                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"     
06902                         "movq    (%%edx), %%mm3 \n\t"   
06903                         "add          $8, %%edx \n\t"   
06904                         "punpcklbw %%mm0, %%mm1 \n\t"   
06905                         "psrlw     %%mm5, %%mm1 \n\t"   
06906                         "pmullw    %%mm3, %%mm1 \n\t"   
06907                         "paddsw    %%mm1, %%mm7 \n\t"   
06908                         
06909                         "movq    (%%esi), %%mm1 \n\t"   
06910                         "movq      %%mm1, %%mm2 \n\t"   
06911                         "inc              %%esi \n\t"   
06912                         "movq    (%%edx), %%mm3 \n\t"   
06913                         "add          $8, %%edx \n\t"   
06914                         "movq    (%%edx), %%mm4 \n\t"   
06915                         "add          $8, %%edx \n\t"   
06916                         "punpcklbw %%mm0, %%mm1 \n\t"   
06917                         "punpckhbw %%mm0, %%mm2 \n\t"   
06918                         "psrlw     %%mm5, %%mm1 \n\t"   
06919                         "psrlw     %%mm5, %%mm2 \n\t"   
06920                         "pmullw    %%mm3, %%mm1 \n\t"   
06921                         "pmullw    %%mm4, %%mm2 \n\t"   
06922                         "paddsw    %%mm2, %%mm1 \n\t"   
06923                         "paddsw    %%mm1, %%mm7 \n\t"   
06924                         "movq    (%%esi), %%mm1 \n\t"   
06925                         "movq    (%%edx), %%mm3 \n\t"   
06926                         "punpcklbw %%mm0, %%mm1 \n\t"   
06927                         "psrlw     %%mm5, %%mm1 \n\t"   
06928                         "pmullw    %%mm3, %%mm1 \n\t"   
06929                         "paddsw    %%mm1, %%mm7 \n\t"   
06930                         
06931                         "movq      %%mm7, %%mm3 \n\t"   
06932                         "psrlq       $32, %%mm7 \n\t"   
06933                         "paddsw    %%mm3, %%mm7 \n\t"   
06934                         "movq      %%mm7, %%mm2 \n\t"   
06935                         "psrlq       $16, %%mm7 \n\t"   
06936                         "paddsw    %%mm2, %%mm7 \n\t"   
06937                         "movd      %%eax, %%mm1 \n\t"   
06938                         "packuswb  %%mm0, %%mm7 \n\t"   
06939                         "movd      %%mm7, %%eax \n\t"   
06940                         "mov      %%al, (%%edi) \n\t"   
06941                         "movd      %%mm1, %%eax \n\t"   
06942                         
06943                         "movd      %%mm6, %%esi \n\t"   
06944                         "sub        $208, %%edx \n\t"   
06945                         "inc              %%esi \n\t"   
06946                         "inc              %%edi \n\t"   
06947                         
06948                         "dec              %%ecx \n\t"   
06949                         "jnz            .L10392 \n\t"   
06950                         "add          $8, %%esi \n\t"   
06951                         "add          $8, %%edi \n\t"   
06952                         "dec              %%ebx \n\t"   
06953                         "jnz            .L10390 \n\t"   
06954                         
06955                         "emms                   \n\t"   
06956                         "popa                   \n\t":"=m" (Dest)       
06957                         :"m"(Src),              
06958                         "m"(rows),              
06959                         "m"(columns),           
06960                         "m"(Kernel),            
06961                         "m"(NRightShift)        
06962                         );
06963 #endif
06964 #endif
06965                 return (0);
06966         } else {
06967                 
06968                 return (-1);
06969         }
06970 }
06971 
06972 
06973 
/**
 * \brief Horizontal-gradient (Sobel X) filter over a byte image, MMX only.
 *
 * Rows are addressed with a stride of `columns` bytes. For every 3x3
 * neighbourhood whose top-left byte is Src[r*columns + c] the assembly
 * computes, in 16-bit lanes,
 *
 *     colsum(j) = Src[r][j] + 2*Src[r+1][j] + Src[r+2][j]
 *     result    = | colsum(c+2) - colsum(c) |
 *
 * i.e. the kernel  -1 0 1 / -2 0 2 / -1 0 1, and stores the result,
 * saturated to 0..255, at Dest[(r+1)*columns + c + 1]. Eight results are
 * produced per inner-loop pass; there are columns/8 passes per row and
 * rows-2 row passes.
 *
 * \param Src     Source bytes (read through ESI).
 * \param Dest    Destination bytes (written through EDI).
 * \param rows    Number of rows; must be at least 3.
 * \param columns Number of bytes per row; must be at least 8.
 *
 * \return 0 when SDL_imageFilterMMXdetect() reports MMX (the assembly itself
 *         is only compiled in when USE_MMX is defined); -1 when a pointer is
 *         NULL, when columns < 8 or rows < 3, or when MMX is not detected.
 *         There is no C fallback in this function.
 *
 * NOTE(review): each inner pass loads 8 bytes at offset +2 of all three
 * source rows, so the last pass of a row reads 2 bytes beyond column
 * 8*(columns/8) - 1; on the bottom row that is presumably past the end of a
 * rows*columns buffer - confirm callers pad Src.
 * NOTE(review): each pass stores 8 bytes starting at column 1, so when
 * columns is a multiple of 8 the last store of a row also overwrites
 * column 0 of the following row - confirm this is intended.
 * NOTE(review): the GCC variant declares Dest as an "=m" output although the
 * asm only loads it, and lists no clobbers (it relies on pusha/popa) -
 * confirm this is acceptable for the compilers in use.
 */
06986 int SDL_imageFilterSobelX(unsigned char *Src, unsigned char *Dest, int rows, int columns)
06987 {
06988         /* Reject missing buffers. */
06989         if ((Src == NULL) || (Dest == NULL))
06990                 return(-1);
06991 
06992         if ((columns < 8) || (rows < 3)) /* need one 8-byte pass per row and a full 3-row window */
06993                 return (-1);
06994 
06995         if ((SDL_imageFilterMMXdetect())) {
06996 #ifdef USE_MMX
06997 #if !defined(GCC__)
06998                 __asm
06999                 {
07000                         pusha
07001                                 pxor mm0, mm0           /* MM0 = 0: zero half used when widening bytes to words */
07002                                 mov eax, columns        /* EAX = columns, used as the row stride in bytes */
07003                                 /* --- pointer and counter setup --- */
07004                                 mov esi, Src    /* ESI = source read pointer, starts at row 0 / column 0 */
07005                                 mov edi, Dest           /* EDI = destination write pointer */
07006                                 add edi, eax    /* skip one row ... */
07007                                 inc              edi            /* ... and one column: first result goes to row 1 / column 1 */
07008                                 mov edx, rows           /* EDX = outer (row) loop counter */
07009                                 sub edx, 2      /* rows - 2 row passes */
07010                                 /* --- outer loop: one pass per output row --- */
07011 L10400:
07012                         mov ecx, eax    /* ECX = inner loop counter ... */
07013                                 shr ecx, 3      /* ... = columns / 8 (8 results per pass) */
07014                                 mov ebx, esi    /* remember source row start in EBX */
07015                                 movd mm1, edi           /* remember destination row start in MM1 */
07016                                 align 16                        /* align the inner loop entry */
07017 L10402:
07018                         /* --- accumulate column sums with row weights 1, 2, 1 --- */
07019                         movq mm4, [esi]         /* top row, bytes 0..7 */
07020                         movq mm5, mm4           /* second copy for the high half */
07021                                 add esi, 2      /* move to byte offset +2 */
07022                                 punpcklbw mm4, mm0      /* MM4 = bytes 0..3 as words */
07023                                 punpckhbw mm5, mm0      /* MM5 = bytes 4..7 as words */
07024                                 movq mm6, [esi]         /* top row, bytes 2..9 */
07025                         movq mm7, mm6           /* second copy for the high half */
07026                                 sub esi, 2      /* back to byte offset 0 */
07027                                 punpcklbw mm6, mm0      /* MM6 = bytes 2..5 as words */
07028                                 punpckhbw mm7, mm0      /* MM7 = bytes 6..9 as words */
07029                                 add esi, eax    /* step down to the middle row */
07030                                 movq mm2, [esi]         /* middle row, bytes 0..7 */
07031                         movq mm3, mm2           /* second copy for the high half */
07032                                 add esi, 2      /* move to byte offset +2 */
07033                                 punpcklbw mm2, mm0      /* widen low 4 bytes */
07034                                 punpckhbw mm3, mm0      /* widen high 4 bytes */
07035                                 paddw mm4, mm2          /* middle row is added twice (weight 2) ... */
07036                                 paddw mm5, mm3          
07037                                 paddw mm4, mm2          /* ... second addition */
07038                                 paddw mm5, mm3          
07039                                 movq mm2, [esi]         /* middle row, bytes 2..9 */
07040                         movq mm3, mm2           /* second copy for the high half */
07041                                 sub esi, 2      /* back to byte offset 0 */
07042                                 punpcklbw mm2, mm0      /* widen low 4 bytes */
07043                                 punpckhbw mm3, mm0      /* widen high 4 bytes */
07044                                 paddw mm6, mm2          /* weight 2 again for the +2 window ... */
07045                                 paddw mm7, mm3          
07046                                 paddw mm6, mm2          /* ... second addition */
07047                                 paddw mm7, mm3          
07048                                 add esi, eax    /* step down to the bottom row */
07049                                 movq mm2, [esi]         /* bottom row, bytes 0..7 */
07050                         movq mm3, mm2           /* second copy for the high half */
07051                                 add esi, 2      /* move to byte offset +2 */
07052                                 punpcklbw mm2, mm0      /* widen low 4 bytes */
07053                                 punpckhbw mm3, mm0      /* widen high 4 bytes */
07054                                 paddw mm4, mm2          /* bottom row added once (weight 1) */
07055                                 paddw mm5, mm3          
07056                                 movq mm2, [esi]         /* bottom row, bytes 2..9 */
07057                         movq mm3, mm2           /* second copy for the high half */
07058                                 sub esi, 2      /* back to byte offset 0 */
07059                                 punpcklbw mm2, mm0      /* widen low 4 bytes */
07060                                 punpckhbw mm3, mm0      /* widen high 4 bytes */
07061                                 paddw mm6, mm2          /* MM6/MM7 now hold column sums 2..5 / 6..9 */
07062                                 paddw mm7, mm3          
07063                                 /* --- right column sum minus left column sum (two columns apart) --- */
07064                                 movq mm2, mm4           /* keep sums 0..3 */
07065                                 psrlq mm4, 32           /* bring sums 2,3 down to words 0,1 */
07066                                 psubw mm4, mm2          /* words 0,1 = sum2-sum0, sum3-sum1 (upper words unused) */
07067                                 movq mm3, mm6           /* keep sums 2..5 */
07068                                 psrlq mm6, 32           /* bring sums 4,5 down to words 0,1 */
07069                                 psubw mm6, mm3          /* words 0,1 = sum4-sum2, sum5-sum3 */
07070                                 punpckldq mm4, mm6      /* MM4 = four differences for results 0..3 */
07071                                 movq mm2, mm5           /* keep sums 4..7 */
07072                                 psrlq mm5, 32           /* bring sums 6,7 down to words 0,1 */
07073                                 psubw mm5, mm2          /* words 0,1 = sum6-sum4, sum7-sum5 */
07074                                 movq mm3, mm7           /* keep sums 6..9 */
07075                                 psrlq mm7, 32           /* bring sums 8,9 down to words 0,1 */
07076                                 psubw mm7, mm3          /* words 0,1 = sum8-sum6, sum9-sum7 */
07077                                 punpckldq mm5, mm7      /* MM5 = four differences for results 4..7 */
07078                                 /* --- absolute value of each signed word, then pack to bytes --- */
07079                                 movq mm6, mm4           /* copy differences ... */
07080                                 movq mm7, mm5           
07081                                 psraw mm6, 15           /* ... and turn each into a sign mask (0 or 0xFFFF) */
07082                                 psraw mm7, 15           
07083                                 pxor mm4, mm6           /* flip the bits of negative words */
07084                                 pxor mm5, mm7           
07085                                 psubsw mm4, mm6         /* subtract the mask (-1) to finish the negation */
07086                                 psubsw mm5, mm7         
07087                                 packuswb mm4, mm5       /* 8 words -> 8 bytes with unsigned saturation */
07088                                 movq [edi], mm4         /* store 8 result bytes */
07089                                 /* --- advance to the next 8 columns --- */
07090                                 sub esi, eax    /* go back up two rows to the top row ... */
07091                                 sub esi, eax
07092                                 add esi, 8      /* ... and move 8 bytes right in the source */
07093                                 add edi, 8      /* move 8 bytes right in the destination */
07094                                 /* --- loop control --- */
07095                                 dec              ecx            /* one 8-byte pass done */
07096                                 jnz            L10402           /* more passes in this row */
07097                                 mov esi, ebx    /* restore source row start */
07098                                 movd edi, mm1           /* restore destination row start */
07099                                 add esi, eax    /* next source row */
07100                                 add edi, eax    /* next destination row */
07101                                 dec              edx            /* one row pass done */
07102                                 jnz            L10400           /* more rows to process */
07103                                 /* --- leave MMX state and restore registers --- */
07104                                 emms                            /* clear MMX state */
07105                                 popa
07106                 }
07107 #else
07108                 asm volatile
07109                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        /* MM0 = 0: zero half used when widening bytes to words */
07110                         "mov          %3, %%eax \n\t"   /* EAX = columns (operand 3), used as the row stride */
07111                         /* --- pointer and counter setup --- */
07112                         "mov          %1, %%esi \n\t"   /* ESI = Src (operand 1) */
07113                         "mov          %0, %%edi \n\t"   /* EDI = Dest (operand 0) */
07114                         "add       %%eax, %%edi \n\t"   /* skip one row ... */
07115                         "inc              %%edi \n\t"   /* ... and one column: first result goes to row 1 / column 1 */
07116                         "mov          %2, %%edx \n\t"   /* EDX = rows (operand 2), outer loop counter */
07117                         "sub          $2, %%edx \n\t"   /* rows - 2 row passes */
07118                         /* --- outer loop: one pass per output row --- */
07119                         ".L10400:                \n\t" "mov       %%eax, %%ecx \n\t"    /* ECX = inner loop counter ... */
07120                         "shr          $3, %%ecx \n\t"   /* ... = columns / 8 (8 results per pass) */
07121                         "mov       %%esi, %%ebx \n\t"   /* remember source row start in EBX */
07122                         "movd      %%edi, %%mm1 \n\t"   /* remember destination row start in MM1 */
07123                         ".align 16              \n\t"   /* align the inner loop entry */
07124                         ".L10402:               \n\t"
07125                         /* --- accumulate column sums with row weights 1, 2, 1 --- */
07126                         "movq    (%%esi), %%mm4 \n\t"   /* top row, bytes 0..7 */
07127                         "movq      %%mm4, %%mm5 \n\t"   /* second copy for the high half */
07128                         "add          $2, %%esi \n\t"   /* move to byte offset +2 */
07129                         "punpcklbw %%mm0, %%mm4 \n\t"   /* MM4 = bytes 0..3 as words */
07130                         "punpckhbw %%mm0, %%mm5 \n\t"   /* MM5 = bytes 4..7 as words */
07131                         "movq    (%%esi), %%mm6 \n\t"   /* top row, bytes 2..9 */
07132                         "movq      %%mm6, %%mm7 \n\t"   /* second copy for the high half */
07133                         "sub          $2, %%esi \n\t"   /* back to byte offset 0 */
07134                         "punpcklbw %%mm0, %%mm6 \n\t"   /* MM6 = bytes 2..5 as words */
07135                         "punpckhbw %%mm0, %%mm7 \n\t"   /* MM7 = bytes 6..9 as words */
07136                         "add       %%eax, %%esi \n\t"   /* step down to the middle row */
07137                         "movq    (%%esi), %%mm2 \n\t"   /* middle row, bytes 0..7 */
07138                         "movq      %%mm2, %%mm3 \n\t"   /* second copy for the high half */
07139                         "add          $2, %%esi \n\t"   /* move to byte offset +2 */
07140                         "punpcklbw %%mm0, %%mm2 \n\t"   /* widen low 4 bytes */
07141                         "punpckhbw %%mm0, %%mm3 \n\t"   /* widen high 4 bytes */
07142                         "paddw     %%mm2, %%mm4 \n\t"   /* middle row is added twice (weight 2) ... */
07143                         "paddw     %%mm3, %%mm5 \n\t"   
07144                         "paddw     %%mm2, %%mm4 \n\t"   /* ... second addition */
07145                         "paddw     %%mm3, %%mm5 \n\t"   
07146                         "movq    (%%esi), %%mm2 \n\t"   /* middle row, bytes 2..9 */
07147                         "movq      %%mm2, %%mm3 \n\t"   /* second copy for the high half */
07148                         "sub          $2, %%esi \n\t"   /* back to byte offset 0 */
07149                         "punpcklbw %%mm0, %%mm2 \n\t"   /* widen low 4 bytes */
07150                         "punpckhbw %%mm0, %%mm3 \n\t"   /* widen high 4 bytes */
07151                         "paddw     %%mm2, %%mm6 \n\t"   /* weight 2 again for the +2 window ... */
07152                         "paddw     %%mm3, %%mm7 \n\t"   
07153                         "paddw     %%mm2, %%mm6 \n\t"   /* ... second addition */
07154                         "paddw     %%mm3, %%mm7 \n\t"   
07155                         "add       %%eax, %%esi \n\t"   /* step down to the bottom row */
07156                         "movq    (%%esi), %%mm2 \n\t"   /* bottom row, bytes 0..7 */
07157                         "movq      %%mm2, %%mm3 \n\t"   /* second copy for the high half */
07158                         "add          $2, %%esi \n\t"   /* move to byte offset +2 */
07159                         "punpcklbw %%mm0, %%mm2 \n\t"   /* widen low 4 bytes */
07160                         "punpckhbw %%mm0, %%mm3 \n\t"   /* widen high 4 bytes */
07161                         "paddw     %%mm2, %%mm4 \n\t"   /* bottom row added once (weight 1) */
07162                         "paddw     %%mm3, %%mm5 \n\t"   
07163                         "movq    (%%esi), %%mm2 \n\t"   /* bottom row, bytes 2..9 */
07164                         "movq      %%mm2, %%mm3 \n\t"   /* second copy for the high half */
07165                         "sub          $2, %%esi \n\t"   /* back to byte offset 0 */
07166                         "punpcklbw %%mm0, %%mm2 \n\t"   /* widen low 4 bytes */
07167                         "punpckhbw %%mm0, %%mm3 \n\t"   /* widen high 4 bytes */
07168                         "paddw     %%mm2, %%mm6 \n\t"   /* MM6/MM7 now hold column sums 2..5 / 6..9 */
07169                         "paddw     %%mm3, %%mm7 \n\t"   
07170                         /* --- right column sum minus left column sum (two columns apart) --- */
07171                         "movq      %%mm4, %%mm2 \n\t"   /* keep sums 0..3 */
07172                         "psrlq       $32, %%mm4 \n\t"   /* bring sums 2,3 down to words 0,1 */
07173                         "psubw     %%mm2, %%mm4 \n\t"   /* words 0,1 = sum2-sum0, sum3-sum1 (upper words unused) */
07174                         "movq      %%mm6, %%mm3 \n\t"   /* keep sums 2..5 */
07175                         "psrlq       $32, %%mm6 \n\t"   /* bring sums 4,5 down to words 0,1 */
07176                         "psubw     %%mm3, %%mm6 \n\t"   /* words 0,1 = sum4-sum2, sum5-sum3 */
07177                         "punpckldq %%mm6, %%mm4 \n\t"   /* MM4 = four differences for results 0..3 */
07178                         "movq      %%mm5, %%mm2 \n\t"   /* keep sums 4..7 */
07179                         "psrlq       $32, %%mm5 \n\t"   /* bring sums 6,7 down to words 0,1 */
07180                         "psubw     %%mm2, %%mm5 \n\t"   /* words 0,1 = sum6-sum4, sum7-sum5 */
07181                         "movq      %%mm7, %%mm3 \n\t"   /* keep sums 6..9 */
07182                         "psrlq       $32, %%mm7 \n\t"   /* bring sums 8,9 down to words 0,1 */
07183                         "psubw     %%mm3, %%mm7 \n\t"   /* words 0,1 = sum8-sum6, sum9-sum7 */
07184                         "punpckldq %%mm7, %%mm5 \n\t"   /* MM5 = four differences for results 4..7 */
07185                         /* --- absolute value of each signed word, then pack to bytes --- */
07186                         "movq      %%mm4, %%mm6 \n\t"   /* copy differences ... */
07187                         "movq      %%mm5, %%mm7 \n\t"   
07188                         "psraw       $15, %%mm6 \n\t"   /* ... and turn each into a sign mask (0 or 0xFFFF) */
07189                         "psraw       $15, %%mm7 \n\t"   
07190                         "pxor      %%mm6, %%mm4 \n\t"   /* flip the bits of negative words */
07191                         "pxor      %%mm7, %%mm5 \n\t"   
07192                         "psubsw    %%mm6, %%mm4 \n\t"   /* subtract the mask (-1) to finish the negation */
07193                         "psubsw    %%mm7, %%mm5 \n\t"   
07194                         "packuswb  %%mm5, %%mm4 \n\t"   /* 8 words -> 8 bytes with unsigned saturation */
07195                         "movq    %%mm4, (%%edi) \n\t"   /* store 8 result bytes */
07196                         /* --- advance to the next 8 columns --- */
07197                         "sub       %%eax, %%esi \n\t"   /* go back up two rows to the top row ... */
07198                         "sub       %%eax, %%esi \n\t" "add $8,          %%esi \n\t"     /* ... and move 8 bytes right in the source */
07199                         "add $8,          %%edi \n\t"   /* move 8 bytes right in the destination */
07200                         /* --- loop control --- */
07201                         "dec              %%ecx \n\t"   /* one 8-byte pass done */
07202                         "jnz            .L10402 \n\t"   /* more passes in this row */
07203                         "mov       %%ebx, %%esi \n\t"   /* restore source row start */
07204                         "movd      %%mm1, %%edi \n\t"   /* restore destination row start */
07205                         "add       %%eax, %%esi \n\t"   /* next source row */
07206                         "add       %%eax, %%edi \n\t"   /* next destination row */
07207                         "dec              %%edx \n\t"   /* one row pass done */
07208                         "jnz            .L10400 \n\t"   /* more rows to process */
07209                         /* --- leave MMX state and restore registers --- */
07210                         "emms                   \n\t"   /* clear MMX state */
07211                         "popa                   \n\t":"=m" (Dest)       /* %0: destination pointer (only loaded by the asm) */
07212                         :"m"(Src),              /* %1: source pointer */
07213                         "m"(rows),              /* %2: row count */
07214                         "m"(columns)            /* %3: column count / row stride */
07215                         );
07216 #endif
07217 #endif
07218                 return (0);
07219         } else {
07220                 /* MMX not detected: nothing is computed, report failure. */
07221                 return (-1);
07222         }
07223 }
07224 
07238 int SDL_imageFilterSobelXShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
07239                                                                         unsigned char NRightShift)
07240 {
07241         
07242         if ((Src == NULL) || (Dest == NULL))
07243                 return(-1);
07244         if ((columns < 8) || (rows < 3) || (NRightShift > 7))
07245                 return (-1);
07246 
07247         if ((SDL_imageFilterMMXdetect())) {
07248 #ifdef USE_MMX
07249 #if !defined(GCC__)
07250                 __asm
07251                 {
07252                         pusha
07253                                 pxor mm0, mm0           
07254                                 mov eax, columns        
07255                                 xor ebx, ebx    
07256                                 mov bl, NRightShift     
07257                                 movd mm1, ebx           
07258                                 
07259                                 mov esi, Src    
07260                                 mov edi, Dest           
07261                                 add edi, eax    
07262                                 inc              edi            
07263                                 
07264                                 sub rows, 2     
07265                                 
07266 L10410:
07267                         mov ecx, eax    
07268                                 shr ecx, 3      
07269                                 mov ebx, esi    
07270                                 mov edx, edi    
07271                                 align 16                        
07272 L10412:
07273                         
07274                         movq mm4, [esi]         
07275                         movq mm5, mm4           
07276                                 add esi, 2      
07277                                 punpcklbw mm4, mm0      
07278                                 punpckhbw mm5, mm0      
07279                                 psrlw mm4, mm1          
07280                                 psrlw mm5, mm1          
07281                                 movq mm6, [esi]         
07282                         movq mm7, mm6           
07283                                 sub esi, 2      
07284                                 punpcklbw mm6, mm0      
07285                                 punpckhbw mm7, mm0      
07286                                 psrlw mm6, mm1          
07287                                 psrlw mm7, mm1          
07288                                 add esi, eax    
07289                                 movq mm2, [esi]         
07290                         movq mm3, mm2           
07291                                 add esi, 2      
07292                                 punpcklbw mm2, mm0      
07293                                 punpckhbw mm3, mm0      
07294                                 psrlw mm2, mm1          
07295                                 psrlw mm3, mm1          
07296                                 paddw mm4, mm2          
07297                                 paddw mm5, mm3          
07298                                 paddw mm4, mm2          
07299                                 paddw mm5, mm3          
07300                                 movq mm2, [esi]         
07301                         movq mm3, mm2           
07302                                 sub esi, 2      
07303                                 punpcklbw mm2, mm0      
07304                                 punpckhbw mm3, mm0      
07305                                 psrlw mm2, mm1          
07306                                 psrlw mm3, mm1          
07307                                 paddw mm6, mm2          
07308                                 paddw mm7, mm3          
07309                                 paddw mm6, mm2          
07310                                 paddw mm7, mm3          
07311                                 add esi, eax    
07312                                 movq mm2, [esi]         
07313                         movq mm3, mm2           
07314                                 add esi, 2      
07315                                 punpcklbw mm2, mm0      
07316                                 punpckhbw mm3, mm0      
07317                                 psrlw mm2, mm1          
07318                                 psrlw mm3, mm1          
07319                                 paddw mm4, mm2          
07320                                 paddw mm5, mm3          
07321                                 movq mm2, [esi]         
07322                         movq mm3, mm2           
07323                                 sub esi, 2      
07324                                 punpcklbw mm2, mm0      
07325                                 punpckhbw mm3, mm0      
07326                                 psrlw mm2, mm1          
07327                                 psrlw mm3, mm1          
07328                                 paddw mm6, mm2          
07329                                 paddw mm7, mm3          
07330                                 
07331                                 movq mm2, mm4           
07332                                 psrlq mm4, 32           
07333                                 psubw mm4, mm2          
07334                                 movq mm3, mm6           
07335                                 psrlq mm6, 32           
07336                                 psubw mm6, mm3          
07337                                 punpckldq mm4, mm6      
07338                                 movq mm2, mm5           
07339                                 psrlq mm5, 32           
07340                                 psubw mm5, mm2          
07341                                 movq mm3, mm7           
07342                                 psrlq mm7, 32           
07343                                 psubw mm7, mm3          
07344                                 punpckldq mm5, mm7      
07345                                 
07346                                 movq mm6, mm4           
07347                                 movq mm7, mm5           
07348                                 psraw mm6, 15           
07349                                 psraw mm7, 15           
07350                                 pxor mm4, mm6           
07351                                 pxor mm5, mm7           
07352                                 psubsw mm4, mm6         
07353                                 psubsw mm5, mm7         
07354                                 packuswb mm4, mm5       
07355                                 movq [edi], mm4         
07356                                 
07357                                 sub esi, eax    
07358                                 sub esi, eax
07359                                 add esi, 8      
07360                                 add edi, 8      
07361                                 
07362                                 dec              ecx            
07363                                 jnz            L10412           
07364                                 mov esi, ebx    
07365                                 mov edi, edx    
07366                                 add esi, eax    
07367                                 add edi, eax    
07368                                 dec rows        
07369                                 jnz            L10410           
07370                                 
07371                                 emms                            
07372                                 popa
07373                 }
07374 #else
07375                 asm volatile
07376                         ("pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"        
07377                         "mov          %3, %%eax \n\t"   
07378                         "xor       %%ebx, %%ebx \n\t"   
07379                         "mov           %4, %%bl \n\t"   
07380                         "movd      %%ebx, %%mm1 \n\t"   
07381                         
07382                         "mov          %1, %%esi \n\t"   
07383                         "mov          %0, %%edi \n\t"   
07384                         "add       %%eax, %%edi \n\t"   
07385                         "inc              %%edi \n\t"   
07386                         
07387                         "subl            $2, %2 \n\t"   
07388                         
07389                         ".L10410:                \n\t" "mov       %%eax, %%ecx \n\t"    
07390                         "shr          $3, %%ecx \n\t"   
07391                         "mov       %%esi, %%ebx \n\t"   
07392                         "mov       %%edi, %%edx \n\t"   
07393                         ".align 16              \n\t"   
07394                         ".L10412:               \n\t"
07395                         
07396                         "movq    (%%esi), %%mm4 \n\t"   
07397                         "movq      %%mm4, %%mm5 \n\t"   
07398                         "add          $2, %%esi \n\t"   
07399                         "punpcklbw %%mm0, %%mm4 \n\t"   
07400                         "punpckhbw %%mm0, %%mm5 \n\t"   
07401                         "psrlw     %%mm1, %%mm4 \n\t"   
07402                         "psrlw     %%mm1, %%mm5 \n\t"   
07403                         "movq    (%%esi), %%mm6 \n\t"   
07404                         "movq      %%mm6, %%mm7 \n\t"   
07405                         "sub          $2, %%esi \n\t"   
07406                         "punpcklbw %%mm0, %%mm6 \n\t"   
07407                         "punpckhbw %%mm0, %%mm7 \n\t"   
07408                         "psrlw     %%mm1, %%mm6 \n\t"   
07409                         "psrlw     %%mm1, %%mm7 \n\t"   
07410                         "add       %%eax, %%esi \n\t"   
07411                         "movq    (%%esi), %%mm2 \n\t"   
07412                         "movq      %%mm2, %%mm3 \n\t"   
07413                         "add          $2, %%esi \n\t"   
07414                         "punpcklbw %%mm0, %%mm2 \n\t"   
07415                         "punpckhbw %%mm0, %%mm3 \n\t"   
07416                         "psrlw     %%mm1, %%mm2 \n\t"   
07417                         "psrlw     %%mm1, %%mm3 \n\t"   
07418                         "paddw     %%mm2, %%mm4 \n\t"   
07419                         "paddw     %%mm3, %%mm5 \n\t"   
07420                         "paddw     %%mm2, %%mm4 \n\t"   
07421                         "paddw     %%mm3, %%mm5 \n\t"   
07422                         "movq    (%%esi), %%mm2 \n\t"   
07423                         "movq      %%mm2, %%mm3 \n\t"   
07424                         "sub          $2, %%esi \n\t"   
07425                         "punpcklbw %%mm0, %%mm2 \n\t"   
07426                         "punpckhbw %%mm0, %%mm3 \n\t"   
07427                         "psrlw     %%mm1, %%mm2 \n\t"   
07428                         "psrlw     %%mm1, %%mm3 \n\t"   
07429                         "paddw     %%mm2, %%mm6 \n\t"   
07430                         "paddw     %%mm3, %%mm7 \n\t"   
07431                         "paddw     %%mm2, %%mm6 \n\t"   
07432                         "paddw     %%mm3, %%mm7 \n\t"   
07433                         "add       %%eax, %%esi \n\t"   
07434                         "movq    (%%esi), %%mm2 \n\t"   
07435                         "movq      %%mm2, %%mm3 \n\t"   
07436                         "add          $2, %%esi \n\t"   
07437                         "punpcklbw %%mm0, %%mm2 \n\t"   
07438                         "punpckhbw %%mm0, %%mm3 \n\t"   
07439                         "psrlw     %%mm1, %%mm2 \n\t"   
07440                         "psrlw     %%mm1, %%mm3 \n\t"   
07441                         "paddw     %%mm2, %%mm4 \n\t"   
07442                         "paddw     %%mm3, %%mm5 \n\t"   
07443                         "movq    (%%esi), %%mm2 \n\t"   
07444                         "movq      %%mm2, %%mm3 \n\t"   
07445                         "sub          $2, %%esi \n\t"   
07446                         "punpcklbw %%mm0, %%mm2 \n\t"   
07447                         "punpckhbw %%mm0, %%mm3 \n\t"   
07448                         "psrlw     %%mm1, %%mm2 \n\t"   
07449                         "psrlw     %%mm1, %%mm3 \n\t"   
07450                         "paddw     %%mm2, %%mm6 \n\t"   
07451                         "paddw     %%mm3, %%mm7 \n\t"   
07452                         
07453                         "movq      %%mm4, %%mm2 \n\t"   
07454                         "psrlq       $32, %%mm4 \n\t"   
07455                         "psubw     %%mm2, %%mm4 \n\t"   
07456                         "movq      %%mm6, %%mm3 \n\t"   
07457                         "psrlq       $32, %%mm6 \n\t"   
07458                         "psubw     %%mm3, %%mm6 \n\t"   
07459                         "punpckldq %%mm6, %%mm4 \n\t"   
07460                         "movq      %%mm5, %%mm2 \n\t"   
07461                         "psrlq       $32, %%mm5 \n\t"   
07462                         "psubw     %%mm2, %%mm5 \n\t"   
07463                         "movq      %%mm7, %%mm3 \n\t"   
07464                         "psrlq       $32, %%mm7 \n\t"   
07465                         "psubw     %%mm3, %%mm7 \n\t"   
07466                         "punpckldq %%mm7, %%mm5 \n\t"   
07467                         
07468                         "movq      %%mm4, %%mm6 \n\t"   
07469                         "movq      %%mm5, %%mm7 \n\t"   
07470                         "psraw       $15, %%mm6 \n\t"   
07471                         "psraw       $15, %%mm7 \n\t"   
07472                         "pxor      %%mm6, %%mm4 \n\t"   
07473                         "pxor      %%mm7, %%mm5 \n\t"   
07474                         "psubsw    %%mm6, %%mm4 \n\t"   
07475                         "psubsw    %%mm7, %%mm5 \n\t"   
07476                         "packuswb  %%mm5, %%mm4 \n\t"   
07477                         "movq    %%mm4, (%%edi) \n\t"   
07478                         
07479                         "sub       %%eax, %%esi \n\t"   
07480                         "sub       %%eax, %%esi \n\t" "add $8,          %%esi \n\t"     
07481                         "add $8,          %%edi \n\t"   
07482                         
07483                         "dec              %%ecx \n\t"   
07484                         "jnz            .L10412 \n\t"   
07485                         "mov       %%ebx, %%esi \n\t"   
07486                         "mov       %%edx, %%edi \n\t"   
07487                         "add       %%eax, %%esi \n\t"   
07488                         "add       %%eax, %%edi \n\t"   
07489                         "decl                %2 \n\t"   
07490                         "jnz            .L10410 \n\t"   
07491                         
07492                         "emms                   \n\t"   
07493                         "popa                   \n\t":"=m" (Dest)       
07494                         :"m"(Src),              
07495                         "m"(rows),              
07496                         "m"(columns),           
07497                         "m"(NRightShift)        
07498                         );
07499 #endif
07500 #endif
07501                 return (0);
07502         } else {
07503                 
07504                 return (-1);
07505         }
07506 }
07507 
/**
 * Move the stack pointer down to a 32-byte boundary, remembering the old value.
 *
 * The previous ESP is written to the 32-bit slot at the new (aligned) top of
 * the stack, which is the slot SDL_imageFilterRestoreStack() reads back.
 *
 * The body is only compiled when USE_MMX is defined; otherwise the function
 * is empty. When GCC__ is not defined the MSVC-style __asm block is used,
 * otherwise the GCC extended-asm variant. Both use 32-bit x86 register names
 * (ESP, EBX).
 *
 * NOTE(review): the GCC asm statement lists no outputs, inputs or clobbers
 * even though it overwrites EBX and ESP. How this interacts with the
 * compiler-generated prologue/epilogue of this function is not visible from
 * here - confirm before relying on it.
 */
07511 void SDL_imageFilterAlignStack(void)
07512 {
07513 #ifdef USE_MMX
07514 #if !defined(GCC__)
07515         __asm
07516         {                               /* --- stack alignment --- */
07517                 mov ebx, esp    /* EBX = current stack pointer */
07518                         sub ebx, 4      /* leave room for one 32-bit slot to hold the old ESP */
07519                         and ebx, -32    /* clear the low 5 bits: round down to a multiple of 32 */
07520                         mov [ebx], esp          /* store the old ESP at the aligned address */
07521                         mov esp, ebx    /* switch ESP to the aligned address */
07522         }
07523 #else
07524         asm volatile
07525                 (                               /* --- stack alignment --- */
07526                 "mov       %%esp, %%ebx \n\t"   /* EBX = current stack pointer */
07527                 "sub          $4, %%ebx \n\t"   /* leave room for one 32-bit slot to hold the old ESP */
07528                 "and        $-32, %%ebx \n\t"   /* clear the low 5 bits: round down to a multiple of 32 */
07529                 "mov     %%esp, (%%ebx) \n\t"   /* store the old ESP at the aligned address */
07530                 "mov       %%ebx, %%esp \n\t"   /* switch ESP to the aligned address */
07531                 ::);
07532 #endif
07533 #endif
07534 }
07535 
/**
 * Reload the stack pointer from the 32-bit value stored at the top of the stack.
 *
 * Reads the dword at [ESP] into EBX and then copies EBX into ESP. This is the
 * counterpart of SDL_imageFilterAlignStack(), which stores the pre-alignment
 * ESP in exactly that slot.
 *
 * The body is only compiled when USE_MMX is defined; otherwise the function
 * is empty. When GCC__ is not defined the MSVC-style __asm block is used,
 * otherwise the GCC extended-asm variant. Both use 32-bit x86 register names
 * (ESP, EBX).
 *
 * NOTE(review): assumes ESP still points at the slot written by
 * SDL_imageFilterAlignStack() when this runs - verify against callers. The
 * GCC asm statement lists no outputs, inputs or clobbers even though it
 * overwrites EBX and ESP.
 */
07539 void SDL_imageFilterRestoreStack(void)
07540 {
07541 #ifdef USE_MMX
07542 #if !defined(GCC__)
07543         __asm
07544         {                               /* --- restoring old stack --- */
07545                 mov ebx, [esp]          /* EBX = value saved at the top of the stack */
07546                 mov esp, ebx    /* ESP = that saved value */
07547         }
07548 #else
07549         asm volatile
07550                 (                               /* --- restoring old stack --- */
07551                 "mov     (%%esp), %%ebx \n\t"   /* EBX = value saved at the top of the stack */
07552                 "mov       %%ebx, %%esp \n\t"   /* ESP = that saved value */
07553                 ::);
07554 #endif
07555 #endif
07556 }