annotate 3rdparty/compressor/fpaq0.cpp @ 0:a4671277546c tip

created the repository for the thymian project
author ferencd
date Tue, 17 Aug 2021 11:19:54 +0200
parents
children
rev   line source
ferencd@0 1 // fpaq0 - Stationary order 0 file compressor.
ferencd@0 2 // (C) 2004, Matt Mahoney under GPL, http://www.gnu.org/licenses/gpl.txt
ferencd@0 3 // To compile: g++ -O fpaq0.cpp
ferencd@0 4
ferencd@0 5 #include <cstdio>
ferencd@0 6 #include <cstdlib>
ferencd@0 7 #include <cstring>
ferencd@0 8 #include <ctime>
ferencd@0 9 #include <cassert>
ferencd@0 10 #include <iostream>
ferencd@0 11 #include "fpaq0.h"
ferencd@0 12
ferencd@0 13 using namespace std;
ferencd@0 14
ferencd@0 15
ferencd@0 16 // Constructor
ferencd@0 17 Encoder::Encoder(Mode m, Archive &parchive): predictor(), mode(m), archive(parchive), x1(0),
ferencd@0 18 x2(0xffffffff), x(0)
ferencd@0 19 {
ferencd@0 20
ferencd@0 21 // In DECOMPRESS mode, initialize x to the first 4 bytes of the archive
ferencd@0 22 if (mode==DECOMPRESS)
ferencd@0 23 {
ferencd@0 24 for (int i=0; i<4; ++i)
ferencd@0 25 {
ferencd@0 26 int c = archive.getc();
ferencd@0 27 if (c==EOF) c=0;
ferencd@0 28 x=(x<<8)+(c&0xff);
ferencd@0 29 }
ferencd@0 30 }
ferencd@0 31 }
ferencd@0 32
ferencd@0 33 /* encode(y) -- Encode bit y by splitting the range [x1, x2] in proportion
ferencd@0 34 to P(1) and P(0) as given by the predictor and narrowing to the appropriate
ferencd@0 35 subrange. Output leading bytes of the range as they become known. */
ferencd@0 36
ferencd@0 37 inline void Encoder::encode(int y)
ferencd@0 38 {
ferencd@0 39
ferencd@0 40 // Update the range
ferencd@0 41 const uint32_t xmid = x1 + ((x2-x1) >> 12) * predictor.p();
ferencd@0 42 assert(xmid >= x1 && xmid < x2);
ferencd@0 43 if (y)
ferencd@0 44 x2=xmid;
ferencd@0 45 else
ferencd@0 46 x1=xmid+1;
ferencd@0 47 predictor.update(y);
ferencd@0 48
ferencd@0 49 // Shift equal MSB's out
ferencd@0 50 while ( ((x1^x2) & 0xff000000) == 0)
ferencd@0 51 {
ferencd@0 52 archive.putc(x2>>24);
ferencd@0 53 x1 <<= 8;
ferencd@0 54 x2 = (x2<<8) + 255;
ferencd@0 55 }
ferencd@0 56 }
ferencd@0 57
ferencd@0 58 /* Decode one bit from the archive, splitting [x1, x2] as in the encoder
ferencd@0 59 and returning 1 or 0 depending on which subrange the archive point x is in.
ferencd@0 60 */
ferencd@0 61 inline int Encoder::decode()
ferencd@0 62 {
ferencd@0 63
ferencd@0 64 // Update the range
ferencd@0 65 const uint32_t xmid = x1 + ((x2-x1) >> 12) * predictor.p();
ferencd@0 66 assert(xmid >= x1 && xmid < x2);
ferencd@0 67 int y=0;
ferencd@0 68 if (x<=xmid)
ferencd@0 69 {
ferencd@0 70 y=1;
ferencd@0 71 x2=xmid;
ferencd@0 72 }
ferencd@0 73 else
ferencd@0 74 x1=xmid+1;
ferencd@0 75 predictor.update(y);
ferencd@0 76
ferencd@0 77 // Shift equal MSB's out
ferencd@0 78 while (((x1^x2)&0xff000000)==0)
ferencd@0 79 {
ferencd@0 80 x1 <<= 8;
ferencd@0 81 x2=(x2<<8)+255;
ferencd@0 82 int c = archive.getc();
ferencd@0 83 if (c==EOF) c=0;
ferencd@0 84 x = (x<<8) + c;
ferencd@0 85 }
ferencd@0 86 return y;
ferencd@0 87 }
ferencd@0 88
ferencd@0 89 // Should be called when there is no more to compress
ferencd@0 90 void Encoder::flush()
ferencd@0 91 {
ferencd@0 92
ferencd@0 93 // In COMPRESS mode, write out the remaining bytes of x, x1 < x < x2
ferencd@0 94 if (mode==COMPRESS)
ferencd@0 95 {
ferencd@0 96 while (((x1^x2)&0xff000000)==0)
ferencd@0 97 {
ferencd@0 98 archive.putc(x2>>24);
ferencd@0 99 x1 <<= 8;
ferencd@0 100 x2 = (x2<<8) + 255;
ferencd@0 101 }
ferencd@0 102 archive.putc(x2>>24); // First unequal byte
ferencd@0 103 }
ferencd@0 104 }
ferencd@0 105
ferencd@0 106 std::vector<unsigned char> compress(const std::string& s)
ferencd@0 107 {
ferencd@0 108 Archive a;
ferencd@0 109 Encoder e(COMPRESS, a);
ferencd@0 110 char c = 0;
ferencd@0 111 size_t idx = 0;
ferencd@0 112 while ( idx < s.length() )
ferencd@0 113 {
ferencd@0 114 c = s[idx];
ferencd@0 115 e.encode(0);
ferencd@0 116 for (int i=7; i>=0; --i)
ferencd@0 117 {
ferencd@0 118 e.encode((c>>i)&1);
ferencd@0 119 }
ferencd@0 120 idx ++;
ferencd@0 121 }
ferencd@0 122 e.encode(1); // EOF code
ferencd@0 123 e.flush();
ferencd@0 124
ferencd@0 125 return a.data();
ferencd@0 126 }
ferencd@0 127
ferencd@0 128 std::string decompress(const std::vector<unsigned char>& v)
ferencd@0 129 {
ferencd@0 130 Archive a(v);
ferencd@0 131 std::string result;
ferencd@0 132 Encoder e(DECOMPRESS, a);
ferencd@0 133 while (!e.decode())
ferencd@0 134 {
ferencd@0 135 int c=1;
ferencd@0 136 while (c<256)
ferencd@0 137 c += c + e.decode();
ferencd@0 138
ferencd@0 139 char temp[2] = {0};
ferencd@0 140 temp[0] = c - 256;
ferencd@0 141
ferencd@0 142 result += temp;
ferencd@0 143 }
ferencd@0 144 return result;
ferencd@0 145 }
ferencd@0 146
ferencd@0 147 int Predictor::p() const
ferencd@0 148 {
ferencd@0 149 return 4096*(ct[cxt][1]+1)/(ct[cxt][0]+ct[cxt][1]+2);
ferencd@0 150 }
ferencd@0 151
ferencd@0 152 void Predictor::update(int y)
ferencd@0 153 {
ferencd@0 154 if (++ct[cxt][y] > 65534)
ferencd@0 155 {
ferencd@0 156 ct[cxt][0] >>= 1;
ferencd@0 157 ct[cxt][1] >>= 1;
ferencd@0 158 }
ferencd@0 159 if ((cxt+=cxt+y) >= 512)
ferencd@0 160 cxt=1;
ferencd@0 161 }