tif_lzw.c 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230
  1. /*
  2. * Copyright (c) 1988-1997 Sam Leffler
  3. * Copyright (c) 1991-1997 Silicon Graphics, Inc.
  4. *
  5. * Permission to use, copy, modify, distribute, and sell this software and
  6. * its documentation for any purpose is hereby granted without fee, provided
  7. * that (i) the above copyright notices and this permission notice appear in
  8. * all copies of the software and related documentation, and (ii) the names of
  9. * Sam Leffler and Silicon Graphics may not be used in any advertising or
  10. * publicity relating to the software without the specific, prior written
  11. * permission of Sam Leffler and Silicon Graphics.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
  14. * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
  15. * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
  16. *
  17. * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
  18. * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
  19. * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  20. * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
  21. * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  22. * OF THIS SOFTWARE.
  23. */
  24. #include "tiffiop.h"
  25. #ifdef LZW_SUPPORT
  26. /*
  27. * TIFF Library.
  28. * Rev 5.0 Lempel-Ziv & Welch Compression Support
  29. *
  30. * This code is derived from the compress program whose code is
  31. * derived from software contributed to Berkeley by James A. Woods,
  32. * derived from original work by Spencer Thomas and Joseph Orost.
  33. *
  34. * The original Berkeley copyright notice appears below in its entirety.
  35. */
  36. #include "tif_predict.h"
  37. #include <stdio.h>
  38. /*
  39. * NB: The 5.0 spec describes a different algorithm than Aldus
  40. * implements. Specifically, Aldus does code length transitions
  41. * one code earlier than should be done (for real LZW).
  42. * Earlier versions of this library implemented the correct
  43. * LZW algorithm, but emitted codes in a bit order opposite
  44. * to the TIFF spec. Thus, to maintain compatibility w/ Aldus
  45. * we interpret MSB-LSB ordered codes to be images written w/
  46. * old versions of this library, but otherwise adhere to the
  47. * Aldus "off by one" algorithm.
  48. *
  49. * Future revisions to the TIFF spec are expected to "clarify this issue".
  50. */
  51. #define LZW_COMPAT /* include backwards compatibility code */
  52. /*
  53. * Each strip of data is supposed to be terminated by a CODE_EOI.
  54. * If the following #define is included, the decoder will also
  55. * check for end-of-strip w/o seeing this code. This makes the
  56. * library more robust, but also slower.
  57. */
  58. #define LZW_CHECKEOS /* include checks for strips w/o EOI code */
  59. #define MAXCODE(n) ((1L<<(n))-1)
  60. /*
  61. * The TIFF spec specifies that encoded bit
  62. * strings range from 9 to 12 bits.
  63. */
  64. #define BITS_MIN 9 /* start with 9 bits */
  65. #define BITS_MAX 12 /* max of 12 bit strings */
  66. /* predefined codes */
  67. #define CODE_CLEAR 256 /* code to clear string table */
  68. #define CODE_EOI 257 /* end-of-information code */
  69. #define CODE_FIRST 258 /* first free code entry */
  70. #define CODE_MAX MAXCODE(BITS_MAX)
  71. #define HSIZE 9001L /* 91% occupancy */
  72. #define HSHIFT (13-8)
  73. #ifdef LZW_COMPAT
  74. /* NB: +1024 is for compatibility with old files */
  75. #define CSIZE (MAXCODE(BITS_MAX)+1024L)
  76. #else
  77. #define CSIZE (MAXCODE(BITS_MAX)+1L)
  78. #endif
  79. /*
  80. * State block for each open TIFF file using LZW
  81. * compression/decompression. Note that the predictor
  82. * state block must be first in this data structure.
  83. */
  84. typedef struct {
  85. TIFFPredictorState predict; /* predictor super class */
  86. unsigned short nbits; /* # of bits/code */
  87. unsigned short maxcode; /* maximum code for lzw_nbits */
  88. unsigned short free_ent; /* next free entry in hash table */
  89. unsigned long nextdata; /* next bits of i/o */
  90. long nextbits; /* # of valid bits in lzw_nextdata */
  91. int rw_mode; /* preserve rw_mode from init */
  92. } LZWBaseState;
  93. #define lzw_nbits base.nbits
  94. #define lzw_maxcode base.maxcode
  95. #define lzw_free_ent base.free_ent
  96. #define lzw_nextdata base.nextdata
  97. #define lzw_nextbits base.nextbits
  98. /*
  99. * Encoding-specific state.
  100. */
  101. typedef uint16 hcode_t; /* codes fit in 16 bits */
  102. typedef struct {
  103. long hash;
  104. hcode_t code;
  105. } hash_t;
  106. /*
  107. * Decoding-specific state.
  108. */
  109. typedef struct code_ent {
  110. struct code_ent *next;
  111. unsigned short length; /* string len, including this token */
  112. unsigned char value; /* data value */
  113. unsigned char firstchar; /* first token of string */
  114. } code_t;
  115. typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16);
  116. typedef struct {
  117. LZWBaseState base;
  118. /* Decoding specific data */
  119. long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */
  120. long dec_restart; /* restart count */
  121. #ifdef LZW_CHECKEOS
  122. uint64 dec_bitsleft; /* available bits in raw data */
  123. tmsize_t old_tif_rawcc; /* value of tif_rawcc at the end of the previous TIFLZWDecode() call */
  124. #endif
  125. decodeFunc dec_decode; /* regular or backwards compatible */
  126. code_t* dec_codep; /* current recognized code */
  127. code_t* dec_oldcodep; /* previously recognized code */
  128. code_t* dec_free_entp; /* next free entry */
  129. code_t* dec_maxcodep; /* max available entry */
  130. code_t* dec_codetab; /* kept separate for small machines */
  131. /* Encoding specific data */
  132. int enc_oldcode; /* last code encountered */
  133. long enc_checkpoint; /* point at which to clear table */
  134. #define CHECK_GAP 10000 /* enc_ratio check interval */
  135. long enc_ratio; /* current compression ratio */
  136. long enc_incount; /* (input) data bytes encoded */
  137. long enc_outcount; /* encoded (output) bytes */
  138. uint8* enc_rawlimit; /* bound on tif_rawdata buffer */
  139. hash_t* enc_hashtab; /* kept separate for small machines */
  140. } LZWCodecState;
  141. #define LZWState(tif) ((LZWBaseState*) (tif)->tif_data)
  142. #define DecoderState(tif) ((LZWCodecState*) LZWState(tif))
  143. #define EncoderState(tif) ((LZWCodecState*) LZWState(tif))
  144. static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
  145. #ifdef LZW_COMPAT
  146. static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
  147. #endif
  148. static void cl_hash(LZWCodecState*);
  149. /*
  150. * LZW Decoder.
  151. */
  152. #ifdef LZW_CHECKEOS
  153. /*
  154. * This check shouldn't be necessary because each
  155. * strip is suppose to be terminated with CODE_EOI.
  156. */
  157. #define NextCode(_tif, _sp, _bp, _code, _get) { \
  158. if ((_sp)->dec_bitsleft < (uint64)nbits) { \
  159. TIFFWarningExt(_tif->tif_clientdata, module, \
  160. "LZWDecode: Strip %d not terminated with EOI code", \
  161. _tif->tif_curstrip); \
  162. _code = CODE_EOI; \
  163. } else { \
  164. _get(_sp,_bp,_code); \
  165. (_sp)->dec_bitsleft -= nbits; \
  166. } \
  167. }
  168. #else
  169. #define NextCode(tif, sp, bp, code, get) get(sp, bp, code)
  170. #endif
  171. static int
  172. LZWFixupTags(TIFF* tif)
  173. {
  174. (void) tif;
  175. return (1);
  176. }
  177. static int
  178. LZWSetupDecode(TIFF* tif)
  179. {
  180. static const char module[] = "LZWSetupDecode";
  181. LZWCodecState* sp = DecoderState(tif);
  182. int code;
  183. if( sp == NULL )
  184. {
  185. /*
  186. * Allocate state block so tag methods have storage to record
  187. * values.
  188. */
  189. tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState));
  190. if (tif->tif_data == NULL)
  191. {
  192. TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block");
  193. return (0);
  194. }
  195. sp = DecoderState(tif);
  196. sp->dec_codetab = NULL;
  197. sp->dec_decode = NULL;
  198. /*
  199. * Setup predictor setup.
  200. */
  201. (void) TIFFPredictorInit(tif);
  202. }
  203. if (sp->dec_codetab == NULL) {
  204. sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t));
  205. if (sp->dec_codetab == NULL) {
  206. TIFFErrorExt(tif->tif_clientdata, module,
  207. "No space for LZW code table");
  208. return (0);
  209. }
  210. /*
  211. * Pre-load the table.
  212. */
  213. code = 255;
  214. do {
  215. sp->dec_codetab[code].value = (unsigned char)code;
  216. sp->dec_codetab[code].firstchar = (unsigned char)code;
  217. sp->dec_codetab[code].length = 1;
  218. sp->dec_codetab[code].next = NULL;
  219. } while (code--);
  220. /*
  221. * Zero-out the unused entries
  222. */
  223. /* Silence false positive */
  224. /* coverity[overrun-buffer-arg] */
  225. _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0,
  226. (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
  227. }
  228. return (1);
  229. }
  230. /*
  231. * Setup state for decoding a strip.
  232. */
  233. static int
  234. LZWPreDecode(TIFF* tif, uint16 s)
  235. {
  236. static const char module[] = "LZWPreDecode";
  237. LZWCodecState *sp = DecoderState(tif);
  238. (void) s;
  239. assert(sp != NULL);
  240. if( sp->dec_codetab == NULL )
  241. {
  242. tif->tif_setupdecode( tif );
  243. if( sp->dec_codetab == NULL )
  244. return (0);
  245. }
  246. /*
  247. * Check for old bit-reversed codes.
  248. */
  249. if (tif->tif_rawcc >= 2 &&
  250. tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) {
  251. #ifdef LZW_COMPAT
  252. if (!sp->dec_decode) {
  253. TIFFWarningExt(tif->tif_clientdata, module,
  254. "Old-style LZW codes, convert file");
  255. /*
  256. * Override default decoding methods with
  257. * ones that deal with the old coding.
  258. * Otherwise the predictor versions set
  259. * above will call the compatibility routines
  260. * through the dec_decode method.
  261. */
  262. tif->tif_decoderow = LZWDecodeCompat;
  263. tif->tif_decodestrip = LZWDecodeCompat;
  264. tif->tif_decodetile = LZWDecodeCompat;
  265. /*
  266. * If doing horizontal differencing, must
  267. * re-setup the predictor logic since we
  268. * switched the basic decoder methods...
  269. */
  270. (*tif->tif_setupdecode)(tif);
  271. sp->dec_decode = LZWDecodeCompat;
  272. }
  273. sp->lzw_maxcode = MAXCODE(BITS_MIN);
  274. #else /* !LZW_COMPAT */
  275. if (!sp->dec_decode) {
  276. TIFFErrorExt(tif->tif_clientdata, module,
  277. "Old-style LZW codes not supported");
  278. sp->dec_decode = LZWDecode;
  279. }
  280. return (0);
  281. #endif/* !LZW_COMPAT */
  282. } else {
  283. sp->lzw_maxcode = MAXCODE(BITS_MIN)-1;
  284. sp->dec_decode = LZWDecode;
  285. }
  286. sp->lzw_nbits = BITS_MIN;
  287. sp->lzw_nextbits = 0;
  288. sp->lzw_nextdata = 0;
  289. sp->dec_restart = 0;
  290. sp->dec_nbitsmask = MAXCODE(BITS_MIN);
  291. #ifdef LZW_CHECKEOS
  292. sp->dec_bitsleft = 0;
  293. sp->old_tif_rawcc = 0;
  294. #endif
  295. sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
  296. /*
  297. * Zero entries that are not yet filled in. We do
  298. * this to guard against bogus input data that causes
  299. * us to index into undefined entries. If you can
  300. * come up with a way to safely bounds-check input codes
  301. * while decoding then you can remove this operation.
  302. */
  303. _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
  304. sp->dec_oldcodep = &sp->dec_codetab[-1];
  305. sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
  306. return (1);
  307. }
  308. /*
  309. * Decode a "hunk of data".
  310. */
  311. #define GetNextCode(sp, bp, code) { \
  312. nextdata = (nextdata<<8) | *(bp)++; \
  313. nextbits += 8; \
  314. if (nextbits < nbits) { \
  315. nextdata = (nextdata<<8) | *(bp)++; \
  316. nextbits += 8; \
  317. } \
  318. code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \
  319. nextbits -= nbits; \
  320. }
  321. static void
  322. codeLoop(TIFF* tif, const char* module)
  323. {
  324. TIFFErrorExt(tif->tif_clientdata, module,
  325. "Bogus encoding, loop in the code table; scanline %d",
  326. tif->tif_row);
  327. }
  328. static int
  329. LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
  330. {
  331. static const char module[] = "LZWDecode";
  332. LZWCodecState *sp = DecoderState(tif);
  333. char *op = (char*) op0;
  334. long occ = (long) occ0;
  335. char *tp;
  336. unsigned char *bp;
  337. hcode_t code;
  338. int len;
  339. long nbits, nextbits, nbitsmask;
  340. unsigned long nextdata;
  341. code_t *codep, *free_entp, *maxcodep, *oldcodep;
  342. (void) s;
  343. assert(sp != NULL);
  344. assert(sp->dec_codetab != NULL);
  345. /*
  346. Fail if value does not fit in long.
  347. */
  348. if ((tmsize_t) occ != occ0)
  349. return (0);
  350. /*
  351. * Restart interrupted output operation.
  352. */
  353. if (sp->dec_restart) {
  354. long residue;
  355. codep = sp->dec_codep;
  356. residue = codep->length - sp->dec_restart;
  357. if (residue > occ) {
  358. /*
  359. * Residue from previous decode is sufficient
  360. * to satisfy decode request. Skip to the
  361. * start of the decoded string, place decoded
  362. * values in the output buffer, and return.
  363. */
  364. sp->dec_restart += occ;
  365. do {
  366. codep = codep->next;
  367. } while (--residue > occ && codep);
  368. if (codep) {
  369. tp = op + occ;
  370. do {
  371. *--tp = codep->value;
  372. codep = codep->next;
  373. } while (--occ && codep);
  374. }
  375. return (1);
  376. }
  377. /*
  378. * Residue satisfies only part of the decode request.
  379. */
  380. op += residue;
  381. occ -= residue;
  382. tp = op;
  383. do {
  384. int t;
  385. --tp;
  386. t = codep->value;
  387. codep = codep->next;
  388. *tp = (char)t;
  389. } while (--residue && codep);
  390. sp->dec_restart = 0;
  391. }
  392. bp = (unsigned char *)tif->tif_rawcp;
  393. #ifdef LZW_CHECKEOS
  394. sp->dec_bitsleft += (((uint64)tif->tif_rawcc - sp->old_tif_rawcc) << 3);
  395. #endif
  396. nbits = sp->lzw_nbits;
  397. nextdata = sp->lzw_nextdata;
  398. nextbits = sp->lzw_nextbits;
  399. nbitsmask = sp->dec_nbitsmask;
  400. oldcodep = sp->dec_oldcodep;
  401. free_entp = sp->dec_free_entp;
  402. maxcodep = sp->dec_maxcodep;
  403. while (occ > 0) {
  404. NextCode(tif, sp, bp, code, GetNextCode);
  405. if (code == CODE_EOI)
  406. break;
  407. if (code == CODE_CLEAR) {
  408. do {
  409. free_entp = sp->dec_codetab + CODE_FIRST;
  410. _TIFFmemset(free_entp, 0,
  411. (CSIZE - CODE_FIRST) * sizeof (code_t));
  412. nbits = BITS_MIN;
  413. nbitsmask = MAXCODE(BITS_MIN);
  414. maxcodep = sp->dec_codetab + nbitsmask-1;
  415. NextCode(tif, sp, bp, code, GetNextCode);
  416. } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */
  417. if (code == CODE_EOI)
  418. break;
  419. if (code > CODE_CLEAR) {
  420. TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
  421. "LZWDecode: Corrupted LZW table at scanline %d",
  422. tif->tif_row);
  423. return (0);
  424. }
  425. *op++ = (char)code;
  426. occ--;
  427. oldcodep = sp->dec_codetab + code;
  428. continue;
  429. }
  430. codep = sp->dec_codetab + code;
  431. /*
  432. * Add the new entry to the code table.
  433. */
  434. if (free_entp < &sp->dec_codetab[0] ||
  435. free_entp >= &sp->dec_codetab[CSIZE]) {
  436. TIFFErrorExt(tif->tif_clientdata, module,
  437. "Corrupted LZW table at scanline %d",
  438. tif->tif_row);
  439. return (0);
  440. }
  441. free_entp->next = oldcodep;
  442. if (free_entp->next < &sp->dec_codetab[0] ||
  443. free_entp->next >= &sp->dec_codetab[CSIZE]) {
  444. TIFFErrorExt(tif->tif_clientdata, module,
  445. "Corrupted LZW table at scanline %d",
  446. tif->tif_row);
  447. return (0);
  448. }
  449. free_entp->firstchar = free_entp->next->firstchar;
  450. free_entp->length = free_entp->next->length+1;
  451. free_entp->value = (codep < free_entp) ?
  452. codep->firstchar : free_entp->firstchar;
  453. if (++free_entp > maxcodep) {
  454. if (++nbits > BITS_MAX) /* should not happen */
  455. nbits = BITS_MAX;
  456. nbitsmask = MAXCODE(nbits);
  457. maxcodep = sp->dec_codetab + nbitsmask-1;
  458. }
  459. oldcodep = codep;
  460. if (code >= 256) {
  461. /*
  462. * Code maps to a string, copy string
  463. * value to output (written in reverse).
  464. */
  465. if(codep->length == 0) {
  466. TIFFErrorExt(tif->tif_clientdata, module,
  467. "Wrong length of decoded string: "
  468. "data probably corrupted at scanline %d",
  469. tif->tif_row);
  470. return (0);
  471. }
  472. if (codep->length > occ) {
  473. /*
  474. * String is too long for decode buffer,
  475. * locate portion that will fit, copy to
  476. * the decode buffer, and setup restart
  477. * logic for the next decoding call.
  478. */
  479. sp->dec_codep = codep;
  480. do {
  481. codep = codep->next;
  482. } while (codep && codep->length > occ);
  483. if (codep) {
  484. sp->dec_restart = (long)occ;
  485. tp = op + occ;
  486. do {
  487. *--tp = codep->value;
  488. codep = codep->next;
  489. } while (--occ && codep);
  490. if (codep)
  491. codeLoop(tif, module);
  492. }
  493. break;
  494. }
  495. len = codep->length;
  496. tp = op + len;
  497. do {
  498. int t;
  499. --tp;
  500. t = codep->value;
  501. codep = codep->next;
  502. *tp = (char)t;
  503. } while (codep && tp > op);
  504. if (codep) {
  505. codeLoop(tif, module);
  506. break;
  507. }
  508. assert(occ >= len);
  509. op += len;
  510. occ -= len;
  511. } else {
  512. *op++ = (char)code;
  513. occ--;
  514. }
  515. }
  516. tif->tif_rawcc -= (tmsize_t)( (uint8*) bp - tif->tif_rawcp );
  517. tif->tif_rawcp = (uint8*) bp;
  518. #ifdef LZW_CHECKEOS
  519. sp->old_tif_rawcc = tif->tif_rawcc;
  520. #endif
  521. sp->lzw_nbits = (unsigned short) nbits;
  522. sp->lzw_nextdata = nextdata;
  523. sp->lzw_nextbits = nextbits;
  524. sp->dec_nbitsmask = nbitsmask;
  525. sp->dec_oldcodep = oldcodep;
  526. sp->dec_free_entp = free_entp;
  527. sp->dec_maxcodep = maxcodep;
  528. if (occ > 0) {
  529. #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
  530. TIFFErrorExt(tif->tif_clientdata, module,
  531. "Not enough data at scanline %d (short %I64d bytes)",
  532. tif->tif_row, (unsigned __int64) occ);
  533. #else
  534. TIFFErrorExt(tif->tif_clientdata, module,
  535. "Not enough data at scanline %d (short %llu bytes)",
  536. tif->tif_row, (unsigned long long) occ);
  537. #endif
  538. return (0);
  539. }
  540. return (1);
  541. }
  542. #ifdef LZW_COMPAT
  543. /*
  544. * Decode a "hunk of data" for old images.
  545. */
  546. #define GetNextCodeCompat(sp, bp, code) { \
  547. nextdata |= (unsigned long) *(bp)++ << nextbits; \
  548. nextbits += 8; \
  549. if (nextbits < nbits) { \
  550. nextdata |= (unsigned long) *(bp)++ << nextbits;\
  551. nextbits += 8; \
  552. } \
  553. code = (hcode_t)(nextdata & nbitsmask); \
  554. nextdata >>= nbits; \
  555. nextbits -= nbits; \
  556. }
  557. static int
  558. LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
  559. {
  560. static const char module[] = "LZWDecodeCompat";
  561. LZWCodecState *sp = DecoderState(tif);
  562. char *op = (char*) op0;
  563. long occ = (long) occ0;
  564. char *tp;
  565. unsigned char *bp;
  566. int code, nbits;
  567. int len;
  568. long nextbits, nextdata, nbitsmask;
  569. code_t *codep, *free_entp, *maxcodep, *oldcodep;
  570. (void) s;
  571. assert(sp != NULL);
  572. /*
  573. Fail if value does not fit in long.
  574. */
  575. if ((tmsize_t) occ != occ0)
  576. return (0);
  577. /*
  578. * Restart interrupted output operation.
  579. */
  580. if (sp->dec_restart) {
  581. long residue;
  582. codep = sp->dec_codep;
  583. residue = codep->length - sp->dec_restart;
  584. if (residue > occ) {
  585. /*
  586. * Residue from previous decode is sufficient
  587. * to satisfy decode request. Skip to the
  588. * start of the decoded string, place decoded
  589. * values in the output buffer, and return.
  590. */
  591. sp->dec_restart += occ;
  592. do {
  593. codep = codep->next;
  594. } while (--residue > occ);
  595. tp = op + occ;
  596. do {
  597. *--tp = codep->value;
  598. codep = codep->next;
  599. } while (--occ);
  600. return (1);
  601. }
  602. /*
  603. * Residue satisfies only part of the decode request.
  604. */
  605. op += residue;
  606. occ -= residue;
  607. tp = op;
  608. do {
  609. *--tp = codep->value;
  610. codep = codep->next;
  611. } while (--residue);
  612. sp->dec_restart = 0;
  613. }
  614. bp = (unsigned char *)tif->tif_rawcp;
  615. #ifdef LZW_CHECKEOS
  616. sp->dec_bitsleft += (((uint64)tif->tif_rawcc - sp->old_tif_rawcc) << 3);
  617. #endif
  618. nbits = sp->lzw_nbits;
  619. nextdata = sp->lzw_nextdata;
  620. nextbits = sp->lzw_nextbits;
  621. nbitsmask = sp->dec_nbitsmask;
  622. oldcodep = sp->dec_oldcodep;
  623. free_entp = sp->dec_free_entp;
  624. maxcodep = sp->dec_maxcodep;
  625. while (occ > 0) {
  626. NextCode(tif, sp, bp, code, GetNextCodeCompat);
  627. if (code == CODE_EOI)
  628. break;
  629. if (code == CODE_CLEAR) {
  630. do {
  631. free_entp = sp->dec_codetab + CODE_FIRST;
  632. _TIFFmemset(free_entp, 0,
  633. (CSIZE - CODE_FIRST) * sizeof (code_t));
  634. nbits = BITS_MIN;
  635. nbitsmask = MAXCODE(BITS_MIN);
  636. maxcodep = sp->dec_codetab + nbitsmask;
  637. NextCode(tif, sp, bp, code, GetNextCodeCompat);
  638. } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */
  639. if (code == CODE_EOI)
  640. break;
  641. if (code > CODE_CLEAR) {
  642. TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
  643. "LZWDecode: Corrupted LZW table at scanline %d",
  644. tif->tif_row);
  645. return (0);
  646. }
  647. *op++ = (char)code;
  648. occ--;
  649. oldcodep = sp->dec_codetab + code;
  650. continue;
  651. }
  652. codep = sp->dec_codetab + code;
  653. /*
  654. * Add the new entry to the code table.
  655. */
  656. if (free_entp < &sp->dec_codetab[0] ||
  657. free_entp >= &sp->dec_codetab[CSIZE]) {
  658. TIFFErrorExt(tif->tif_clientdata, module,
  659. "Corrupted LZW table at scanline %d", tif->tif_row);
  660. return (0);
  661. }
  662. free_entp->next = oldcodep;
  663. if (free_entp->next < &sp->dec_codetab[0] ||
  664. free_entp->next >= &sp->dec_codetab[CSIZE]) {
  665. TIFFErrorExt(tif->tif_clientdata, module,
  666. "Corrupted LZW table at scanline %d", tif->tif_row);
  667. return (0);
  668. }
  669. free_entp->firstchar = free_entp->next->firstchar;
  670. free_entp->length = free_entp->next->length+1;
  671. free_entp->value = (codep < free_entp) ?
  672. codep->firstchar : free_entp->firstchar;
  673. if (++free_entp > maxcodep) {
  674. if (++nbits > BITS_MAX) /* should not happen */
  675. nbits = BITS_MAX;
  676. nbitsmask = MAXCODE(nbits);
  677. maxcodep = sp->dec_codetab + nbitsmask;
  678. }
  679. oldcodep = codep;
  680. if (code >= 256) {
  681. /*
  682. * Code maps to a string, copy string
  683. * value to output (written in reverse).
  684. */
  685. if(codep->length == 0) {
  686. TIFFErrorExt(tif->tif_clientdata, module,
  687. "Wrong length of decoded "
  688. "string: data probably corrupted at scanline %d",
  689. tif->tif_row);
  690. return (0);
  691. }
  692. if (codep->length > occ) {
  693. /*
  694. * String is too long for decode buffer,
  695. * locate portion that will fit, copy to
  696. * the decode buffer, and setup restart
  697. * logic for the next decoding call.
  698. */
  699. sp->dec_codep = codep;
  700. do {
  701. codep = codep->next;
  702. } while (codep->length > occ);
  703. sp->dec_restart = occ;
  704. tp = op + occ;
  705. do {
  706. *--tp = codep->value;
  707. codep = codep->next;
  708. } while (--occ);
  709. break;
  710. }
  711. len = codep->length;
  712. tp = op + len;
  713. do {
  714. int t;
  715. --tp;
  716. t = codep->value;
  717. codep = codep->next;
  718. *tp = (char)t;
  719. } while (codep && tp > op);
  720. assert(occ >= len);
  721. op += len;
  722. occ -= len;
  723. } else {
  724. *op++ = (char)code;
  725. occ--;
  726. }
  727. }
  728. tif->tif_rawcc -= (tmsize_t)( (uint8*) bp - tif->tif_rawcp );
  729. tif->tif_rawcp = (uint8*) bp;
  730. #ifdef LZW_CHECKEOS
  731. sp->old_tif_rawcc = tif->tif_rawcc;
  732. #endif
  733. sp->lzw_nbits = (unsigned short)nbits;
  734. sp->lzw_nextdata = nextdata;
  735. sp->lzw_nextbits = nextbits;
  736. sp->dec_nbitsmask = nbitsmask;
  737. sp->dec_oldcodep = oldcodep;
  738. sp->dec_free_entp = free_entp;
  739. sp->dec_maxcodep = maxcodep;
  740. if (occ > 0) {
  741. #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
  742. TIFFErrorExt(tif->tif_clientdata, module,
  743. "Not enough data at scanline %d (short %I64d bytes)",
  744. tif->tif_row, (unsigned __int64) occ);
  745. #else
  746. TIFFErrorExt(tif->tif_clientdata, module,
  747. "Not enough data at scanline %d (short %llu bytes)",
  748. tif->tif_row, (unsigned long long) occ);
  749. #endif
  750. return (0);
  751. }
  752. return (1);
  753. }
  754. #endif /* LZW_COMPAT */
  755. /*
  756. * LZW Encoding.
  757. */
  758. static int
  759. LZWSetupEncode(TIFF* tif)
  760. {
  761. static const char module[] = "LZWSetupEncode";
  762. LZWCodecState* sp = EncoderState(tif);
  763. assert(sp != NULL);
  764. sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t));
  765. if (sp->enc_hashtab == NULL) {
  766. TIFFErrorExt(tif->tif_clientdata, module,
  767. "No space for LZW hash table");
  768. return (0);
  769. }
  770. return (1);
  771. }
  772. /*
  773. * Reset encoding state at the start of a strip.
  774. */
  775. static int
  776. LZWPreEncode(TIFF* tif, uint16 s)
  777. {
  778. LZWCodecState *sp = EncoderState(tif);
  779. (void) s;
  780. assert(sp != NULL);
  781. if( sp->enc_hashtab == NULL )
  782. {
  783. tif->tif_setupencode( tif );
  784. }
  785. sp->lzw_nbits = BITS_MIN;
  786. sp->lzw_maxcode = MAXCODE(BITS_MIN);
  787. sp->lzw_free_ent = CODE_FIRST;
  788. sp->lzw_nextbits = 0;
  789. sp->lzw_nextdata = 0;
  790. sp->enc_checkpoint = CHECK_GAP;
  791. sp->enc_ratio = 0;
  792. sp->enc_incount = 0;
  793. sp->enc_outcount = 0;
  794. /*
  795. * The 4 here insures there is space for 2 max-sized
  796. * codes in LZWEncode and LZWPostDecode.
  797. */
  798. sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4;
  799. cl_hash(sp); /* clear hash table */
  800. sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */
  801. return (1);
  802. }
  803. #define CALCRATIO(sp, rat) { \
  804. if (incount > 0x007fffff) { /* NB: shift will overflow */\
  805. rat = outcount >> 8; \
  806. rat = (rat == 0 ? 0x7fffffff : incount/rat); \
  807. } else \
  808. rat = (incount<<8) / outcount; \
  809. }
  810. /* Explicit 0xff masking to make icc -check=conversions happy */
  811. #define PutNextCode(op, c) { \
  812. nextdata = (nextdata << nbits) | c; \
  813. nextbits += nbits; \
  814. *op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff); \
  815. nextbits -= 8; \
  816. if (nextbits >= 8) { \
  817. *op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff); \
  818. nextbits -= 8; \
  819. } \
  820. outcount += nbits; \
  821. }
  822. /*
  823. * Encode a chunk of pixels.
  824. *
  825. * Uses an open addressing double hashing (no chaining) on the
  826. * prefix code/next character combination. We do a variant of
  827. * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
  828. * relatively-prime secondary probe. Here, the modular division
  829. * first probe is gives way to a faster exclusive-or manipulation.
  830. * Also do block compression with an adaptive reset, whereby the
  831. * code table is cleared when the compression ratio decreases,
  832. * but after the table fills. The variable-length output codes
  833. * are re-sized at this point, and a CODE_CLEAR is generated
  834. * for the decoder.
  835. */
  836. static int
  837. LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s)
  838. {
  839. register LZWCodecState *sp = EncoderState(tif);
  840. register long fcode;
  841. register hash_t *hp;
  842. register int h, c;
  843. hcode_t ent;
  844. long disp;
  845. long incount, outcount, checkpoint;
  846. unsigned long nextdata;
  847. long nextbits;
  848. int free_ent, maxcode, nbits;
  849. uint8* op;
  850. uint8* limit;
  851. (void) s;
  852. if (sp == NULL)
  853. return (0);
  854. assert(sp->enc_hashtab != NULL);
  855. /*
  856. * Load local state.
  857. */
  858. incount = sp->enc_incount;
  859. outcount = sp->enc_outcount;
  860. checkpoint = sp->enc_checkpoint;
  861. nextdata = sp->lzw_nextdata;
  862. nextbits = sp->lzw_nextbits;
  863. free_ent = sp->lzw_free_ent;
  864. maxcode = sp->lzw_maxcode;
  865. nbits = sp->lzw_nbits;
  866. op = tif->tif_rawcp;
  867. limit = sp->enc_rawlimit;
  868. ent = (hcode_t)sp->enc_oldcode;
  869. if (ent == (hcode_t) -1 && cc > 0) {
  870. /*
  871. * NB: This is safe because it can only happen
  872. * at the start of a strip where we know there
  873. * is space in the data buffer.
  874. */
  875. PutNextCode(op, CODE_CLEAR);
  876. ent = *bp++; cc--; incount++;
  877. }
  878. while (cc > 0) {
  879. c = *bp++; cc--; incount++;
  880. fcode = ((long)c << BITS_MAX) + ent;
  881. h = (c << HSHIFT) ^ ent; /* xor hashing */
  882. #ifdef _WINDOWS
  883. /*
  884. * Check hash index for an overflow.
  885. */
  886. if (h >= HSIZE)
  887. h -= HSIZE;
  888. #endif
  889. hp = &sp->enc_hashtab[h];
  890. if (hp->hash == fcode) {
  891. ent = hp->code;
  892. continue;
  893. }
  894. if (hp->hash >= 0) {
  895. /*
  896. * Primary hash failed, check secondary hash.
  897. */
  898. disp = HSIZE - h;
  899. if (h == 0)
  900. disp = 1;
  901. do {
  902. /*
  903. * Avoid pointer arithmetic because of
  904. * wraparound problems with segments.
  905. */
  906. if ((h -= disp) < 0)
  907. h += HSIZE;
  908. hp = &sp->enc_hashtab[h];
  909. if (hp->hash == fcode) {
  910. ent = hp->code;
  911. goto hit;
  912. }
  913. } while (hp->hash >= 0);
  914. }
  915. /*
  916. * New entry, emit code and add to table.
  917. */
  918. /*
  919. * Verify there is space in the buffer for the code
  920. * and any potential Clear code that might be emitted
  921. * below. The value of limit is setup so that there
  922. * are at least 4 bytes free--room for 2 codes.
  923. */
  924. if (op > limit) {
  925. tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
  926. if( !TIFFFlushData1(tif) )
  927. return 0;
  928. op = tif->tif_rawdata;
  929. }
  930. PutNextCode(op, ent);
  931. ent = (hcode_t)c;
  932. hp->code = (hcode_t)(free_ent++);
  933. hp->hash = fcode;
  934. if (free_ent == CODE_MAX-1) {
  935. /* table is full, emit clear code and reset */
  936. cl_hash(sp);
  937. sp->enc_ratio = 0;
  938. incount = 0;
  939. outcount = 0;
  940. free_ent = CODE_FIRST;
  941. PutNextCode(op, CODE_CLEAR);
  942. nbits = BITS_MIN;
  943. maxcode = MAXCODE(BITS_MIN);
  944. } else {
  945. /*
  946. * If the next entry is going to be too big for
  947. * the code size, then increase it, if possible.
  948. */
  949. if (free_ent > maxcode) {
  950. nbits++;
  951. assert(nbits <= BITS_MAX);
  952. maxcode = (int) MAXCODE(nbits);
  953. } else if (incount >= checkpoint) {
  954. long rat;
  955. /*
  956. * Check compression ratio and, if things seem
  957. * to be slipping, clear the hash table and
  958. * reset state. The compression ratio is a
  959. * 24+8-bit fractional number.
  960. */
  961. checkpoint = incount+CHECK_GAP;
  962. CALCRATIO(sp, rat);
  963. if (rat <= sp->enc_ratio) {
  964. cl_hash(sp);
  965. sp->enc_ratio = 0;
  966. incount = 0;
  967. outcount = 0;
  968. free_ent = CODE_FIRST;
  969. PutNextCode(op, CODE_CLEAR);
  970. nbits = BITS_MIN;
  971. maxcode = MAXCODE(BITS_MIN);
  972. } else
  973. sp->enc_ratio = rat;
  974. }
  975. }
  976. hit:
  977. ;
  978. }
  979. /*
  980. * Restore global state.
  981. */
  982. sp->enc_incount = incount;
  983. sp->enc_outcount = outcount;
  984. sp->enc_checkpoint = checkpoint;
  985. sp->enc_oldcode = ent;
  986. sp->lzw_nextdata = nextdata;
  987. sp->lzw_nextbits = nextbits;
  988. sp->lzw_free_ent = (unsigned short)free_ent;
  989. sp->lzw_maxcode = (unsigned short)maxcode;
  990. sp->lzw_nbits = (unsigned short)nbits;
  991. tif->tif_rawcp = op;
  992. return (1);
  993. }
  994. /*
  995. * Finish off an encoded strip by flushing the last
  996. * string and tacking on an End Of Information code.
  997. */
  998. static int
  999. LZWPostEncode(TIFF* tif)
  1000. {
  1001. register LZWCodecState *sp = EncoderState(tif);
  1002. uint8* op = tif->tif_rawcp;
  1003. long nextbits = sp->lzw_nextbits;
  1004. unsigned long nextdata = sp->lzw_nextdata;
  1005. long outcount = sp->enc_outcount;
  1006. int nbits = sp->lzw_nbits;
  1007. if (op > sp->enc_rawlimit) {
  1008. tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
  1009. if( !TIFFFlushData1(tif) )
  1010. return 0;
  1011. op = tif->tif_rawdata;
  1012. }
  1013. if (sp->enc_oldcode != (hcode_t) -1) {
  1014. int free_ent = sp->lzw_free_ent;
  1015. PutNextCode(op, sp->enc_oldcode);
  1016. sp->enc_oldcode = (hcode_t) -1;
  1017. free_ent ++;
  1018. if (free_ent == CODE_MAX-1) {
  1019. /* table is full, emit clear code and reset */
  1020. outcount = 0;
  1021. PutNextCode(op, CODE_CLEAR);
  1022. nbits = BITS_MIN;
  1023. } else {
  1024. /*
  1025. * If the next entry is going to be too big for
  1026. * the code size, then increase it, if possible.
  1027. */
  1028. if (free_ent > sp->lzw_maxcode) {
  1029. nbits++;
  1030. assert(nbits <= BITS_MAX);
  1031. }
  1032. }
  1033. }
  1034. PutNextCode(op, CODE_EOI);
  1035. /* Explicit 0xff masking to make icc -check=conversions happy */
  1036. if (nextbits > 0)
  1037. *op++ = (unsigned char)((nextdata << (8-nextbits))&0xff);
  1038. tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
  1039. return (1);
  1040. }
  1041. /*
  1042. * Reset encoding hash table.
  1043. */
  1044. static void
  1045. cl_hash(LZWCodecState* sp)
  1046. {
  1047. register hash_t *hp = &sp->enc_hashtab[HSIZE-1];
  1048. register long i = HSIZE-8;
  1049. do {
  1050. i -= 8;
  1051. hp[-7].hash = -1;
  1052. hp[-6].hash = -1;
  1053. hp[-5].hash = -1;
  1054. hp[-4].hash = -1;
  1055. hp[-3].hash = -1;
  1056. hp[-2].hash = -1;
  1057. hp[-1].hash = -1;
  1058. hp[ 0].hash = -1;
  1059. hp -= 8;
  1060. } while (i >= 0);
  1061. for (i += 8; i > 0; i--, hp--)
  1062. hp->hash = -1;
  1063. }
  1064. static void
  1065. LZWCleanup(TIFF* tif)
  1066. {
  1067. (void)TIFFPredictorCleanup(tif);
  1068. assert(tif->tif_data != 0);
  1069. if (DecoderState(tif)->dec_codetab)
  1070. _TIFFfree(DecoderState(tif)->dec_codetab);
  1071. if (EncoderState(tif)->enc_hashtab)
  1072. _TIFFfree(EncoderState(tif)->enc_hashtab);
  1073. _TIFFfree(tif->tif_data);
  1074. tif->tif_data = NULL;
  1075. _TIFFSetDefaultCompressionState(tif);
  1076. }
  1077. int
  1078. TIFFInitLZW(TIFF* tif, int scheme)
  1079. {
  1080. static const char module[] = "TIFFInitLZW";
  1081. (void)scheme;
  1082. assert(scheme == COMPRESSION_LZW);
  1083. /*
  1084. * Allocate state block so tag methods have storage to record values.
  1085. */
  1086. tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState));
  1087. if (tif->tif_data == NULL)
  1088. goto bad;
  1089. DecoderState(tif)->dec_codetab = NULL;
  1090. DecoderState(tif)->dec_decode = NULL;
  1091. EncoderState(tif)->enc_hashtab = NULL;
  1092. LZWState(tif)->rw_mode = tif->tif_mode;
  1093. /*
  1094. * Install codec methods.
  1095. */
  1096. tif->tif_fixuptags = LZWFixupTags;
  1097. tif->tif_setupdecode = LZWSetupDecode;
  1098. tif->tif_predecode = LZWPreDecode;
  1099. tif->tif_decoderow = LZWDecode;
  1100. tif->tif_decodestrip = LZWDecode;
  1101. tif->tif_decodetile = LZWDecode;
  1102. tif->tif_setupencode = LZWSetupEncode;
  1103. tif->tif_preencode = LZWPreEncode;
  1104. tif->tif_postencode = LZWPostEncode;
  1105. tif->tif_encoderow = LZWEncode;
  1106. tif->tif_encodestrip = LZWEncode;
  1107. tif->tif_encodetile = LZWEncode;
  1108. tif->tif_cleanup = LZWCleanup;
  1109. /*
  1110. * Setup predictor setup.
  1111. */
  1112. (void) TIFFPredictorInit(tif);
  1113. return (1);
  1114. bad:
  1115. TIFFErrorExt(tif->tif_clientdata, module,
  1116. "No space for LZW state block");
  1117. return (0);
  1118. }
  1119. /*
  1120. * Copyright (c) 1985, 1986 The Regents of the University of California.
  1121. * All rights reserved.
  1122. *
  1123. * This code is derived from software contributed to Berkeley by
  1124. * James A. Woods, derived from original work by Spencer Thomas
  1125. * and Joseph Orost.
  1126. *
  1127. * Redistribution and use in source and binary forms are permitted
  1128. * provided that the above copyright notice and this paragraph are
  1129. * duplicated in all such forms and that any documentation,
  1130. * advertising materials, and other materials related to such
  1131. * distribution and use acknowledge that the software was developed
  1132. * by the University of California, Berkeley. The name of the
  1133. * University may not be used to endorse or promote products derived
  1134. * from this software without specific prior written permission.
  1135. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
  1136. * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  1137. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  1138. */
  1139. #endif /* LZW_SUPPORT */
  1140. /* vim: set ts=8 sts=8 sw=8 noet: */
  1141. /*
  1142. * Local Variables:
  1143. * mode: c
  1144. * c-basic-offset: 8
  1145. * fill-column: 78
  1146. * End:
  1147. */