src/TortoiseMerge/libsvn_diff/util.c

   1 /*\r
   2  * util.c :  routines for doing diffs\r
   3  *\r
   4  * ====================================================================\r
   5  * Copyright (c) 2000-2004 CollabNet.  All rights reserved.\r
   6  *\r
   7  * This software is licensed as described in the file COPYING, which\r
   8  * you should have received as part of this distribution.  The terms\r
   9  * are also available at http://subversion.tigris.org/license-1.html.\r
  10  * If newer versions of this license are posted there, you may use a\r
  11  * newer version instead, at your option.\r
  12  *\r
  13  * This software consists of voluntary contributions made by many\r
  14  * individuals.  For exact contribution history, see the revision\r
  15  * history and logs, available at http://subversion.tigris.org/.\r
  16  * ====================================================================\r
  17  */\r
  18 \r
  19 \r
  20 #include <apr.h>\r
  21 #include <apr_general.h>\r
  22 \r
  23 #include "svn_error.h"\r
  24 #include "svn_version.h"\r
  25 #include "svn_io.h"\r
  26 #include "svn_types.h"\r
  27 #include "svn_ctype.h"\r
  28 \r
  29 #include "diff.h"\r
  30 #include "svn_diff.h"\r
  31 /**\r
  32  * An Adler-32 implementation per RFC1950.\r
  33  *\r
  34  * "The Adler-32 algorithm is much faster than the CRC32 algorithm yet\r
  35  * still provides an extremely low probability of undetected errors"\r
  36  */\r
  37 \r
  38 /*\r
  39  * 65521 is the largest prime less than 65536.\r
  40  * "That 65521 is prime is important to avoid a possible large class of\r
  41  *  two-byte errors that leave the check unchanged."\r
  42  */\r
  43 #define ADLER_MOD_BASE 65521\r
  44 \r
  45 /*\r
  46  * "The modulo on unsigned long accumulators can be delayed for 5552 bytes,\r
  47  *  so the modulo operation time is negligible."\r
  48  */\r
  49 #define ADLER_MOD_BLOCK_SIZE 5552\r
  50 \r
  51 \r
  52 /*\r
  53  * Start with CHECKSUM and update the checksum by processing a chunk\r
  54  * of DATA sized LEN.\r
  55  */\r
  56 apr_uint32_t\r
  57 svn_diff__adler32(apr_uint32_t checksum, const char *data, apr_size_t len)\r
  58 {\r
  59   const unsigned char *input = (const unsigned char *)data;\r
  60   apr_uint32_t s1 = checksum & 0xFFFF;\r
  61   apr_uint32_t s2 = checksum >> 16;\r
  62   apr_uint32_t b;\r
  63   apr_size_t blocks = len / ADLER_MOD_BLOCK_SIZE;\r
  64 \r
  65   len %= ADLER_MOD_BLOCK_SIZE;\r
  66 \r
  67   while (blocks--)\r
  68     {\r
  69       int count = ADLER_MOD_BLOCK_SIZE;\r
  70       while (count--)\r
  71         {\r
  72           b = *input++;\r
  73           s1 += b;\r
  74           s2 += s1;\r
  75         }\r
  76 \r
  77       s1 %= ADLER_MOD_BASE;\r
  78       s2 %= ADLER_MOD_BASE;\r
  79     }\r
  80 \r
  81   while (len--)\r
  82     {\r
  83       b = *input++;\r
  84       s1 += b;\r
  85       s2 += s1;\r
  86     }\r
  87 \r
  88   return ((s2 % ADLER_MOD_BASE) << 16) | (s1 % ADLER_MOD_BASE);\r
  89 }\r
  90 \r
  91 \r
  92 svn_boolean_t\r
  93 svn_diff_contains_conflicts(svn_diff_t *diff)\r
  94 {\r
  95   while (diff != NULL)\r
  96     {\r
  97       if (diff->type == svn_diff__type_conflict)\r
  98         {\r
  99           return TRUE;\r
 100         }\r
 101 \r
 102       diff = diff->next;\r
 103     }\r
 104 \r
 105   return FALSE;\r
 106 }\r
 107 \r
 108 svn_boolean_t\r
 109 svn_diff_contains_diffs(svn_diff_t *diff)\r
 110 {\r
 111   while (diff != NULL)\r
 112     {\r
 113       if (diff->type != svn_diff__type_common)\r
 114         {\r
 115           return TRUE;\r
 116         }\r
 117 \r
 118       diff = diff->next;\r
 119     }\r
 120 \r
 121   return FALSE;\r
 122 }\r
 123 \r
 124 svn_error_t *\r
 125 svn_diff_output(svn_diff_t *diff,\r
 126                 void *output_baton,\r
 127                 const svn_diff_output_fns_t *vtable)\r
 128 {\r
 129   svn_error_t *(*output_fn)(void *,\r
 130                             apr_off_t, apr_off_t,\r
 131                             apr_off_t, apr_off_t,\r
 132                             apr_off_t, apr_off_t);\r
 133 \r
 134   while (diff != NULL)\r
 135     {\r
 136       switch (diff->type)\r
 137         {\r
 138         case svn_diff__type_common:\r
 139           output_fn = vtable->output_common;\r
 140           break;\r
 141 \r
 142         case svn_diff__type_diff_common:\r
 143           output_fn = vtable->output_diff_common;\r
 144           break;\r
 145 \r
 146         case svn_diff__type_diff_modified:\r
 147           output_fn = vtable->output_diff_modified;\r
 148           break;\r
 149 \r
 150         case svn_diff__type_diff_latest:\r
 151           output_fn = vtable->output_diff_latest;\r
 152           break;\r
 153 \r
 154         case svn_diff__type_conflict:\r
 155           output_fn = NULL;\r
 156           if (vtable->output_conflict != NULL)\r
 157             {\r
 158               SVN_ERR(vtable->output_conflict(output_baton,\r
 159                                diff->original_start, diff->original_length,\r
 160                                diff->modified_start, diff->modified_length,\r
 161                                diff->latest_start, diff->latest_length,\r
 162                                diff->resolved_diff));\r
 163             }\r
 164           break;\r
 165 \r
 166         default:\r
 167           output_fn = NULL;\r
 168           break;\r
 169         }\r
 170 \r
 171       if (output_fn != NULL)\r
 172         {\r
 173           SVN_ERR(output_fn(output_baton,\r
 174                             diff->original_start, diff->original_length,\r
 175                             diff->modified_start, diff->modified_length,\r
 176                             diff->latest_start, diff->latest_length));\r
 177         }\r
 178 \r
 179       diff = diff->next;\r
 180     }\r
 181 \r
 182   return SVN_NO_ERROR;\r
 183 }\r
 184 \r
 185 \r
 186 void\r
 187 svn_diff__normalize_buffer(char **tgt,\r
 188                            apr_off_t *lengthp,\r
 189                            svn_diff__normalize_state_t *statep,\r
 190                            const char *buf,\r
 191                            const svn_diff_file_options_t *opts)\r
 192 {\r
 193   /* Variables for looping through BUF */\r
 194   const char *curp, *endp;\r
 195 \r
 196   /* Variable to record normalizing state */\r
 197   svn_diff__normalize_state_t state = *statep;\r
 198 \r
 199   /* Variables to track what needs copying into the target buffer */\r
 200   const char *start = buf;\r
 201   apr_size_t include_len = 0;\r
 202   svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */\r
 203 \r
 204   /* Variable to record the state of the target buffer */\r
 205   char *tgt_newend = *tgt;\r
 206 \r
 207   /* If this is a noop, then just get out of here. */\r
 208   if (! opts->ignore_space && ! opts->ignore_eol_style)\r
 209     {\r
 210       *tgt = (char *)buf;\r
 211       return;\r
 212     }\r
 213 \r
 214 \r
 215   /* It only took me forever to get this routine right,\r
 216      so here my thoughts go:\r
 217 \r
 218     Below, we loop through the data, doing 2 things:\r
 219 \r
 220      - Normalizing\r
 221      - Copying other data\r
 222 \r
 223      The routine tries its hardest *not* to copy data, but instead\r
 224      returning a pointer into already normalized existing data.\r
 225 \r
 226      To this end, a block 'other data' shouldn't be copied when found,\r
 227      but only as soon as it can't be returned in-place.\r
 228 \r
 229      On a character level, there are 3 possible operations:\r
 230 \r
 231      - Skip the character (don't include in the normalized data)\r
 232      - Include the character (do include in the normalizad data)\r
 233      - Include as another character\r
 234        This is essentially the same as skipping the current character\r
 235        and inserting a given character in the output data.\r
 236 \r
 237     The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to\r
 238     handle the character based operations.  The macros themselves\r
 239     collect character level data into blocks.\r
 240 \r
 241     At all times designate the START, INCLUDED_LEN and CURP pointers\r
 242     an included and and skipped block like this:\r
 243 \r
 244       [ start, start + included_len ) [ start + included_len, curp )\r
 245              INCLUDED                        EXCLUDED\r
 246 \r
 247     When the routine flips from skipping to including, the last\r
 248     included block has to be flushed to the output buffer.\r
 249   */\r
 250 \r
 251   /* Going from including to skipping; only schedules the current\r
 252      included section for flushing.\r
 253      Also, simply chop off the character if it's the first in the buffer,\r
 254      so we can possibly just return the remainder of the buffer */\r
 255 #define SKIP             \\r
 256   do {                   \\r
 257     if (start == curp)   \\r
 258        ++start;          \\r
 259     last_skipped = TRUE; \\r
 260   } while (0)\r
 261 \r
 262 #define INCLUDE                \\r
 263   do {                         \\r
 264     if (last_skipped)          \\r
 265       COPY_INCLUDED_SECTION;   \\r
 266     ++include_len;             \\r
 267     last_skipped = FALSE;      \\r
 268   } while (0)\r
 269 \r
 270 #define COPY_INCLUDED_SECTION                     \\r
 271   do {                                            \\r
 272     if (include_len > 0)                          \\r
 273       {                                           \\r
 274          memmove(tgt_newend, start, include_len); \\r
 275          tgt_newend += include_len;               \\r
 276          include_len = 0;                         \\r
 277       }                                           \\r
 278     start = curp;                                 \\r
 279   } while (0)\r
 280 \r
 281   /* Include the current character as character X.\r
 282      If the current character already *is* X, add it to the\r
 283      currently included region, increasing chances for consecutive\r
 284      fully normalized blocks. */\r
 285 #define INCLUDE_AS(x)          \\r
 286   do {                         \\r
 287     if (*curp == (x))          \\r
 288       INCLUDE;                 \\r
 289     else                       \\r
 290       {                        \\r
 291         INSERT((x));           \\r
 292         SKIP;                  \\r
 293       }                        \\r
 294   } while (0)\r
 295 \r
 296   /* Insert character X in the output buffer */\r
 297 #define INSERT(x)              \\r
 298   do {                         \\r
 299     COPY_INCLUDED_SECTION;     \\r
 300     *tgt_newend++ = (x);       \\r
 301   } while (0)\r
 302 \r
 303   for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)\r
 304     {\r
 305       switch (*curp)\r
 306         {\r
 307         case '\r':\r
 308           if (opts->ignore_eol_style)\r
 309             INCLUDE_AS('\n');\r
 310           else\r
 311             INCLUDE;\r
 312           state = svn_diff__normalize_state_cr;\r
 313           break;\r
 314 \r
 315         case '\n':\r
 316           if (state == svn_diff__normalize_state_cr\r
 317               && opts->ignore_eol_style)\r
 318             SKIP;\r
 319           else\r
 320             INCLUDE;\r
 321           state = svn_diff__normalize_state_normal;\r
 322           break;\r
 323 \r
 324         default:\r
 325           if (svn_ctype_isspace(*curp)\r
 326               && opts->ignore_space != svn_diff_file_ignore_space_none)\r
 327             {\r
 328               /* Whitespace but not '\r' or '\n' */\r
 329               if (state != svn_diff__normalize_state_whitespace\r
 330                   && opts->ignore_space\r
 331                      == svn_diff_file_ignore_space_change)\r
 332                 /*### If we can postpone insertion of the space\r
 333                   until the next non-whitespace character,\r
 334                   we have a potential of reducing the number of copies:\r
 335                   If this space is followed by more spaces,\r
 336                   this will cause a block-copy.\r
 337                   If the next non-space block is considered normalized\r
 338                   *and* preceded by a space, we can take advantage of that. */\r
 339                 /* Note, the above optimization applies to 90% of the source\r
 340                    lines in our own code, since it (generally) doesn't use\r
 341                    more than one space per blank section, except for the\r
 342                    beginning of a line. */\r
 343                 INCLUDE_AS(' ');\r
 344               else\r
 345                 SKIP;\r
 346               state = svn_diff__normalize_state_whitespace;\r
 347             }\r
 348           else\r
 349             {\r
 350               /* Non-whitespace character, or whitespace character in\r
 351                  svn_diff_file_ignore_space_none mode. */\r
 352               INCLUDE;\r
 353               state = svn_diff__normalize_state_normal;\r
 354             }\r
 355         }\r
 356     }\r
 357 \r
 358   /* If we're not in whitespace, flush the last chunk of data.\r
 359    * Note that this will work correctly when this is the last chunk of the\r
 360    * file:\r
 361    * * If there is an eol, it will either have been output when we entered\r
 362    *   the state_cr, or it will be output now.\r
 363    * * If there is no eol and we're not in whitespace, then we just output\r
 364    *   everything below.\r
 365    * * If there's no eol and we are in whitespace, we want to ignore\r
 366    *   whitespace unconditionally. */\r
 367 \r
 368   if (*tgt == tgt_newend)\r
 369     {\r
 370       /* we haven't copied any data in to *tgt and our chunk consists\r
 371          only of one block of (already normalized) data.\r
 372          Just return the block. */\r
 373       *tgt = (char *)start;\r
 374       *lengthp = include_len;\r
 375     }\r
 376   else\r
 377     {\r
 378       COPY_INCLUDED_SECTION;\r
 379       *lengthp = tgt_newend - *tgt;\r
 380     }\r
 381 \r
 382   *statep = state;\r
 383 \r
 384 #undef SKIP\r
 385 #undef INCLUDE\r
 386 #undef INCLUDE_AS\r
 387 #undef INSERT\r
 388 #undef COPY_INCLUDED_SECTION\r
 389 }\r
 390 \r
 391 \r
 392 /* Return the library version number. */\r
 393 const svn_version_t *\r
 394 svn_diff_version(void)\r
 395 {\r
 396   SVN_VERSION_BODY;\r
 397 }\r