Safer macros: Type checking

Preprocessor macros are a powerful, highly customizable but dangerous weapon. A macro often contains loopholes in its design, mainly because macros have no type checking: they are just textual substitution patterns applied to the code before compilation. Thus, nowadays developers only use them when there is no other way. If we do need them, the macros should be defined properly. This post is written for that moment.

Asserting the parameters

The first thing we need to learn is how to check that an argument is a constant, or that it is a plain variable with no extra operations applied to it.

Suppose we need to define a macro named ASSIGN_CONST_INT that assigns a constant to a variable. We might write the following code:

#include <stdio.h>

// Naive version: a plain textual assignment with no checks on either argument.
// The macro carries no trailing semicolon; the call site supplies it, so the
// expansion stays a single statement and is safe inside if/else.
#define ASSIGN_CONST_INT(variable, value) variable = value

int main() { // demo: what the unchecked macro lets through
  char a;
  ASSIGN_CONST_INT(a, 10);
  printf("%d\n", a); // prints 10

  char b = 20;
  ASSIGN_CONST_INT(a, b); // b is not a constant!
  printf("%d\n", a); // prints 20: the non-constant b was accepted silently

  char c = 11, d = 12;
  ASSIGN_CONST_INT(c += d, 50); // c should be set to 50
  printf("%d\n", c); // prints 61: the call expanded to `c += d = 50`

  return 0;
}

output:

10
20
61

There are two points that we can improve:

  • Make sure the value is a constant
  • Make sure the variable has no extra operations

No matter what is done to c before it is passed to ASSIGN_CONST_INT, it should end up being set to 50. Thus, any extra operation before the assignment is useless and just wastes computation.

To meet the above two requirements, we can define ASSIGN_CONST_INT as follows:

// Make sure the argument is a bare identifier: it is reused as an enum tag and enumerator name
#define VARIABLE(v) { enum v { v }; }
// Make sure the argument is an integer constant expression: it is used as an enumerator value
#define CONST_INT(v) { enum { E = v }; }
// Run both compile-time checks, then assign value to variable
#define ASSIGN_CONST_INT(variable, value) do {  \
  VARIABLE(variable);                           \
  CONST_INT(value);                             \
  variable = value;                             \
} while(0)

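(The do { ... } while(0) wrapper is used to give the macro body its own scope { ... } for variables, while still letting the call end with a semicolon like a normal statement.)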

Compilation result:

test.c:20:23: error: expression is not an integer constant expression
  ASSIGN_CONST_INT(a, b); // b is not a constant!
                      ^
test.c:10:13: note: expanded from macro 'ASSIGN_CONST_INT'
  CONST_INT(value);                             \
            ^~~~~
test.c:6:35: note: expanded from macro 'CONST_INT'
#define CONST_INT(v) { enum { E = v }; }
                                  ^
test.c:24:22: error: expected identifier or '('
  ASSIGN_CONST_INT(c += d, 50); // c should be set to 50
                     ^
2 errors generated.

In this way we successfully prevent a non-constant value from being assigned to a variable and get rid of those useless operations.

Finally, the following code compiles and works as expected:

#include <stdio.h>

// Make sure the argument is a bare identifier: it is reused as an enum tag and enumerator name
#define VARIABLE(v) { enum v { v }; }
// Make sure the argument is an integer constant expression: it is used as an enumerator value
#define CONST_INT(v) { enum { E = v }; }
// Run both compile-time checks, then assign value to variable
#define ASSIGN_CONST_INT(variable, value) do {  \
  VARIABLE(variable);                           \
  CONST_INT(value);                             \
  variable = value;                             \
} while(0)

int main() {
  char a;
  ASSIGN_CONST_INT(a, 10); // literal constant into a plain variable: accepted
  printf("%d\n", a);

  char c = 11;
  // NOTE(review): d is a const-qualified object, which ISO C does not count as
  // an integer constant expression; the output shown below means the compiler
  // used for this post accepted it -- confirm with your own compiler.
  const char d = 12;
  ASSIGN_CONST_INT(c, d);
  printf("%d\n", c);

  return 0;
}

output:

10
12

Type checking

The other important issue with macros is type checking. In many cases, we need to check the arguments' types before using them.

For instance, our mission here is to write a macro SWAP that swaps the values of two variables. We know that the following XOR trick can swap two values without declaring a temporary variable.

// XOR swap: exchange *pX and *pY without a temporary variable.
// pX and pY are pointers; the ^ operator needs integer operands.
// NOTE(review): if pX and pY point to the same object, the first XOR yields 0
// and the value is lost -- callers presumably pass distinct objects.
#define SWAP(pX, pY) do { \
  *pX = *pX ^ *pY;        \
  *pY = *pX ^ *pY;        \
  *pX = *pX ^ *pY;        \
} while(0)

But bitwise operators such as ^ cannot be applied to double operands, so this version does not compile for doubles.

#include <stdio.h>

#define SWAP(pX, pY) do { /* XOR swap via pointers, no temporary; integer operands only */ \
  *pX = *pX ^ *pY;        \
  *pY = *pX ^ *pY;        \
  *pX = *pX ^ *pY;        \
} while(0)

int main() {
  int a = 10, b = 35;
  printf("a: %d, b: %d\n", a, b);
  SWAP(&a, &b); // int operands: XOR is valid
  printf("a: %d, b: %d\n", a, b);

  char c = 'C', d = 'D';
  printf("c: %c, d: %c\n", c, d);
  SWAP(&c, &d); // char operands: XOR is valid
  printf("c: %c, d: %c\n", c, d);

  double e = 3.14, f = 1.618; // double operands: the SWAP below does not compile
  printf("e: %lf, f: %lf\n", e, f);
  SWAP(&e, &f);
  printf("e: %lf, f: %lf\n", e, f);

  return 0;
}

compiler errors:

test.c:22:3: error: invalid operands to binary expression ('double' and 'double')
  SWAP(&e, &f);
  ^~~~~~~~~~~~
test.c:4:13: note: expanded from macro 'SWAP'
  *pX = *pX ^ *pY;        \
        ~~~ ^ ~~~
test.c:22:3: error: invalid operands to binary expression ('double' and 'double')
  SWAP(&e, &f);
  ^~~~~~~~~~~~
test.c:5:13: note: expanded from macro 'SWAP'
  *pY = *pX ^ *pY;        \
        ~~~ ^ ~~~
test.c:22:3: error: invalid operands to binary expression ('double' and 'double')
SWAP(&e, &f);
  ^~~~~~~~~~~~
test.c:6:13: note: expanded from macro 'SWAP'
  *pX = *pX ^ *pY;        \
        ~~~ ^ ~~~
3 errors generated.

Another approach is to use memcpy to do the swap. Whatever the variables' types are, they are stored in memory. Why not just swap their bytes directly?

#include <stdio.h>
#include <string.h> // For memcpy

// Swap x and y byte by byte through a scratch buffer, whatever their type.
// The buffer size and every copy length come from sizeof(x) alone.
#define SWAP(x, y) do {           \
  unsigned char temp[sizeof(x)];  \
  memcpy(temp, &x, sizeof temp);  \
  memcpy(&x, &y, sizeof temp);    \
  memcpy(&y, temp, sizeof temp);  \
} while(0)

int main() {
  // SWAP now takes the variables themselves rather than pointers to them.
  int a = 10, b = 35;
  printf("a: %d, b: %d\n", a, b);
  SWAP(a, b);
  printf("a: %d, b: %d\n", a, b);

  char c = 'C', d = 'D';
  printf("c: %c, d: %c\n", c, d);
  SWAP(c, d);
  printf("c: %c, d: %c\n", c, d);

  // double works too: the bytes are copied, no arithmetic is applied to them.
  double e = 3.14, f = 1.618;
  printf("e: %lf, f: %lf\n", e, f);
  SWAP(e, f);
  printf("e: %lf, f: %lf\n", e, f);

  return 0;
}

output:

a: 10, b: 35
a: 35, b: 10
c: C, d: D
c: D, d: C
e: 3.140000, f: 1.618000
e: 1.618000, f: 3.140000

Although we have avoided the variable-type issue, the macro still goes wrong in the following case, without any compiler warning or error.

#include <stdio.h>
#include <string.h> // For memcpy

// Byte-wise swap sized by x only: nothing here checks that y has the same
// size, which is exactly the flaw this example demonstrates.
#define SWAP(x, y) do {           \
  unsigned char temp[sizeof(x)];  \
  memcpy(temp, &x, sizeof temp);  \
  memcpy(&x, &y, sizeof temp);    \
  memcpy(&y, temp, sizeof temp);  \
} while(0)

int main() {
  unsigned char a = 1;   // 1 byte: 0x01
  unsigned int b = 256;  // 4 bytes: 0x00000100

  // b is unsigned int, so it is printed with %u; a is promoted to int, so %d fits.
  printf("a: %d, b: %u\n", a, b);
  SWAP(a, b); // sizeof(a) == 1: only one byte of each variable is exchanged
  printf("a: %d, b: %u\n", a, b);

  return 0;
}

output:

a: 1, b: 256
a: 0, b: 257

Oops! We forgot to handle the case where the two variables have different memory sizes. Replacing unsigned char temp[sizeof(x)] with unsigned char temp[(signed)((sizeof(x) == sizeof(y))? sizeof(x) : -1)] gives us a compile-time check on the sizes of the two parameters, because declaring an array with a negative size is not allowed.

#include <stdio.h>
#include <string.h> // For memcpy

#define SWAP(x, y) do { /* byte-wise swap; sizes of x and y must match */ \
  unsigned char temp[(signed)((sizeof(x) == sizeof(y))? sizeof(x) : -1)]; \
  memcpy(temp, &x, sizeof temp);                                          \
  memcpy(&x, &y, sizeof temp);                                            \
  memcpy(&y, temp, sizeof temp);                                          \
} while(0)

int main() {
  unsigned char a = 1;   // 1 byte: 0x01
  unsigned int b = 256;  // 4 bytes: 0x00000100

  printf("a: %d, b: %u\n", a, b); // %u matches unsigned int b; a is promoted to int
  SWAP(a, b);
  printf("a: %d, b: %u\n", a, b);

  return 0;
}

compiler errors:

test.c:16:3: error: 'temp' declared as an array with a negative size
  SWAP(a, b);
  ^~~~~~~~~~
test.c:5:22: note: expanded from macro 'SWAP'
  unsigned char temp[(signed)((sizeof(x) == sizeof(y))? sizeof(x) : -1)]; \
                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1 error generated.

It seems we have solved the problem, but actually we haven't. Even if the two variables have the same memory size, their types may still be different.

#include <stdio.h>
#include <string.h>

// Byte-wise swap of x and y. The array bound becomes -1 when the two sizes
// differ, so only equal size is enforced -- not equal type.
#define SWAP(x, y) do {                                                   \
  unsigned char temp[(signed)((sizeof(x) == sizeof(y))? sizeof(x) : -1)]; \
  memcpy(temp, &x, sizeof temp);                                          \
  memcpy(&x, &y, sizeof temp);                                            \
  memcpy(&y, temp, sizeof temp);                                          \
} while(0)

int main() {
  char a = -10;
  unsigned char b = 255;

  printf("a: %d, b: %d\n", a, b);
  SWAP(a, b); // same size, different signedness: compiles, but the values are reinterpreted
  printf("a: %d, b: %d\n", a, b);

  SWAP(a, a); // Wasted work; memcpy is also handed the same object as source and destination
  printf("a: %d, b: %d\n", a, b);

  return 0;
}

output:

a: -10, b: 255
a: -1, b: 246
a: -1, b: 246

Moreover, we should notice that there is no need to swap at all if the two passed arguments are the same variable.

In summary, we need to meet the following extra requirements:

  • More precise type-checking than using sizeof(TYPE).
  • Only swap unique variables.

We can add two helper macros ASSERT_SAME_TYPE and UNIQUE to do that:

#include <stdio.h>
#include <string.h>

// Comparing the two addresses makes the compiler warn when x and y differ in
// type (-Wcompare-distinct-pointer-types). To turn the warning into an error:
//   $ gcc -Werror=compare-distinct-pointer-types
#define ASSERT_SAME_TYPE(x, y)  ((void) (&(x) == &(y)))

// Make sure every parameter is a distinct identifier (standard C99 variadic form)
#define UNIQUE(...) { enum { __VA_ARGS__ }; }
// Swap two distinct variables of the same type, byte by byte
#define SWAP(x, y) do {           \
  ASSERT_SAME_TYPE(x, y);         \
  UNIQUE(x, y);                   \
  unsigned char temp[sizeof(x)];  \
  memcpy(temp, &x, sizeof(x));    \
  memcpy(&x, &y, sizeof(x));      \
  memcpy(&y, temp, sizeof(x));    \
} while(0)

int main() {
  char a = -10;
  unsigned char b = 255; // not the same type as a: SWAP(a, b) below is now diagnosed

  printf("a: %d, b: %d\n", a, b);
  SWAP(a, b);
  printf("a: %d, b: %d\n", a, b);

  SWAP(a, a); // Just waste computational energy consumption
  printf("a: %d, b: %d\n", a, b); // SWAP(a, a) above is rejected: enumerator 'a' is redefined

  return 0;
}

compiler errors:

test.c:26:3: error: comparison of distinct pointer types ('char *' and 'unsigned char *') [-Werror,-Wcompare-distinct-pointer-types]
  SWAP(a, b);
  ^~~~~~~~~~
test.c:13:3: note: expanded from macro 'SWAP'
  ASSERT_SAME_TYPE(x, y);         \
  ^~~~~~~~~~~~~~~~~~~~~~
test.c:7:47: note: expanded from macro 'ASSERT_SAME_TYPE'
#define ASSERT_SAME_TYPE(x, y)  ((void) (&(x) == &(y)))
                                         ~~~~ ^  ~~~~

test.c:29:11: error: redefinition of enumerator 'a'
  SWAP(a, a); // Just waste computational energy consumption
          ^
test.c:29:8: note: previous definition is here
  SWAP(a, a); // Just waste computational energy consumption
       ^
2 errors generated.

Summary

Commonly used helper macros

// Make sure the argument is a variable name: v is reused as an enum tag and
// enumerator, so anything other than a bare identifier fails to compile.
#define VARIABLE(v) { enum v { v }; }

// Make sure the argument is an integer constant: v is used as an enumerator
// value, which the compiler requires to be an integer constant expression.
#define CONST_INT(v) { enum { E = v }; }

// Make sure every parameter is a distinct identifier: the arguments become
// enumerators of one enum, so a repeated name is a redefinition error.
// Written with the standard C99 `...` / __VA_ARGS__ form so it does not rely
// on the GNU named-variadic-parameter extension.
#define UNIQUE(...) { enum { __VA_ARGS__ }; }

// Make sure x and y have the same type: comparing their addresses makes the
// compiler warn about distinct pointer types (-Wcompare-distinct-pointer-types).
// To mark this warning as error:
//   $ gcc -Werror=compare-distinct-pointer-types
#define ASSERT_SAME_TYPE(x, y)  ((void) (&(x) == &(y)))

References

results matching ""

    No results matching ""