Temp

From C

Jump to: navigation, search


Encode/decode integers

When there's a need to export/import integers across different implementations (i.e. via files or network), programmers often resort to making a variety of assumptions about the properties the implementations must have in common. For example, properties associated with the object representation — such as its size, encoding and layout. To achieve better portability across implementations, we need to eliminate as many of these assumptions as possible. A common approach is to use the optional exact-width types (since C99); with such types the size (in bits) and the encoding are specified, but the layout remains unspecified. The need to achieve portability even when the layout differs (i.e. endianness) often leads programmers to write two different versions of their export/import functions. Although the exact-width integer types are very common, and although there are only two common layouts, our exports can still end up being incompatible — simply because most programmers don't have access to machines where they could test an export produced by an export function of version A being imported by an import function of version B.

There's a different way to achieve this portability — one that doesn't require the use of optional types, doesn't assume two common layouts, and doesn't need two different versions to be maintained. Instead of depending on the object representation of our types, we operate on their values: we encode and decode the values to/from a specified format.

Code Sample #1

The following code sample encodes and decodes unsigned integers with a specified width. The object representation of the types is irrelevant to these functions. The only assumption we make is that the implementation which produced an export shares the same definition of a 'byte' (CHAR_BIT) with the implementation that imports it. This assumption is the bare minimum that virtually all methods have to make.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>

/*
 * Encode the value of `in` into `buffer` as `width` bytes, least
 * significant byte first, independent of the host's object
 * representation.  `buffer` must hold at least `width` bytes.
 *
 * Bytes beyond sizeof(uintmax_t) are written as zero: the original
 * shifted by i * CHAR_BIT unconditionally, which is undefined behavior
 * once the shift count reaches the width of uintmax_t (C11 6.5.7).
 */
void uint_encode(unsigned char *buffer, uintmax_t in, size_t width) {
    for (size_t i = 0; i < width; i++) {
        if (i < sizeof in)
            buffer[i] = (in >> (i * CHAR_BIT)) & UCHAR_MAX;
        else
            buffer[i] = 0; /* value has no bits this high: zero-pad */
    }
}

/*
 * Decode `width` little-endian bytes from `buffer` into *out.
 *
 * Bytes at or beyond sizeof(uintmax_t) cannot contribute representable
 * bits and are ignored: the original shifted them anyway, which is
 * undefined behavior once the shift count reaches the width of
 * uintmax_t (C11 6.5.7).
 */
void uint_decode(uintmax_t *out, unsigned char *buffer, size_t width) {
    *out = 0;
    for (size_t i = 0; i < width && i < sizeof *out; i++)
        *out |= (uintmax_t) buffer[i] << (i * CHAR_BIT);
}

int main(void) {
    /* Round-trip one value through the portable byte format and show
       that the decoded value matches the original. */
    unsigned char wire[1024];
    uintmax_t original = 0xff00ff00ff00ff00;
    uintmax_t restored;

    uint_encode(wire, original, sizeof original);
    uint_decode(&restored, wire, sizeof original);

    printf("in:  %jx\n", original);
    printf("out: %jx\n", restored);

    return 0;
}

Code Sample #2

The following code sample encodes and decodes unsigned integers with a fixed width of 8 bytes. The object representation of the types is irrelevant to these functions. Since the width of the type we encode/decode is fixed, we make the assumption that CHAR_BIT is 8.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>

/* Emit the 8 least-significant bytes of `in` into `buffer`, least
   significant byte first (the sample's fixed 8-byte wire format). */
void uint_encode(unsigned char *buffer, uintmax_t in) {
    for (size_t byte = 0; byte < 8; byte++)
        buffer[byte] = (in >> (byte * CHAR_BIT)) & UCHAR_MAX;
}

/* Reassemble a value from 8 little-endian bytes by folding them in
   from the most significant end. */
void uint_decode(uintmax_t *out, unsigned char *buffer) {
    uintmax_t value = 0;
    for (int byte = 7; byte >= 0; byte--)
        value = (value << CHAR_BIT) | buffer[byte];
    *out = value;
}

int main(void) {
    /* Round-trip one value through the fixed 8-byte format. */
    unsigned char wire[1024];
    uintmax_t original = 0xff00ff00ff00ff00;
    uintmax_t restored;

    uint_encode(wire, original);
    uint_decode(&restored, wire);

    printf("in:  %jx\n", original);
    printf("out: %jx\n", restored);

    return 0;
}

Code Sample #3

The following code sample encodes and decodes signed integers with a specified width. The format of the export is two's complement. The object representation of the types is irrelevant to these functions. The only assumption we make is that the implementation which produced an export shares the same definition of a 'byte' (CHAR_BIT) with the implementation that imports it. This assumption is the bare minimum that virtually all methods have to make.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>

/*
 * Encode the value of `in` into `buffer` as `width` bytes, least
 * significant byte first.  `buffer` must hold at least `width` bytes.
 *
 * Bytes beyond sizeof(uintmax_t) are written as zero: the original
 * shifted by i * CHAR_BIT unconditionally, which is undefined behavior
 * once the shift count reaches the width of uintmax_t (C11 6.5.7).
 */
void uint_encode(unsigned char *buffer, uintmax_t in, size_t width) {
    for (size_t i = 0; i < width; i++) {
        if (i < sizeof in)
            buffer[i] = (unsigned char) (in >> (i * CHAR_BIT)) & UCHAR_MAX;
        else
            buffer[i] = 0; /* value has no bits this high: zero-pad */
    }
}

/*
 * Encode `in` into `buffer` as a `width`-byte little-endian
 * two's-complement export.  The caller must ensure the value fits in
 * width * CHAR_BIT - 1 value bits plus the sign bit.
 *
 * A width of 0 is now a no-op: the original wrote buffer[width - 1],
 * i.e. buffer[-1], which is undefined behavior.
 */
void int_encode(unsigned char *buffer, intmax_t in, size_t width) {
    if (width == 0)
        return; /* nothing to write; avoids buffer[-1] */
    if (in < 0) {
        /* Low value bits of the two's-complement pattern of `in`,
           computed without overflow: -(in + 1) is representable even
           when in == INTMAX_MIN. */
        uintmax_t pos = ~ (uintmax_t) - (in + 1) & INTMAX_MAX;

        uint_encode(buffer, pos, width);

        buffer[width - 1] |= 1u << (CHAR_BIT - 1); /* set the sign bit */
    } else {
        uint_encode(buffer, (uintmax_t) in, width);
    }
}

/*
 * Decode `width` little-endian bytes from `buffer` into *out.
 *
 * Bytes at or beyond sizeof(uintmax_t) cannot contribute representable
 * bits and are ignored: the original shifted them anyway, which is
 * undefined behavior once the shift count reaches the width of
 * uintmax_t (C11 6.5.7).
 */
void uint_decode(uintmax_t *out, unsigned char *buffer, size_t width) {
    *out = 0;

    for (size_t i = 0; i < width && i < sizeof *out; i++)
        *out |= (uintmax_t) buffer[i] << (i * CHAR_BIT);
}

/*
 * Decode a `width`-byte little-endian two's-complement export from
 * `buffer` into *out.  width must be at most sizeof(uintmax_t).
 *
 * Fixes over the original:
 *  - the input buffer is no longer modified (the original cleared the
 *    sign bit in place, so decoding the same buffer twice returned a
 *    different, wrong value the second time);
 *  - negative values decode correctly for any width, not only when
 *    width * CHAR_BIT equals the width of intmax_t (the original
 *    complemented against INTMAX_MAX, so e.g. the 2-byte encoding of
 *    -1, FF FF, decoded to a huge wrong negative);
 *  - the magnitude is converted to intmax_t before negation; the
 *    original negated in unsigned arithmetic and then converted an
 *    out-of-range value, which is implementation-defined (C11 6.3.1.3);
 *  - width 0 yields 0 instead of reading buffer[-1] (UB).
 */
void int_decode(intmax_t *out, unsigned char *buffer, size_t width) {
    if (width == 0) {
        *out = 0;
        return;
    }

    /* Gather the value bits, leaving the export's sign bit out. */
    uintmax_t pos = 0;
    for (size_t i = 0; i < width; i++) {
        unsigned char byte = buffer[i];
        if (i == width - 1)
            byte &= (unsigned char) ~ (1u << (CHAR_BIT - 1));
        pos |= (uintmax_t) byte << (i * CHAR_BIT);
    }

    if (buffer[width - 1] & (1u << (CHAR_BIT - 1))) {
        /* Mask covering the export's width * CHAR_BIT - 1 value bits. */
        uintmax_t value_mask = ((uintmax_t) 1 << (width * CHAR_BIT - 1)) - 1;

        *out = -(intmax_t) (~ pos & value_mask) - 1;
    } else {
        *out = (intmax_t) pos;
    }
}

int main(void) {
    /* Round-trip the most extreme negative value through the portable
       two's-complement format. */
    unsigned char wire[1024];
    intmax_t original = INTMAX_MIN;
    intmax_t restored;

    int_encode(wire, original, sizeof original);
    int_decode(&restored, wire, sizeof original);

    printf("in:  %jd\n", original);
    printf("out: %jd\n", restored);

    return 0;
}

Code Sample #4

The following code sample encodes and decodes signed integers with a fixed width of 8 bytes. The format of the export is two's complement. The object representation of the types is irrelevant to these functions. Since the width of the type we encode/decode is fixed, we make the assumption that CHAR_BIT is 8.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>

/* Emit the 8 least-significant bytes of `in` into `buffer`, least
   significant byte first (the sample's fixed 8-byte wire format). */
void uint_encode(unsigned char *buffer, uintmax_t in) {
    for (size_t byte = 0; byte < 8; byte++)
        buffer[byte] = (unsigned char) (in >> (byte * CHAR_BIT)) & UCHAR_MAX;
}

/* Write `in` to `buffer` as an 8-byte little-endian two's-complement
   export. */
void int_encode(unsigned char *buffer, intmax_t in) {
    if (in >= 0) {
        uint_encode(buffer, (uintmax_t) in);
        return;
    }

    /* Two's-complement bit pattern of a negative value, built without
       overflow: -(in + 1) is representable even for INTMAX_MIN. */
    uintmax_t magnitude_bits = ~ (uintmax_t) - (in + 1) & INTMAX_MAX;
    uint_encode(buffer, magnitude_bits);
    buffer[7] |= 1u << (CHAR_BIT - 1); /* set the sign bit */
}

/* Reassemble a value from 8 little-endian bytes by folding them in
   from the most significant end. */
void uint_decode(uintmax_t *pos, unsigned char *buffer) {
    uintmax_t value = 0;

    for (int byte = 7; byte >= 0; byte--)
        value = (value << CHAR_BIT) | buffer[byte];

    *pos = value;
}

/*
 * Decode an 8-byte little-endian two's-complement export from `buffer`
 * into *out.  Assumes CHAR_BIT == 8, as stated for this sample.
 *
 * Fixes over the original:
 *  - the input buffer is no longer modified (the original cleared the
 *    sign bit in place, so decoding the same buffer twice returned a
 *    different, wrong value the second time);
 *  - the value mask is derived from the fixed 64-bit wire format
 *    ((1 << 63) - 1) rather than INTMAX_MAX, which would be wrong on an
 *    implementation where intmax_t is wider than 64 bits;
 *  - the magnitude is converted to intmax_t before negation; the
 *    original negated in unsigned arithmetic and then converted an
 *    out-of-range value, which is implementation-defined (C11 6.3.1.3).
 */
void int_decode(intmax_t *out, unsigned char *buffer) {
    /* Gather the value bits, leaving the export's sign bit out. */
    uintmax_t pos = 0;
    for (size_t i = 0; i < 8; i++) {
        unsigned char byte = buffer[i];
        if (i == 7)
            byte &= (unsigned char) ~ (1u << (CHAR_BIT - 1));
        pos |= (uintmax_t) byte << (i * CHAR_BIT);
    }

    if (buffer[7] & (1u << (CHAR_BIT - 1))) {
        /* The 63 value bits of the fixed-width export. */
        uintmax_t value_mask = ((uintmax_t) 1 << 63) - 1;

        *out = -(intmax_t) (~ pos & value_mask) - 1;
    } else {
        *out = (intmax_t) pos;
    }
}

int main(void) {
    /* Round-trip the most extreme negative value through the fixed
       8-byte two's-complement format. */
    unsigned char wire[1024];
    intmax_t original = INTMAX_MIN;
    intmax_t restored;

    int_encode(wire, original);
    int_decode(&restored, wire);

    printf("in:  %jd\n", original);
    printf("out: %jd\n", restored);

    return 0;
}
Personal tools