*Based on a video*

Behind OpenMP is pthreads

Don’t know why OpenMP works? Learn about pthreads!

pthread_create(), pthread_join()

If you run `man pthread_create`, you will see:

pthread_create: int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)

What we need is

  • pthread_t tid;
  • pthread_attr_t attr; pthread_attr_init(&attr);
  • a function that returns void* and takes a single void* arg, which is a pointer to whatever we want to pass to the function.

This is how we use pthread_create() to create a thread running func. pthread_create(&tid, &attr, func_name, &arg_name)

pthread_join: int pthread_join(pthread_t thread, void **value_ptr)

Use Global variable to return value

At run time, global variables live in the process's static data segment (not on the heap or on any thread's stack). That memory is shared by all threads of the process, so every thread can access them. Use gcc -Wall -g -std=c99 -Werror -pthread sum_on_threads.c -o sum_on_threads to compile the code below.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// sum_on_threads.c
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
/* Shared accumulator: the worker thread adds into it and main() prints it
 * after joining the thread. */
long long sum = 0;

/*
 * Thread entry point: adds every integer from 0 through the limit
 * (inclusive) to the global `sum`.
 *
 * arg: pointer to a long long holding the upper limit; it is only read.
 * The thread ends through pthread_exit(0), i.e. with a null exit value.
 */
void* sum_runner(void* arg)
{
    const long long upper = *(const long long *) arg;
    long long next = 0;

    while (next <= upper) {
        sum += next;
        ++next;
    }

    pthread_exit(0);
}

/*
 * Entry point: computes 0 + 1 + ... + <num> on a worker thread and prints
 * the total.
 *
 * argv[1] is the upper limit, parsed with atoll(). Exits with -1 when the
 * argument is missing; returns 1 if the thread cannot be created or joined.
 */
int main(int argc, char *argv[]){
    if(argc < 2){
        printf("Usage: <num>\n");
        exit(-1);
    }
    long long limit = atoll(argv[1]);

    pthread_t tid;
    pthread_attr_t attr;
    pthread_attr_init(&attr);

    /* pthread functions report failure through their return value. */
    int rc = pthread_create(&tid, &attr, sum_runner, &limit);
    /* The attribute object is only needed while the thread is being created. */
    pthread_attr_destroy(&attr);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: %d\n", rc);
        return 1;
    }

    /* `limit` lives on main's stack and the worker reads it through a
     * pointer, so main has to wait for the worker before going any further. */
    rc = pthread_join(tid, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_join failed: %d\n", rc);
        return 1;
    }
    printf("sum is %lld\n", sum);
    return 0;
}

Use a Struct containing input and output

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
// sum_on_many_threads.c
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/* Per-thread work item: `limit` is the input and `answer` is filled in by the
 * thread, so no global variable is needed to hand the result back. */
struct sum_runner_struct{
    long long limit;
    long long answer;
};

/*
 * Thread entry point: sums the integers 0..limit (inclusive) and stores the
 * total in the caller's struct.
 *
 * arg: pointer to a struct sum_runner_struct; `limit` is read and `answer`
 *      is written.
 * The thread ends through pthread_exit(0), i.e. with a null exit value.
 */
void* sum_runner(void *arg){
    /* There is no typedef, so the type must be spelled with `struct`. */
    struct sum_runner_struct *arg_struct = (struct sum_runner_struct*) arg;
    long long sum = 0;
    for(long long i = 0; i <= arg_struct->limit; ++i)
        sum += i;
    arg_struct->answer = sum;
    pthread_exit(0);
}


/*
 * Entry point: starts one thread per command-line number; each thread sums
 * 0..<num> into its own struct, and the results are printed in argument order.
 *
 * Exits with -1 when no numbers are given; returns 1 if a thread could not be
 * created or joined, 0 otherwise.
 */
int main(int argc, char *argv[]){
    if (argc < 2){
        printf("Usage:<num1><num2>...\n");
        exit(-1);
    }
    int num_args = argc - 1;
    struct sum_runner_struct args[num_args];

    pthread_t tids[num_args];
    int status = 0;
    int started = 0;   /* number of threads actually created */
    for (; started < num_args; ++started){
        /* argv[0] is the program name, so the i-th number is argv[i + 1]. */
        args[started].limit = atoll(argv[started + 1]);
        args[started].answer = 0;
        pthread_attr_t attr;
        pthread_attr_init(&attr);
        int rc = pthread_create(&tids[started], &attr, sum_runner, &args[started]);
        pthread_attr_destroy(&attr);
        if (rc != 0){
            fprintf(stderr, "pthread_create failed: %d\n", rc);
            status = 1;
            break;
        }
    }
    /* Join only the threads that were started; each one writes into args[],
     * which lives on main's stack, so main must outlive them. */
    for(int i = 0; i < started; ++i){
        int rc = pthread_join(tids[i], NULL);
        if (rc != 0){
            fprintf(stderr, "pthread_join failed: %d\n", rc);
            status = 1;
            continue;
        }
        printf("Sum for thread %d is %lld\n", i, args[i].answer);
    }
    return status;
}

Use malloc inside func

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/*
 * Thread entry point: sums the integers 0..limit (inclusive) and returns the
 * total in heap memory.
 *
 * arg: pointer to a malloc'ed long long holding the limit. This function
 *      takes ownership and frees it.
 * Exit value (collected with pthread_join): pointer to a malloc'ed long long
 * holding the sum, which the joiner must free, or NULL if the allocation
 * failed.
 */
void* sum_runner(void* arg)
{
    long long *limit_ptr = (long long*) arg;
    long long limit = *limit_ptr;
    free(arg);

    long long sum = 0;
    for (long long i = 0; i <= limit; i++) {
        sum += i;
    }

    // Pass back data in dynamically allocated memory: unlike a local variable,
    // it stays valid after this thread ends, until somebody frees it.
    long long *answer = malloc(sizeof(*answer));
    if (answer != NULL) {
        *answer = sum;
    }
    pthread_exit(answer);
}

/*
 * Entry point: passes a heap-allocated limit to a worker thread and prints
 * the heap-allocated sum the thread hands back through pthread_join().
 *
 * Exits with -1 when the argument is missing; returns 1 on allocation or
 * thread failure, 0 otherwise.
 */
int main(int argc, char **argv)
{
    if (argc < 2) {
        printf("Usage:<num>\n");
        exit(-1);
    }

    long long *limit = malloc(sizeof(*limit));
    if (limit == NULL) {
        perror("malloc");
        return 1;
    }
    *limit = atoll(argv[1]);

    pthread_t tid;
    pthread_attr_t attr;
    pthread_attr_init(&attr);

    int rc = pthread_create(&tid, &attr, sum_runner, limit);
    pthread_attr_destroy(&attr);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: %d\n", rc);
        // The thread never ran, so nobody else will free the argument.
        free(limit);
        return 1;
    }

    // Wait until the thread has finished its work.
    /* pthread_join stores the thread's exit value through a void**, so we
    receive it in a void* and convert afterwards instead of casting the
    address of a long long* to void**. */
    void *exit_value = NULL;
    rc = pthread_join(tid, &exit_value);
    if (rc != 0 || exit_value == NULL) {
        fprintf(stderr, "thread did not produce a result\n");
        return 1;
    }
    long long *result = exit_value;
    printf("Sum is %lld\n", *result);
    // Don't forget to free the result, otherwise it leaks.
    free(result);
    return 0;
}

Synchronization is needed

With a mutex we get the right answer, but the running time increased from 1 s to 45 s, because the lock is acquired and released on every single loop iteration of both threads. The correct sum in the following code is 0.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// count.c
#include <stdio.h>
#include <pthread.h>

#define NUM_LOOPS 50000000
/* Shared counter updated by both threads; every access is guarded by `mutex`. */
long long sum = 0;
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Thread entry point: adds `offset` to the shared `sum` NUM_LOOPS times,
 * taking the mutex around each update.
 *
 * arg: pointer to an int holding the offset; it is read once at start-up.
 * The thread ends with a null exit value.
 */
void* counting_thread(void* arg){
    int offset = *(int *)arg;
    for(int i = 0; i < NUM_LOOPS; ++i){
        //Start critical section
        pthread_mutex_lock(&mutex);
        // only one thread at a time can run the code below
        sum += offset;
        pthread_mutex_unlock(&mutex);
        //End critical section
    }
    pthread_exit(NULL);
}

/*
 * Entry point: runs one thread that adds +1 and one that adds -1, each
 * NUM_LOOPS times, then prints the shared sum.
 */
int main(void){
    pthread_t tid1;
    int offset1 = 1;
    pthread_create(&tid1, NULL, counting_thread, &offset1);

    pthread_t tid2;
    int offset2 = -1;
    /* What if we set offset1 = -1 and passed &offset1 to pthread_create below?
    Hint: it would be a terrible idea. The first thread reads *arg only when
    it starts running, which may be after the assignment, so both threads
    could end up using -1.
    */
    pthread_create(&tid2, NULL, counting_thread, &offset2);

    pthread_join(tid1, NULL);
    pthread_join(tid2, NULL);
    printf("sum = %lld\n",sum);
    return 0;
}

Appendix: Makefile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Makefile
# Builds every program listed in TARGETS from the .c file of the same name.
TARGETS = sum_on_threads
CC_CPP = g++
CC_C = gcc

CFLAGS = -Wall -g -std=c99 -Werror -pthread
# -Wall    enables the common set of compiler warnings
# -g       emits debugging information (e.g. for gdb)
# -Werror  turns every warning into an error, forcing us to make the code correct
# -pthread compiles and links with POSIX threads support

# `all` and `clean` are commands, not files to be created.
.PHONY: all clean

all: clean $(TARGETS)

# Static pattern rule: each target depends on <target>.c.
# Recipe lines must start with a TAB character, not spaces.
$(TARGETS): %: %.c
	$(CC_C) $(CFLAGS) $< -o $@

clean:
	rm -f $(TARGETS)