admin管理员组

文章数量:1122832

The main program, acting as the parent process, creates child processes using fork() based on the number of processes specified as a parameter. The parent process evenly divides the entire file size among the child processes, assigning each child process a specific range of the file to search for alphabetic characters and count their occurrences. The division is based on the following rules:

A. For example, if the file size is 1000 bytes and the number of processes is 2, the main process creates 2 child processes. Each child process will be assigned a file offset range: 0–499 and 500–999, respectively. (File offsets start at 0.)

B. If the file size is 1000 bytes and the number of processes is 3, the file offset ranges assigned to each child process would be 0–332, 333–665, and 666–999. Any remainder from the division is included in the last range.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <string.h>

int count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
    int count = 0;
    char c;
    lseek(fd, start_offset, SEEK_SET);
    
    for (off_t i = start_offset; i <= end_offset; i++) {
        if (read(fd, &c, 1) == 1 && isalpha(c)) {
            count++;
        }
    }
    
    return count;
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
        exit(1);
    }

    const char *filename = argv[1];
    int num_children = atoi(argv[2]);
    if (num_children <= 0) {
        fprintf(stderr, "Number of children must be a positive integer.\n");
        exit(1);
    }

    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }

    off_t file_size = lseek(fd, 0, SEEK_END);
    close(fd);
    if (file_length <= 0) {
        fprintf(stderr, "File is empty or unreadable.\n");
        exit(1);
    }
    
    int offset_range = file_size / num_children;
    int extra_range = file_size % num_children;

    int pipefd[2];
    if (pipe(pipefd) == -1) {
        perror("Pipe failed");
        exit(1);
    }

    for (int i = 0; i < num_children; ++i) {
        pid_t pid = fork();
        if (pid < 0) {
            perror("Fork failed");
            exit(1);
        } else if (pid == 0) {
            off_t start_offset = i * offset_range;
            off_t end_offset = start_offset + offset_range;
            if (i == num_children - 1) {
                end_offset += extra_range;
            }

            int count = count_alphabet(filename, start_offset, end_offset);
            fprintf(stderr, "Process[%d] has found %d alphabet letters in (%ld ~ %ld).", 
                    getpid(), count, start_offset, end_offset);
                                        
            char child_buf[1000000];
            int written_count = snprintf(child_buf, sizeof(temp), "%d\n", count); 
            write(pipefd[1], child_buf, written_count);
        }
    }

    int total_count = 0;
    char parent_buf[1000000];
    int read_count;
    
    while ((read_count = read(pipefd[0], parent_buf, sizeof(parent_buf) - 1)) > 0) {
        parent_buf[read_count] = '\0'; 
        char *line = strtok(parent_buf, "\n");
        while (line != NULL) {
            total_count += atoi(line);
            line = strtok(NULL, "\n");
        }
    }

    while (waitpid(-1, NULL, 0) > 0);

    fprintf(stderr, "Process[%d] has found %d alphabet letters in %c",
            getpid(), total_count, filename);
    return 0;
}

I wrote the code as described above, but it seems like the counting logic is incorrect. However, I'm not sure how to fix it. Please help me!

The main program, acting as the parent process, creates child processes using fork() based on the number of processes specified as a parameter. The parent process evenly divides the entire file size among the child processes, assigning each child process a specific range of the file to search for alphabetic characters and count their occurrences. The division is based on the following rules:

A. For example, if the file size is 1000 bytes and the number of processes is 2, the main process creates 2 child processes. Each child process will be assigned a file offset range: 0–499 and 500–999, respectively. (File offsets start at 0.)

B. If the file size is 1000 bytes and the number of processes is 3, the file offset ranges assigned to each child process would be 0–332, 333–665, and 666–999. Any remainder from the division is included in the last range.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <string.h>

int count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
    int count = 0;
    char c;
    lseek(fd, start_offset, SEEK_SET);
    
    for (off_t i = start_offset; i <= end_offset; i++) {
        if (read(fd, &c, 1) == 1 && isalpha(c)) {
            count++;
        }
    }
    
    return count;
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
        exit(1);
    }

    const char *filename = argv[1];
    int num_children = atoi(argv[2]);
    if (num_children <= 0) {
        fprintf(stderr, "Number of children must be a positive integer.\n");
        exit(1);
    }

    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }

    off_t file_size = lseek(fd, 0, SEEK_END);
    close(fd);
    if (file_length <= 0) {
        fprintf(stderr, "File is empty or unreadable.\n");
        exit(1);
    }
    
    int offset_range = file_size / num_children;
    int extra_range = file_size % num_children;

    int pipefd[2];
    if (pipe(pipefd) == -1) {
        perror("Pipe failed");
        exit(1);
    }

    for (int i = 0; i < num_children; ++i) {
        pid_t pid = fork();
        if (pid < 0) {
            perror("Fork failed");
            exit(1);
        } else if (pid == 0) {
            off_t start_offset = i * offset_range;
            off_t end_offset = start_offset + offset_range;
            if (i == num_children - 1) {
                end_offset += extra_range;
            }

            int count = count_alphabet(filename, start_offset, end_offset);
            fprintf(stderr, "Process[%d] has found %d alphabet letters in (%ld ~ %ld).", 
                    getpid(), count, start_offset, end_offset);
                                        
            char child_buf[1000000];
            int written_count = snprintf(child_buf, sizeof(temp), "%d\n", count); 
            write(pipefd[1], child_buf, written_count);
        }
    }

    int total_count = 0;
    char parent_buf[1000000];
    int read_count;
    
    while ((read_count = read(pipefd[0], parent_buf, sizeof(parent_buf) - 1)) > 0) {
        parent_buf[read_count] = '\0'; 
        char *line = strtok(parent_buf, "\n");
        while (line != NULL) {
            total_count += atoi(line);
            line = strtok(NULL, "\n");
        }
    }

    while (waitpid(-1, NULL, 0) > 0);

    fprintf(stderr, "Process[%d] has found %d alphabet letters in %c",
            getpid(), total_count, filename);
    return 0;
}

I wrote the code as described above, but it seems like the counting logic is incorrect. However, I'm not sure how to fix it. Please help me!

Share Improve this question edited Nov 23, 2024 at 12:23 chqrlie 144k12 gold badges130 silver badges207 bronze badges asked Nov 21, 2024 at 13:18 kkkkkk 232 bronze badges 3
  • 2 Your child processes do not exit once they're done. They will continue with the rest of the main function, and do what the parent process is doing, which includes creating new processes. – Some programmer dude Commented Nov 21, 2024 at 13:23
  • 1 Oh, and the code won't even build... What is temp? If this isn't an actual minimal reproducible example of the code you're building and running, it's going to be hard to help you. – Some programmer dude Commented Nov 21, 2024 at 13:24
  • 1 On a different note, you don't need those extremely large arrays for the strings. Even for a 64-bit type, the number of digits will be far less than one million characters. – Some programmer dude Commented Nov 21, 2024 at 13:26
Add a comment  | 

1 Answer 1

Reset to default 0

There are multiple problems in your code:

In function count_alphabet:

  • the return type should be off_t
  • you do not open the source file, fd is undefined.
  • the variable count should have type off_t to allow for very large files
  • c should be defined as unsigned char for isalpha(c) to have defined behavior on non ASCII bytes on architectures where char is a signed type.
  • failure to read the expected number of bytes should be reported.
  • failute to seek inside the file should be tested and reported.
  • the loop test should be i < end_offset, not <= as this would cause the last byte to be counted twice.

In function main:

  • the file should be open in binary mode on legacy systems that perform automatic translation of file contents.
  • empty files should be accepted and handled correctly
  • offset_range, extra_range and count should have type off_t
  • count, start_offset, end_offset should be cast as (long long) for portability on systems where off_t is not defined as type long.
  • writing to the pipe should be performed in binary to avoid conversion issues on partial reads in the read loop.
  • the child process should be exited after writing to the pipe.
  • reading from the pipe in binary, one int at a time would simplify the reading loop and make it more reliable. This loop should wait for a child exit, test and handle non exit status and return read the count only on successful exit.
  • the final fprintf should use %s for the filename argument, and %lld for count, cast as `(long long).

Here is a modified version:

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <fcntl.h>

static int open_as_read_binary(const char *filename) {
#ifdef O_BINARY
    return open(filename, O_RDONLY | O_BINARY);
#else
    return open(filename, O_RDONLY);
#endif
}

off_t count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
    int fd = open_as_read_binary(filename);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }
    if (lseek(fd, start_offset, SEEK_SET) < 0) {
        perror("Failed to seek into file");
        exit(1);
    }

    off_t count = 0;
    for (off_t i = start_offset; i < end_offset;) {
        unsigned char c;
        ssize_t nread = read(fd, &c, 1);
        if (nread == 1) {
            if (isalpha(c)) {
                count++;
            }
            i++;
        } else {
            if (nread == 0) {
                fprintf(stderr, "failed to read %lld bytes from %s\n",
                        (long long)(end_offset - start_offset), filename);
                exit(1);
            }
            if (nread < 0) {
                if (errno == EINTR)
                    continue;
            }
            perror("Error reading from file");
            exit(1);
        }
    }
    close(fd);
    return count;
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
        exit(1);
    }

    const char *filename = argv[1];
    int num_children = atoi(argv[2]);
    if (num_children <= 0) {
        fprintf(stderr, "Number of children must be a strictly positive integer.\n");
        exit(1);
    }

    int fd = open_as_read_binary(filename);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }

    off_t file_size = lseek(fd, 0, SEEK_END);
    if (file_size < 0) {
        fprintf(stderr, "File is unseekable.\n");
        exit(1);
    }
    close(fd);

    off_t offset_range = file_size / num_children;
    off_t extra_range = file_size % num_children;

    int pipefd[2];
    if (pipe(pipefd) == -1) {
        perror("Pipe failed");
        exit(1);
    }

    for (int i = 0; i < num_children; ++i) {
        pid_t pid = fork();
        if (pid < 0) {
            perror("Fork failed");
            exit(1);
        }
        if (pid == 0) {
            off_t start_offset = i * offset_range;
            off_t end_offset = start_offset + offset_range;
            if (i == num_children - 1) {
                end_offset += extra_range;
            }
            off_t count = count_alphabet(filename, start_offset, end_offset);
            fprintf(stderr, "Process[%d] has found %lld alphabet letters in (%lld ~ %lld).\n",
                    getpid(), (long long)count, (long long)start_offset, (long long)end_offset);
            // carefully write the count to the pipe
            for (;;) {
                ssize_t write_count = write(pipefd[1], &count, sizeof count);
                if (write_count == sizeof count)
                    return 0;
                if (write_count == -1) {
                    if (errno == EINTR)
                        continue;
                    perror("error writing to the pipe");
                    exit(1);
                }
                fprintf(stderr, "cannot write to the pipe\n");
                exit(1);
            }
        }
    }

    off_t total_count = 0;
    int status;
    int pid;
    while ((pid = waitpid(-1, &status, 0)) >= 0) {
        if (WIFEXITED(status)) {
            int exit_status = WEXITSTATUS(status);
            if (exit_status == 0) {
                for (;;) {
                    off_t count;
                    ssize_t read_count = read(pipefd[0], &count, sizeof count);
                    if (read_count == sizeof count) {
                        total_count += count;
                        break;
                    } else {
                        if (read_count < 0) {
                            if (errno == EINTR)
                                continue;
                            perror("error reading from the pipe");
                            exit(1);
                        }
                        if (read_count == 0)
                            fprintf(stderr, "cannot read from the pipe\n");
                        else
                            fprintf(stderr, "partial read from the pipe\n");
                        exit(1);
                    }
                }
            } else {
                fprintf(stderr, "child process [%d] returned non zero status %d\n",
                        pid, exit_status);
            }
        }
    }
    fprintf(stderr, "Process[%d] has found %lld alphabet letters in %s\n",
            getpid(), (long long)total_count, filename);
    return 0;
}

Note that even writing the counts in binary chunks to the pipe does not guarantee atomic operation: there is a slim but non zero possibility for the bytes to be interlaced between threads. The file handle for writing to the pipe should be locked for each write operation.

本文标签: ccount alphabet of one file with multi processes in LinuxStack Overflow