admin管理员组文章数量:1122832
The main program, acting as the parent process, creates child processes using fork()
based on the number of processes specified as a parameter. The parent process evenly divides the entire file size among the child processes, assigning each child process a specific range of the file to search for alphabetic characters and count their occurrences. The division is based on the following rules:
A. For example, if the file size is 1000 bytes and the number of processes is 2, the main process creates 2 child processes. Each child process will be assigned a file offset range: 0–499 and 500–999, respectively. (File offsets start at 0.)
B. If the file size is 1000 bytes and the number of processes is 3, the file offset ranges assigned to each child process would be 0–332, 333–665, and 666–999. Any remainder from the division is included in the last range.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <string.h>
/*
 * Counts the alphabetic bytes found between start_offset and end_offset.
 * Both bounds are treated as inclusive: the loop below runs while
 * i <= end_offset.
 *
 * NOTE(review): `fd` is used but not declared or opened anywhere in the
 * visible code, and the `filename` parameter is never used, so this function
 * does not compile as posted.
 * NOTE(review): the results of lseek() and read() are not checked for errors;
 * a failed or short read simply counts as "not a letter".
 */
int count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
int count = 0;
/* NOTE(review): plain char may be signed; isalpha() expects an unsigned char value or EOF. */
char c;
lseek(fd, start_offset, SEEK_SET);
/* One read() call per byte; i only tracks the position, the byte itself lands in c. */
for (off_t i = start_offset; i <= end_offset; i++) {
if (read(fd, &c, 1) == 1 && isalpha(c)) {
count++;
}
}
return count;
}
/*
 * Entry point: expects <filename> and <number_of_children> on the command
 * line, measures the file, forks the children, has each child count the
 * letters of its slice via count_alphabet() and send the result through a
 * shared pipe as a decimal text line, then sums the lines in the parent.
 */
int main(int argc, char *argv[]) {
if (argc != 3) {
fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
exit(1);
}
const char *filename = argv[1];
int num_children = atoi(argv[2]);
if (num_children <= 0) {
fprintf(stderr, "Number of children must be a positive integer.\n");
exit(1);
}
/* The file is opened here only to learn its size by seeking to the end. */
int fd = open(filename, O_RDONLY);
if (fd < 0) {
perror("Failed to open file");
exit(1);
}
off_t file_size = lseek(fd, 0, SEEK_END);
close(fd);
/* NOTE(review): `file_length` is not declared in the visible code; the size was stored in `file_size`. */
if (file_length <= 0) {
fprintf(stderr, "File is empty or unreadable.\n");
exit(1);
}
/* NOTE(review): the off_t results are narrowed to int here. */
int offset_range = file_size / num_children;
int extra_range = file_size % num_children;
/* A single pipe is shared by all children; pipefd[0] is the read end, pipefd[1] the write end. */
int pipefd[2];
if (pipe(pipefd) == -1) {
perror("Pipe failed");
exit(1);
}
for (int i = 0; i < num_children; ++i) {
pid_t pid = fork();
if (pid < 0) {
perror("Fork failed");
exit(1);
} else if (pid == 0) {
/* Child branch: slice i starts at i * offset_range; the last slice also takes the remainder. */
off_t start_offset = i * offset_range;
/*
 * NOTE(review): end_offset is start + offset_range, but count_alphabet()
 * loops while i <= end_offset, so each slice also covers the first byte
 * of the next slice.
 */
off_t end_offset = start_offset + offset_range;
if (i == num_children - 1) {
end_offset += extra_range;
}
int count = count_alphabet(filename, start_offset, end_offset);
/* NOTE(review): %ld is paired with off_t arguments; this only matches where off_t is long. */
fprintf(stderr, "Process[%d] has found %d alphabet letters in (%ld ~ %ld).",
getpid(), count, start_offset, end_offset);
char child_buf[1000000];
/* NOTE(review): `temp` is not declared in the visible code; the buffer being filled is child_buf. */
int written_count = snprintf(child_buf, sizeof(temp), "%d\n", count);
write(pipefd[1], child_buf, written_count);
/*
 * NOTE(review): the child does not exit here. It falls through to the
 * next loop iteration, forks children of its own, and later runs the
 * parent's read loop below.
 */
}
}
int total_count = 0;
char parent_buf[1000000];
int read_count;
/*
 * Sum every newline-separated number received from the pipe.
 * NOTE(review): pipefd[1] is never closed in this process, and read() on a
 * pipe reports end-of-file only once every write end is closed, so this loop
 * has no way to terminate normally.
 */
while ((read_count = read(pipefd[0], parent_buf, sizeof(parent_buf) - 1)) > 0) {
parent_buf[read_count] = '\0';
char *line = strtok(parent_buf, "\n");
while (line != NULL) {
total_count += atoi(line);
line = strtok(NULL, "\n");
}
}
/* Reap children until waitpid() stops returning a positive pid; exit statuses are discarded. */
while (waitpid(-1, NULL, 0) > 0);
/* NOTE(review): %c is paired with `filename`, which is a const char *. */
fprintf(stderr, "Process[%d] has found %d alphabet letters in %c",
getpid(), total_count, filename);
return 0;
}
I wrote the code as described above, but it seems like the counting logic is incorrect. However, I'm not sure how to fix it. Please help me!
The main program, acting as the parent process, creates child processes using fork()
based on the number of processes specified as a parameter. The parent process evenly divides the entire file size among the child processes, assigning each child process a specific range of the file to search for alphabetic characters and count their occurrences. The division is based on the following rules:
A. For example, if the file size is 1000 bytes and the number of processes is 2, the main process creates 2 child processes. Each child process will be assigned a file offset range: 0–499 and 500–999, respectively. (File offsets start at 0.)
B. If the file size is 1000 bytes and the number of processes is 3, the file offset ranges assigned to each child process would be 0–332, 333–665, and 666–999. Any remainder from the division is included in the last range.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <string.h>
/*
 * Counts the alphabetic bytes found between start_offset and end_offset.
 * Both bounds are treated as inclusive: the loop below runs while
 * i <= end_offset.
 *
 * NOTE(review): `fd` is used but not declared or opened anywhere in the
 * visible code, and the `filename` parameter is never used, so this function
 * does not compile as posted.
 * NOTE(review): the results of lseek() and read() are not checked for errors;
 * a failed or short read simply counts as "not a letter".
 */
int count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
int count = 0;
/* NOTE(review): plain char may be signed; isalpha() expects an unsigned char value or EOF. */
char c;
lseek(fd, start_offset, SEEK_SET);
/* One read() call per byte; i only tracks the position, the byte itself lands in c. */
for (off_t i = start_offset; i <= end_offset; i++) {
if (read(fd, &c, 1) == 1 && isalpha(c)) {
count++;
}
}
return count;
}
/*
 * Entry point: expects <filename> and <number_of_children> on the command
 * line, measures the file, forks the children, has each child count the
 * letters of its slice via count_alphabet() and send the result through a
 * shared pipe as a decimal text line, then sums the lines in the parent.
 */
int main(int argc, char *argv[]) {
if (argc != 3) {
fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
exit(1);
}
const char *filename = argv[1];
int num_children = atoi(argv[2]);
if (num_children <= 0) {
fprintf(stderr, "Number of children must be a positive integer.\n");
exit(1);
}
/* The file is opened here only to learn its size by seeking to the end. */
int fd = open(filename, O_RDONLY);
if (fd < 0) {
perror("Failed to open file");
exit(1);
}
off_t file_size = lseek(fd, 0, SEEK_END);
close(fd);
/* NOTE(review): `file_length` is not declared in the visible code; the size was stored in `file_size`. */
if (file_length <= 0) {
fprintf(stderr, "File is empty or unreadable.\n");
exit(1);
}
/* NOTE(review): the off_t results are narrowed to int here. */
int offset_range = file_size / num_children;
int extra_range = file_size % num_children;
/* A single pipe is shared by all children; pipefd[0] is the read end, pipefd[1] the write end. */
int pipefd[2];
if (pipe(pipefd) == -1) {
perror("Pipe failed");
exit(1);
}
for (int i = 0; i < num_children; ++i) {
pid_t pid = fork();
if (pid < 0) {
perror("Fork failed");
exit(1);
} else if (pid == 0) {
/* Child branch: slice i starts at i * offset_range; the last slice also takes the remainder. */
off_t start_offset = i * offset_range;
/*
 * NOTE(review): end_offset is start + offset_range, but count_alphabet()
 * loops while i <= end_offset, so each slice also covers the first byte
 * of the next slice.
 */
off_t end_offset = start_offset + offset_range;
if (i == num_children - 1) {
end_offset += extra_range;
}
int count = count_alphabet(filename, start_offset, end_offset);
/* NOTE(review): %ld is paired with off_t arguments; this only matches where off_t is long. */
fprintf(stderr, "Process[%d] has found %d alphabet letters in (%ld ~ %ld).",
getpid(), count, start_offset, end_offset);
char child_buf[1000000];
/* NOTE(review): `temp` is not declared in the visible code; the buffer being filled is child_buf. */
int written_count = snprintf(child_buf, sizeof(temp), "%d\n", count);
write(pipefd[1], child_buf, written_count);
/*
 * NOTE(review): the child does not exit here. It falls through to the
 * next loop iteration, forks children of its own, and later runs the
 * parent's read loop below.
 */
}
}
int total_count = 0;
char parent_buf[1000000];
int read_count;
/*
 * Sum every newline-separated number received from the pipe.
 * NOTE(review): pipefd[1] is never closed in this process, and read() on a
 * pipe reports end-of-file only once every write end is closed, so this loop
 * has no way to terminate normally.
 */
while ((read_count = read(pipefd[0], parent_buf, sizeof(parent_buf) - 1)) > 0) {
parent_buf[read_count] = '\0';
char *line = strtok(parent_buf, "\n");
while (line != NULL) {
total_count += atoi(line);
line = strtok(NULL, "\n");
}
}
/* Reap children until waitpid() stops returning a positive pid; exit statuses are discarded. */
while (waitpid(-1, NULL, 0) > 0);
/* NOTE(review): %c is paired with `filename`, which is a const char *. */
fprintf(stderr, "Process[%d] has found %d alphabet letters in %c",
getpid(), total_count, filename);
return 0;
}
I wrote the code as described above, but it seems like the counting logic is incorrect. However, I'm not sure how to fix it. Please help me!
Share Improve this question edited Nov 23, 2024 at 12:23 chqrlie 144k12 gold badges130 silver badges207 bronze badges asked Nov 21, 2024 at 13:18 kkkkkk 232 bronze badges 3 |1 Answer
Reset to default 0There are multiple problems in your code:
In function `count_alphabet`:
- the return type should be `off_t`.
- you do not open the source file: `fd` is undefined.
- the variable `count` should have type `off_t` to allow for very large files.
- `c` should be defined as `unsigned char` for `isalpha(c)` to have defined behavior on non-ASCII bytes on architectures where `char` is a signed type.
- failure to read the expected number of bytes should be reported.
- failure to seek inside the file should be tested and reported.
- the loop test should be `i < end_offset`, not `<=`, as this would cause the last byte to be counted twice.
In function `main`:
- the file should be open in binary mode on legacy systems that perform automatic translation of file contents.
- empty files should be accepted and handled correctly.
- `offset_range`, `extra_range` and `count` should have type `off_t`.
- `count`, `start_offset`, `end_offset` should be cast as `(long long)` for portability on systems where `off_t` is not defined as type `long`.
- writing to the pipe should be performed in binary to avoid conversion issues on partial reads in the read loop.
- the child process should be exited after writing to the pipe.
- reading from the pipe in binary, one `int` at a time, would simplify the reading loop and make it more reliable. This loop should wait for a child exit, test and handle a non-zero exit status, and read the count only on successful exit.
- the final `fprintf` should use `%s` for the `filename` argument, and `%lld` for the count, cast as `(long long)`.
Here is a modified version:
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <fcntl.h>
/*
 * Open `filename` for reading only and return the descriptor produced by
 * open() (negative on failure, with errno left as open() set it).
 * Where the platform defines O_BINARY, that flag is added as well.
 */
static int open_as_read_binary(const char *filename) {
    int open_flags = O_RDONLY;
#ifdef O_BINARY
    open_flags |= O_BINARY;
#endif
    return open(filename, open_flags);
}
/*
 * Count the bytes for which isalpha() is true in the half-open byte range
 * [start_offset, end_offset) of `filename`.
 *
 * The file is opened privately, so the caller's descriptors and file
 * positions are not touched. An empty or inverted range yields 0.
 *
 * Returns the number of alphabetic bytes found. Any failure (open, seek,
 * read error, or end-of-file before end_offset) prints a diagnostic to
 * stderr and terminates the calling process with exit status 1.
 */
off_t count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
    int fd = open_as_read_binary(filename);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }
    if (lseek(fd, start_offset, SEEK_SET) < 0) {
        perror("Failed to seek into file");
        exit(1);
    }

    off_t count = 0;
    off_t remaining = end_offset > start_offset ? end_offset - start_offset : 0;
    /* unsigned char keeps every byte value valid as an isalpha() argument. */
    unsigned char chunk[4096];

    /* Read in chunks rather than one system call per byte. */
    while (remaining > 0) {
        size_t want = sizeof chunk;
        if (remaining < (off_t)sizeof chunk) {
            want = (size_t)remaining;
        }

        ssize_t nread = read(fd, chunk, want);
        if (nread < 0) {
            if (errno == EINTR) {
                continue; /* interrupted before any data arrived: retry */
            }
            perror("Error reading from file");
            exit(1);
        }
        if (nread == 0) {
            /* The file ended before the end of the requested range. */
            fprintf(stderr, "failed to read %lld bytes from %s\n",
                    (long long)(end_offset - start_offset), filename);
            exit(1);
        }

        for (ssize_t k = 0; k < nread; k++) {
            if (isalpha(chunk[k])) {
                count++;
            }
        }
        remaining -= nread; /* a short read just means another iteration */
    }

    close(fd);
    return count;
}
/*
 * Write one binary count record to the pipe, retrying when interrupted.
 * Terminates the calling process with status 1 if the record cannot be
 * written in full.
 */
static void send_count(int pipe_write_fd, off_t count) {
    for (;;) {
        ssize_t written = write(pipe_write_fd, &count, sizeof count);
        if (written == (ssize_t)sizeof count) {
            return;
        }
        if (written < 0) {
            if (errno == EINTR) {
                continue;
            }
            perror("error writing to the pipe");
            exit(1);
        }
        fprintf(stderr, "cannot write to the pipe\n");
        exit(1);
    }
}

/*
 * Read one binary count record from the pipe into *count.
 * Returns 1 when a full record was read and 0 on a clean end-of-file (no
 * bytes pending). Terminates the process with status 1 on a read error or a
 * truncated record.
 */
static int receive_count(int pipe_read_fd, off_t *count) {
    unsigned char *dst = (unsigned char *)count;
    size_t got = 0;

    while (got < sizeof *count) {
        ssize_t nread = read(pipe_read_fd, dst + got, sizeof *count - got);
        if (nread < 0) {
            if (errno == EINTR) {
                continue;
            }
            perror("error reading from the pipe");
            exit(1);
        }
        if (nread == 0) {
            if (got == 0) {
                return 0;
            }
            fprintf(stderr, "partial read from the pipe\n");
            exit(1);
        }
        got += (size_t)nread;
    }
    return 1;
}

/*
 * Usage: prog <filename> <number_of_children>
 *
 * Splits the file into <number_of_children> consecutive byte ranges (the
 * last one also takes the remainder of the division), forks one child per
 * range, and has each child count its alphabetic bytes with count_alphabet()
 * and send the result to the parent through a shared pipe as one binary
 * off_t record. The parent sums the records and prints the total.
 *
 * Returns 0 on success. Returns 1 on bad arguments, on any setup failure, or
 * when a child failed or a record is missing; in that case the total would
 * be incomplete and is not printed.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
        exit(1);
    }
    const char *filename = argv[1];

    /* strtol instead of atoi so that "3abc" or an out-of-range value is rejected. */
    errno = 0;
    char *end = NULL;
    long num_children = strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || errno == ERANGE || num_children <= 0) {
        fprintf(stderr, "Number of children must be a strictly positive integer.\n");
        exit(1);
    }

    /* Open the file once just to learn its size. */
    int fd = open_as_read_binary(filename);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }
    off_t file_size = lseek(fd, 0, SEEK_END);
    close(fd);
    if (file_size < 0) {
        fprintf(stderr, "File is unseekable.\n");
        exit(1);
    }

    off_t offset_range = file_size / num_children;
    off_t extra_range = file_size % num_children;

    int pipefd[2];
    if (pipe(pipefd) == -1) {
        perror("Pipe failed");
        exit(1);
    }

    for (long i = 0; i < num_children; ++i) {
        pid_t pid = fork();
        if (pid < 0) {
            perror("Fork failed");
            exit(1);
        }
        if (pid == 0) {
            /* Child: it only writes to the pipe. */
            close(pipefd[0]);

            /* Half-open range [start_offset, end_offset). */
            off_t start_offset = i * offset_range;
            off_t end_offset = start_offset + offset_range;
            if (i == num_children - 1) {
                end_offset += extra_range;
            }
            off_t count = count_alphabet(filename, start_offset, end_offset);

            if (end_offset > start_offset) {
                /* Report the range with an inclusive last offset (e.g. 0 ~ 499). */
                fprintf(stderr, "Process[%d] has found %lld alphabet letters in (%lld ~ %lld).\n",
                        (int)getpid(), (long long)count,
                        (long long)start_offset, (long long)(end_offset - 1));
            } else {
                /* More children than bytes: this child has nothing to scan. */
                fprintf(stderr, "Process[%d] has found %lld alphabet letters in an empty range.\n",
                        (int)getpid(), (long long)count);
            }

            /*
             * POSIX makes pipe writes of at most PIPE_BUF bytes atomic, so a
             * single off_t record is not interleaved with other children's.
             */
            send_count(pipefd[1], count);
            /* Exit here so the child never continues the fork loop. */
            exit(0);
        }
    }

    /*
     * Parent: drop its own write end so that read() reports end-of-file once
     * every child has exited.
     */
    close(pipefd[1]);

    /*
     * Drain the pipe before reaping, so children never block on a full pipe
     * while the parent is stuck in waitpid().
     */
    off_t total_count = 0;
    long received = 0;
    off_t count = 0;
    while (receive_count(pipefd[0], &count)) {
        total_count += count;
        received++;
    }
    close(pipefd[0]);

    /* Reap every child and remember whether any of them failed. */
    int failed = 0;
    for (;;) {
        int status = 0;
        pid_t pid = waitpid(-1, &status, 0);
        if (pid < 0) {
            if (errno == EINTR) {
                continue;
            }
            break; /* no children left to wait for */
        }
        if (WIFEXITED(status)) {
            int exit_status = WEXITSTATUS(status);
            if (exit_status != 0) {
                fprintf(stderr, "child process [%d] returned non zero status %d\n",
                        (int)pid, exit_status);
                failed = 1;
            }
        } else if (WIFSIGNALED(status)) {
            fprintf(stderr, "child process [%d] was killed by signal %d\n",
                    (int)pid, WTERMSIG(status));
            failed = 1;
        }
    }

    if (failed || received != num_children) {
        fprintf(stderr, "received %ld of %ld counts; the total is incomplete\n",
                received, num_children);
        return 1;
    }

    fprintf(stderr, "Process[%d] has found %lld alphabet letters in %s\n",
            (int)getpid(), (long long)total_count, filename);
    return 0;
}
Note that even writing the counts in binary chunks to the pipe does not guarantee atomic operation: there is a slim but non zero possibility for the bytes to be interlaced between threads. The file handle for writing to the pipe should be locked for each write operation.
本文标签: ccount alphabet of one file with multi processes in LinuxStack Overflow
版权声明:本文标题:c - count alphabet of one file with multi processes in Linux - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1736310536a1934411.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
main
function, and do what the parent process is doing, which includes creating new processes. – Some programmer dude Commented Nov 21, 2024 at 13:23temp
? If this isn't an actual minimal reproducible example of the code you're building and running, it's going to be hard to help you. – Some programmer dude Commented Nov 21, 2024 at 13:24