大文本数据排序
最编程
2024-07-27 19:52:58
...
大文本数据排序及二分法
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Demo: sort the lines of a text file, build a binary offset index over the
 * sorted file, then fetch lines by number or binary-search them by key.
 *
 * Files used (relative to the working directory):
 *   file.txt      input lines
 *   filesort.txt  sorted lines, written by writetofile()
 *   index.txt     N ints in native byte order: byte offset of each sorted line
 */

char **g_pp;          /* array of N line pointers, filled by initmem() */
#define N 28          /* number of lines processed everywhere in this file */
#define LINE_BUF 256  /* size of every line buffer, terminator included */

struct index {
    int *pindex; /* pindex[i] = byte offset of line i in the indexed file */
    int length;  /* number of entries in pindex */
} allindex;

/*
 * Count the lines of file.txt.
 * Returns the line count, or -1 when the file cannot be opened.
 * A final line without a trailing newline still counts as a line.
 */
int getN(void)
{
    FILE *pf = fopen("file.txt", "r");
    if (pf == NULL) {
        return -1;
    }

    /* Counting newlines avoids the while(!feof()) pattern, which runs one
     * extra iteration, and is independent of any line-buffer size. */
    int lines = 0;
    int last = '\n';
    int ch;
    while ((ch = fgetc(pf)) != EOF) {
        if (ch == '\n') {
            lines++;
        }
        last = ch;
    }
    if (last != '\n') {
        lines++;
    }

    fclose(pf);
    return lines;
}

/* Cut str at its first '\r' or '\n' (strips the line terminator). */
void eatN(char *str)
{
    str[strcspn(str, "\r\n")] = '\0';
}

/* Release everything owned by g_pp and reset it to NULL. */
static void free_lines(void)
{
    if (g_pp == NULL) {
        return;
    }
    for (int i = 0; i < N; i++) {
        free(g_pp[i]);
    }
    free(g_pp);
    g_pp = NULL;
}

/*
 * Load the first N lines of file.txt into g_pp (terminators stripped) and
 * echo each one. Lines missing from a short file are stored as "".
 * On any failure g_pp is left NULL.
 */
void initmem(void)
{
    free_lines(); /* do not leak a previous load */

    FILE *pf = fopen("file.txt", "r");
    if (pf == NULL) {
        perror("file.txt");
        return;
    }

    g_pp = calloc(N, sizeof *g_pp);
    if (g_pp == NULL) {
        fclose(pf);
        return;
    }

    for (int i = 0; i < N; i++) {
        char str[LINE_BUF] = { 0 };
        if (fgets(str, sizeof str, pf) == NULL) {
            str[0] = '\0';
        }
        eatN(str);

        g_pp[i] = calloc(strlen(str) + 1, sizeof(char));
        if (g_pp[i] == NULL) {
            free_lines();
            break;
        }
        /* Plain copy: the line is data and must never be used as a format. */
        strcpy(g_pp[i], str);
        printf("%s", g_pp[i]); /* echo for testing */
    }

    fclose(pf);
}

/* qsort comparator: orders two char* elements by strcmp of their targets. */
int com(const void *p1, const void *p2)
{
    const char *const *pp1 = p1;
    const char *const *pp2 = p2;
    return strcmp(*pp1, *pp2);
}

/* Sort the N loaded lines in ascending strcmp order. */
void sort(void)
{
    if (g_pp == NULL) {
        return;
    }
    qsort(g_pp, N, sizeof *g_pp, com);
}

/* Print the N loaded lines, one per line. */
void show(void)
{
    printf("\n此时状态\n");
    if (g_pp == NULL) {
        return;
    }
    for (int i = 0; i < N; i++) {
        printf("\n%s", g_pp[i]);
    }
}

/* Write the N loaded lines to filesort.txt, one per line. */
void writetofile(void)
{
    if (g_pp == NULL) {
        return;
    }

    FILE *pf = fopen("filesort.txt", "w");
    if (pf == NULL) {
        perror("filesort.txt");
        return;
    }
    for (int i = 0; i < N; i++) {
        fprintf(pf, "%s\n", g_pp[i]);
    }
    if (fclose(pf) != 0) {
        perror("filesort.txt");
    }
}

/* Consume one line (terminator included) and return its length in bytes. */
static int line_bytes(FILE *pf)
{
    int len = 0;
    int ch;
    while ((ch = fgetc(pf)) != EOF) {
        len++;
        if (ch == '\n') {
            break;
        }
    }
    return len;
}

/*
 * Build index.txt for the file at path: N ints, entry i holding the byte
 * offset at which line i starts.
 *
 * path: file to index; NULL selects "filesort.txt".
 * The file is read in binary mode so that offsets are real byte positions
 * usable with fseek(). allindex.pindex is released before returning.
 */
void init(char *path)
{
    const char *src = (path != NULL) ? path : "filesort.txt";

    printf("\n索引数组开始分配");
    allindex.length = N;
    allindex.pindex = calloc(N, sizeof *allindex.pindex);
    if (allindex.pindex == NULL) {
        allindex.length = 0;
        return;
    }
    printf("\n索引数组完成分配");

    printf("\n开始读取");
    FILE *pf = fopen(src, "rb");
    if (pf == NULL) {
        perror(src);
        free(allindex.pindex);
        allindex.pindex = NULL;
        allindex.length = 0;
        return;
    }
    int alllength = 0;
    for (int i = 0; i < N; i++) {
        allindex.pindex[i] = alllength; /* offset of the line about to be read */
        alllength += line_bytes(pf);
    }
    fclose(pf);
    printf("\n结束读取");

    printf("\n开始写入");
    FILE *pfw = fopen("index.txt", "wb");
    if (pfw == NULL) {
        perror("index.txt");
    } else {
        size_t want = (size_t)allindex.length;
        if (fwrite(allindex.pindex, sizeof(int), want, pfw) != want) {
            perror("index.txt");
        }
        if (fclose(pfw) != 0) {
            perror("index.txt");
        }
    }
    printf("\n结束写入");

    free(allindex.pindex);
    allindex.pindex = NULL; /* do not leave a dangling pointer behind */
}

/*
 * Load index.txt into allindex (N entries).
 * On failure allindex.pindex is NULL and allindex.length is 0.
 */
void qucik(void)
{
    printf("\n索引数组开始分配");
    allindex.length = N;
    allindex.pindex = calloc(N, sizeof *allindex.pindex);
    if (allindex.pindex == NULL) {
        allindex.length = 0;
        return;
    }
    printf("\n索引数组完成分配");

    printf("\n开始读取");
    FILE *pfw = fopen("index.txt", "rb");
    size_t got = 0;
    if (pfw != NULL) {
        got = fread(allindex.pindex, sizeof(int), (size_t)allindex.length, pfw);
        fclose(pfw);
    }
    if (got != (size_t)allindex.length) {
        /* Missing or short index: refuse to expose partial data. */
        fprintf(stderr, "index.txt: cannot read %d entries\n", N);
        free(allindex.pindex);
        allindex.pindex = NULL;
        allindex.length = 0;
        return;
    }
    printf("\n结束读取");
}

/*
 * Interactive lookup using the in-memory index: prompt for a 0-based line
 * number and print that line of filesort.txt. The loop ends when the input
 * is exhausted or is not a number.
 */
void main内存索引(void)
{
    qucik();
    if (allindex.pindex == NULL) {
        return;
    }

    FILE *pf = fopen("filesort.txt", "rb");
    if (pf == NULL) {
        perror("filesort.txt");
        return;
    }

    for (;;) {
        printf("\n请输入要读取的行数");
        int num = 0;
        if (scanf("%d", &num) != 1) {
            break; /* EOF or garbage: stop instead of spinning forever */
        }
        if (num < 0 || num >= allindex.length) {
            printf("\n行号超出范围");
            continue;
        }

        char str[LINE_BUF] = { 0 };
        if (fseek(pf, allindex.pindex[num], SEEK_SET) != 0 ||
            fgets(str, sizeof str, pf) == NULL) {
            str[0] = '\0';
        }
        printf("\n%s", str);
    }

    fclose(pf);
    system("pause");
}

/*
 * Interactive lookup reading the index from disk: prompt for a 0-based line
 * number, fetch its offset from index.txt, then print that line of
 * filesort.txt. The loop ends when the input is exhausted or not a number.
 */
void main索引文件查找(void)
{
    FILE *pf1 = fopen("index.txt", "rb");
    FILE *pf2 = fopen("filesort.txt", "rb");
    if (pf1 == NULL || pf2 == NULL) {
        fprintf(stderr, "cannot open index.txt / filesort.txt\n");
        if (pf1 != NULL) {
            fclose(pf1);
        }
        if (pf2 != NULL) {
            fclose(pf2);
        }
        return;
    }

    for (;;) {
        printf("\n请输入要读取的行数");
        int num = 0;
        if (scanf("%d", &num) != 1) {
            break;
        }
        if (num < 0 || num >= N) {
            printf("\n行号超出范围");
            continue;
        }

        int indexnum = 0;
        char str[LINE_BUF] = { 0 };
        if (fseek(pf1, (long)num * (long)sizeof(int), SEEK_SET) != 0 ||
            fread(&indexnum, sizeof(int), 1, pf1) != 1 ||
            fseek(pf2, indexnum, SEEK_SET) != 0 ||
            fgets(str, sizeof str, pf2) == NULL) {
            str[0] = '\0';
        }
        printf("\n%s", str);
    }

    fclose(pf1);
    fclose(pf2);
    system("pause");
}

/* Cut str at its first '-' (keeps only the key part of a "key-rest" line). */
void eatg(char *str)
{
    str[strcspn(str, "-")] = '\0';
}

/*
 * Binary-search filesort.txt, through index.txt, for a line whose text
 * before the first '-' equals searchstr. Prints the matching line followed
 * by "find", or "not find".
 *
 * NOTE(review): the file is sorted by whole-line strcmp while the search
 * compares only the part before '-'. The two orders agree only when no key
 * contains a byte smaller than '-' - confirm against the real data.
 */
void binsearch(char *searchstr)
{
    /* Open both files once instead of once per probe. */
    FILE *pf1 = fopen("index.txt", "rb");
    FILE *pf2 = fopen("filesort.txt", "rb");
    if (pf1 == NULL || pf2 == NULL) {
        fprintf(stderr, "cannot open index.txt / filesort.txt\n");
        if (pf1 != NULL) {
            fclose(pf1);
        }
        if (pf2 != NULL) {
            fclose(pf2);
        }
        return;
    }

    int tou = 0;     /* first candidate line */
    int wei = N - 1; /* last candidate line */
    int flag = 0;    /* set once a match is found */

    while (tou <= wei) {
        int zhong = tou + (wei - tou) / 2;
        int indexnum = 0;
        char zhongstr[LINE_BUF] = { 0 };

        if (fseek(pf1, (long)zhong * (long)sizeof(int), SEEK_SET) != 0 ||
            fread(&indexnum, sizeof(int), 1, pf1) != 1 ||
            fseek(pf2, indexnum, SEEK_SET) != 0 ||
            fgets(zhongstr, sizeof zhongstr, pf2) == NULL) {
            /* Nothing readable here: the data ends before this line, so
             * only the lower half can still contain the key. */
            wei = zhong - 1;
            continue;
        }
        eatN(zhongstr);

        char pnewzhongstr[LINE_BUF];
        strcpy(pnewzhongstr, zhongstr); /* same size, always fits */
        eatg(pnewzhongstr);             /* keep only the key before '-' */

        /* strcmp only promises the sign of its result, never exactly 1. */
        int res = strcmp(pnewzhongstr, searchstr);
        if (res == 0) {
            flag = 1;
            printf("%s", zhongstr);
            break;
        } else if (res > 0) {
            wei = zhong - 1;
        } else {
            tou = zhong + 1;
        }
    }

    fclose(pf1);
    fclose(pf2);

    if (flag) {
        printf("\nfind");
    } else {
        printf("\n not find");
    }
}

/* Read one key from stdin and binary-search the sorted file for it. */
int main(void)
{
    char str[256] = { 0 };
    if (scanf("%255s", str) == 1) { /* width keeps the read inside str */
        binsearch(str);
    }
    system("pause");
    return 0;
}
上一篇: 分割文件根据大小操作
下一篇: 如何在大文件中快速进行排序操作
推荐阅读
-
大语言建模助力病理人工智能从报告文本中自动划分TNM分期|Top Journal Essentials-24-10-17- I. 引言
-
基本数据结构 - 利用递归完成气泡排序
-
Java 读取单词,作为私有知识库做 RAG 问答检索和生成技术(RAG),可以提高文本的准确性和企业数据的相关性
-
[数据结构与算法] 排序算法
-
数据结构—排序
-
雪球学习 MySQL [第 2.3 讲]:MySQL数据过滤和排序详解:WHERE条件、ORDER BY排序和LIMIT分页查询
-
八大排序详解
-
突破限制:探索数据结构堆的多种用途,不仅局限于堆排序
-
使用C#编写的大/小顶堆数据结构
-
总结十种经典排序算法:如何在大数据时代进行排序?