Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Is there any way to shift the content of a file without storing it in an array in C?

Tags:

c

I am trying to replace words in a file. This works fine with words of the same length.

I know it can be done by storing the content in a temporary array and then shifting, but I was wondering if it can be done without using an array.

    #include<stdio.h>
    #include<string.h>
    /*
     * Replaces every whitespace-delimited word equal to argv[2] with argv[3],
     * editing the file named by argv[1] in place.
     *
     * Only words of the same length can be swapped this way: the replacement
     * is written directly over the bytes of the matched word, so nothing in
     * the file has to be shifted. Words of different lengths are rejected
     * instead of silently overwriting neighbouring text.
     *
     * Returns 0 on success and 1 on a usage or I/O error.
     */
    int main(int argc, char **argv)
    {
            char word[20];          /* one token from the file, at most 19 chars + NUL */
            size_t len;
            long mark;
            FILE *fp;

            if(argc != 4){
                    fprintf(stderr, "Usage: %s file oldWord newWord\n", argv[0]);
                    return 1;
            }

            len = strlen(argv[2]);
            if(len == 0 || len != strlen(argv[3]) || len >= sizeof word){
                    fprintf(stderr, "Both words must have the same length (1 to %d characters)\n",
                            (int)(sizeof word - 1));
                    return 1;
            }

            fp = fopen(argv[1], "r+");
            if(fp == NULL){
                    perror(argv[1]);
                    return 1;
            }

            /* The field width 19 must stay in sync with sizeof word - 1. */
            while(fscanf(fp, "%19s", word) == 1){
                    if(strlen(word) == sizeof word - 1){
                            /* The buffer is full, so the token may continue in the
                               file. Discard the remainder; if anything was discarded
                               the token is longer than any word we accept, and its
                               tail must not be mistaken for a separate word. */
                            mark = ftell(fp);
                            fscanf(fp, "%*[^ \t\n\v\f\r]");
                            if(ftell(fp) != mark)
                                    continue;
                    }
                    if(strcmp(word, argv[2]) != 0)
                            continue;

                    /* Step back over the word just read and overwrite it. */
                    if(fseek(fp, -(long)len, SEEK_CUR) != 0 ||
                       fwrite(argv[3], 1, len, fp) != len){
                            perror(argv[1]);
                            fclose(fp);
                            return 1;
                    }
                    /* On an update stream a read may not directly follow a write;
                       a positioning call in between is required. */
                    fseek(fp, 0L, SEEK_CUR);
            }

            if(fclose(fp) != 0){
                    perror(argv[1]);
                    return 1;
            }
            return 0;
    }

Here is my code for replacing same-length words. What can I modify here to handle words of different lengths?

like image 851
Koalafied Koala Avatar asked Sep 09 '19 15:09

Koalafied Koala


1 Answers

I assume that the OP's goal is to avoid storing the whole content of the file in a byte array (maybe there is not enough memory). The OP also said that the file's content needs to be "shifted", so I assume that a temp file cannot be used for the text replacement either (perhaps there is not enough room on the storage device).

Note that copying into a temp file would be the easiest method.

As I see it, the solution needs two algorithms:

  • Shift to left: Replace a text with another of equal or smaller length.
  • Shift to right: Replace a text with a longer one.

Shift to left:

  1. Maintain 2 file position pointers: one for the read position (rdPos) and another for the write position (wrPos).
  2. Both start at zero.
  3. Read chars from rdPos and write each one at wrPos until oldText is found (but write only if rdPos != wrPos, to avoid unnecessary write operations).
  4. write the newText into wrPos.
  5. repeat from step 3 until EOF.
  6. if len(oldText) > len(newText) then truncate the file

Shift to right:

  1. Maintain 2 file position pointers: (rdPos and wrPos).
  2. scan the whole file to find the number of the oldText occurrences.
  3. store their file positions into a small array (not strictly needed, but useful to avoid a second reverse scan of the oldText)
  4. set rdPos = EOF-1 (the last char in the file)
  5. set wrPos = EOF+foundCount*(len(newText)-len(oldText)): reserving enough extra space for the shifting.
  6. Read chars backwards from rdPos and write each one at wrPos until reaching the next position stored in the "found" array.
  7. write the newText into wrPos.
  8. repeat from step 6 until BOF.

I wrote the following implementation as an example of the mentioned algorithms, but without caring too much about validations and edge cases.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

/* Number of slots in found.pos[]. */
#define MAX_ITEMS   100

/* Action flags passed to scanFile(), findFirstChar() and testOldText(). */
#define DO_WRITE    0x01
#define DO_FIND     0x02

/* Stream of the file being edited in place. */
FILE *fp;

/* Logical read and write offsets into the file. */
long rdPos = 0L;
long wrPos = 0L;

/* Statistics: bytes read from / written to the file. */
long rdCount = 0L;
long wrCount = 0L;

/* Lengths of the replacement and of the searched text. */
int newLen;
int oldLen;

/* Replacement text and text being searched for. */
char *newText;
char *oldText;

/* Occurrence table: len entries used, pos[] holds file offsets. */
struct found_t {
  int  len;
  long pos[MAX_ITEMS];
} found;

/* helper functions */

void writeChars(char *buffer, int len){
  if(wrPos < rdPos){
    long p = ftell(fp); 
    fseek(fp, wrPos, SEEK_SET);
    fwrite(buffer, len, 1, fp); 
    fseek(fp, p, SEEK_SET);
    wrCount += len;
  }
  wrPos += len;
}

/* One-byte lookahead; a value equal to EOF means "nothing pending".
   The initial -1 relies on EOF being -1. */
int nextReadChar = -1;

/*
 * Returns the next input byte, or EOF.
 *
 * A pending lookahead byte is handed out first (and cleared) without
 * touching the stream. Otherwise a byte is fetched from fp, and rdCount is
 * incremented when that fetch did not return EOF.
 */
int readChar(){
  int c = nextReadChar;

  if(c != EOF) {
    nextReadChar = EOF;
    return c;
  }

  c = fgetc(fp);
  if(c != EOF)
    rdCount++;
  return c;
}

int findFirstChar(int action){
  int c; char ch;
  for(; (c = readChar()) != EOF && c != (int)oldText[0]; rdPos++) 
    if(action == DO_WRITE) {
      ch = (char)c; 
      writeChars(&ch, 1);
    }
  return c;
}

int testOldText(int c, int action){
  char *cmp;
  for(cmp = oldText; *cmp != '\0' && c == (int)*cmp; cmp++) 
    c = readChar();

  nextReadChar = c;
  if(*cmp == '\0') {  /* found oldText         */
    if(action == DO_FIND)
      found.pos[found.len++] = rdPos;
    rdPos += oldLen;
    if(action == DO_WRITE){
      writeChars(newText, newLen);
      found.len++;
    }
  }
  else {              /* some chars were equal */
    if(action == DO_WRITE)
      writeChars(oldText, cmp-oldText);
    rdPos += cmp-oldText;
  }
  return c;
}  

/*
 * Handles one occurrence during the backward pass.
 *
 * Bytes are copied one at a time from offset rdPos to offset wrPos, both
 * moving towards the start of the file, for as long as rdPos is at or
 * beyond the end of the occurrence (firstCharPos + oldLen). newText is then
 * written directly in front of the copied bytes. On return rdPos is the
 * offset just before the occurrence and wrPos the offset just before the
 * newly written newText.
 */
void writeReverseBlock(long firstCharPos){
  long tailStart = firstCharPos + oldLen;  /* first byte after the occurrence */
  int ch;

  while(rdPos >= tailStart){
    fseek(fp, rdPos, SEEK_SET);
    ch = fgetc(fp);
    fseek(fp, wrPos, SEEK_SET);
    fputc(ch, fp);

    rdPos--;
    wrPos--;
    rdCount++;
    wrCount++;
  }

  rdPos = firstCharPos - 1;

  /* wrPos currently addresses the last byte reserved for newText. */
  wrPos -= newLen - 1;
  fseek(fp, wrPos, SEEK_SET);
  wrPos--;
  fwrite(newText, newLen, 1, fp);
  wrCount += newLen;
}

/*
 * Scans the input from the current position to EOF, alternating between
 * findFirstChar() and testOldText().
 *
 * action (DO_WRITE or DO_FIND) is forwarded to both helpers. It used to be
 * ignored in favour of a hard-coded DO_WRITE, which made scanFile(DO_FIND)
 * behave like a write pass.
 */
void scanFile(int action){
  int c;
  while((c = findFirstChar(action)) != EOF) {
    if(testOldText(c, action) == EOF)
      break;
  }
}

/** Main Algorithms */

/*
 * Runs a DO_WRITE scan over the file, flushes the stream and then cuts the
 * file off at wrPos.
 *
 * Failures of fflush() or ftruncate() are reported on stderr instead of
 * being silently ignored; the file is not truncated when the flush failed.
 */
void shiftToLeft(){
  scanFile(DO_WRITE);
  if(fflush(fp) != 0) {
    perror("fflush");
    return;
  }
  if(ftruncate(fileno(fp), wrPos) != 0)
    perror("ftruncate");
}

/*
 * Calls scanFile(DO_FIND), then steps rdPos back by one and sets wrPos to
 * rdPos plus the total growth, found.len * (newLen - oldLen). Finally calls
 * writeReverseBlock() for each recorded position, from the last one to the
 * first.
 */
void shiftToRight(){
  int idx;

  scanFile(DO_FIND);

  rdPos -= 1;
  wrPos = rdPos + found.len * (newLen-oldLen); /* reserve space after EOF */

  idx = found.len;
  while(idx-- > 0)
    writeReverseBlock(found.pos[idx]);
}

/* MAIN program */
/*
 * Usage: prog file.ext oldText newText
 *
 * Opens the file for in-place update, replaces oldText with newText using
 * shiftToLeft() when newText is not longer than oldText and shiftToRight()
 * otherwise, then prints the occurrence and byte counters.
 *
 * Exit status: 0 success, 1 bad arguments, 2 file cannot be opened,
 *              3 out of memory, 4 error while closing the file.
 */
int main(int argc, char **argv){
  int status = 0;

  if(argc != 4){
    fprintf(stderr, "Usage: %s file.ext oldText newText\n", argv[0]);
    return 1;
  }
  /* An empty search text is rejected: testOldText() would report a match
     without consuming any input. */
  if(argv[2][0] == '\0'){
    fprintf(stderr, "oldText must not be empty\n");
    return 1;
  }
  if(!(fp = fopen(argv[1], "r+b"))) {
    fprintf(stderr, "Cannot open file '%s'\n", argv[1]);
    return 2;
  }

  oldText = strdup(argv[2]);
  newText = strdup(argv[3]);
  if(oldText == NULL || newText == NULL){
    fprintf(stderr, "Out of memory\n");
    free(oldText);
    free(newText);
    fclose(fp);
    return 3;
  }
  oldLen = (int)strlen(oldText);
  newLen = (int)strlen(newText);
  found.len = 0;

  /* which algorithm? */
  if(newLen <= oldLen) shiftToLeft();
  else                 shiftToRight();

  /* fclose() flushes pending output, so a failure here means lost data. */
  if(fclose(fp) != 0){
    fprintf(stderr, "Error closing file '%s'\n", argv[1]);
    status = 4;
  }

  printf("%7d occurrences\n"
         "%7ld bytes read\n"
         "%7ld bytes written\n", found.len, rdCount, wrCount);

  free(oldText);
  free(newText);
  return status;
}
like image 139
Wilfredo Pomier Avatar answered Nov 10 '22 15:11

Wilfredo Pomier