Byte Array with control characters to string

Question

So I've found a few examples but none that are exactly as I want it (close though)

example of what i'm looking for

// Sample input: STX, the ASCII letters 'F' 'O' 'O', then ETX.
byte[] array = { 0x02, 0x46, 0x4F, 0x4F, 0x03 };
// SomeMagicalMethod stands in for the conversion routine being asked for.
string text = SomeMagicalMethod(array);
//displays <STX>FOO<ETX>

At one point I had a dictionary with the non-printable character values and the <...> stuff in it, but combining the 2 strings is killing my head. I currently have the code below, but ASCIIEncoding takes out all control characters.

/// <summary>
/// Decodes <paramref name="array"/> as ASCII, rewrites every control character
/// as a bracketed two-digit hex code (for example <c>[02]</c>), and appends the
/// result to the text box through the string overload of <c>Add</c>.
/// </summary>
/// <param name="tb">Text box that receives the text.</param>
/// <param name="array">Raw bytes to decode and display.</param>
public static void Add(this TextBox tb, byte[] array)
{
  string input = System.Text.ASCIIEncoding.ASCII.GetString(array);

  // Strings are immutable: Regex.Replace returns the rewritten text rather
  // than modifying its argument, so the result must be captured and used.
  string escaped = Regex.Replace( input     ,
                                  @"\p{Cc}" ,
                                  a => string.Format( "[{0:X2}]" , (byte)a.Value[0] )
                                ) ;

  Add(tb, escaped);
}

/// <summary>
/// Appends <paramref name="text"/> followed by <c>ENTER</c> to the text box's
/// <c>Text</c>, performing the update through the text box's dispatcher at
/// background priority.
/// </summary>
/// <param name="tb">Text box whose <c>Text</c> is extended.</param>
/// <param name="text">Text to append; <c>ENTER</c> is added after it.</param>
public static void Add(this TextBox tb, string text)
{
  string line = text + ENTER;

  // The update is handed to the dispatcher instead of touching tb.Text directly.
  Action append = () => { tb.Text += line; };
  tb.Dispatcher.Invoke( DispatcherPriority.Background, append );
}

EDIT: Using NUnit, I ran the code from the answers against these tests. The last test contains one value that is outside the ASCII range (greater than 0x7F). Although the code this will be used in should NOT encounter such a value, it is better to be safe than sorry.

/// <summary>
/// NUnit tests for the <c>asciiOctets2String</c> extension on <c>byte[]</c>:
/// control characters must be rendered as bracketed mnemonics, and printable
/// ASCII bytes must be rendered as the character itself.
/// </summary>
[TestFixture]
public class StringExtensionsTest
{
    /// <summary>
    /// Every control code 0x00-0x1F, plus DEL (0x7F), maps to its mnemonic.
    /// </summary>
    [Test]
    public void SingleByteControlCharacterTest()
    {
        AssertSingleByte(0x00, "<NUL>"); AssertSingleByte(0x01, "<SOH>");
        AssertSingleByte(0x02, "<STX>"); AssertSingleByte(0x03, "<ETX>");
        AssertSingleByte(0x04, "<EOT>"); AssertSingleByte(0x05, "<ENQ>");
        AssertSingleByte(0x06, "<ACK>"); AssertSingleByte(0x07, "<BEL>");
        AssertSingleByte(0x08, "<BS>" ); AssertSingleByte(0x09, "<HT>" );
        AssertSingleByte(0x0A, "<LF>" ); AssertSingleByte(0x0B, "<VT>" );
        AssertSingleByte(0x0C, "<FF>" ); AssertSingleByte(0x0D, "<CR>" );
        AssertSingleByte(0x0E, "<SO>" ); AssertSingleByte(0x0F, "<SI>" );

        AssertSingleByte(0x10, "<DLE>"); AssertSingleByte(0x11, "<DC1>");
        AssertSingleByte(0x12, "<DC2>"); AssertSingleByte(0x13, "<DC3>");
        AssertSingleByte(0x14, "<DC4>"); AssertSingleByte(0x15, "<NAK>");
        AssertSingleByte(0x16, "<SYN>"); AssertSingleByte(0x17, "<ETB>");
        AssertSingleByte(0x18, "<CAN>"); AssertSingleByte(0x19, "<EM>" );
        AssertSingleByte(0x1A, "<SUB>"); AssertSingleByte(0x1B, "<ESC>");
        AssertSingleByte(0x1C, "<FS>" ); AssertSingleByte(0x1D, "<GS>" );
        AssertSingleByte(0x1E, "<RS>" ); AssertSingleByte(0x1F, "<US>" );
        AssertSingleByte(0x7F, "<DEL>");
    }

    /// <summary>
    /// Converts a one-byte array and checks the result against
    /// <paramref name="expected"/>.
    /// </summary>
    private void AssertSingleByte(byte value, string expected)
    {
        byte[] array = new byte[]{value};
        var actual = array.asciiOctets2String();
        Assert.AreEqual(expected, actual, "Didn't print the expected result");
    }

    /// <summary>
    /// Every byte from 0x20 through 0x7E converts to the single character
    /// with the same code.
    /// </summary>
    [Test]
    public void SingleCharacterTest()
    {
        // The upper bound is exclusive: 0x7F (DEL) is covered by the control-character test.
        for (byte i = 0x20; i < 0x7F; i++)
        {
            AssertSingleByte(i, char.ToString((char)i));
        }
    }

    /// <summary>
    /// Control codes and printable characters mixed in one array.
    /// </summary>
    [Test]
    public void SimpleTestTogether()
    {
        byte[] array = {0x02, 0x46,0x4F, 0x4F, 0x03};
        string expected = "<STX>FOO<ETX>";
        string actual = array.asciiOctets2String();
        Assert.AreEqual(expected, actual, "Simple test failed");
    }

    /// <summary>
    /// Longer mixed payload that also contains one byte above 0x7F.
    /// </summary>
    [Test]
    public void BigTest()
    {
        // 0x9F is outside the 7-bit range; this test expects it to be rendered as "?".
        byte[] array = {
            0x00, 0x7F, 0x03, 0x52, 0x00, 0x00, 0x2F, 0x5F, 0x20, 0x0F, 0x43, 0x41, 0x52, 0x44, 0x48, 0x4F,
            0x4C, 0x44, 0x45, 0x52, 0x2F, 0x56, 0x49, 0x53, 0x41, 0x9F, 0x1F, 0x07, 0x30, 0x30, 0x30, 0x30,
            0x30, 0x30, 0x30};
        string expected = "<NUL><DEL><ETX>R<NUL><NUL>/_ <SI>CARDHOLDER/VISA?<US><BEL>0000000";
        string actual = array.asciiOctets2String();

        Assert.AreEqual(expected, actual, "BigTest Failed");
    }
}

p.s.w.g · Accepted Answer

You'll have to create a table of what you want the control character to be, for example:

// Display text for each ASCII control character: the 32 codes 0x00-0x1F plus
// DEL (0x7F). Bytes that are not keys here are ordinary printable characters.
Dictionary<byte, string> controlCodes = new Dictionary<byte, string>
{
    { 0x00, "<NUL>" },
    { 0x01, "<SOH>" },
    { 0x02, "<STX>" },
    { 0x03, "<ETX>" },
    { 0x04, "<EOT>" },
    { 0x05, "<ENQ>" },
    { 0x06, "<ACK>" },
    { 0x07, "<BEL>" },
    { 0x08, "<BS>"  },
    { 0x09, "<HT>"  },
    { 0x0A, "<LF>"  },
    { 0x0B, "<VT>"  },
    { 0x0C, "<FF>"  },
    { 0x0D, "<CR>"  },
    { 0x0E, "<SO>"  },
    { 0x0F, "<SI>"  },
    { 0x10, "<DLE>" },
    { 0x11, "<DC1>" },
    { 0x12, "<DC2>" },
    { 0x13, "<DC3>" },
    { 0x14, "<DC4>" },
    { 0x15, "<NAK>" },
    { 0x16, "<SYN>" },
    { 0x17, "<ETB>" },
    { 0x18, "<CAN>" },
    { 0x19, "<EM>"  },
    { 0x1A, "<SUB>" },
    { 0x1B, "<ESC>" },
    { 0x1C, "<FS>"  },
    { 0x1D, "<GS>"  },
    { 0x1E, "<RS>"  },
    { 0x1F, "<US>"  },
    { 0x7F, "<DEL>" },
};

You can use this Wikipedia page to build the full list.

Then building your string would be as simple as:

// Translate each byte to its display text and concatenate the pieces:
// control codes become their table entry, everything else is decoded as ASCII.
var output = String.Join(
    string.Empty,
    array.Select(b =>
    {
        // TryGetValue performs one dictionary lookup instead of the two that
        // a ContainsKey check followed by the indexer would cost.
        string code;
        return controlCodes.TryGetValue(b, out code)
            ? code
            : Encoding.ASCII.GetString(new[] { b });
    }));

Of course this could be improved significantly. Something like this would be a bit more efficient:

// Scan the array for control codes, decoding each run of ordinary bytes with a
// single ASCII GetString call. A StringBuilder collects the pieces so that the
// growing result is not re-copied on every append.
StringBuilder str = new StringBuilder(array.Length);
int i = 0;
while (i < array.Length)
{
    // Index of the next control code at or after i, or -1 if none remain.
    int j = Array.FindIndex(array, i, b => controlCodes.ContainsKey(b));
    if (j < 0)
    {
        // No more control codes: decode the remaining tail and stop.
        str.Append(Encoding.ASCII.GetString(array, i, array.Length - i));
        break;
    }

    if (j > i)
    {
        // Ordinary bytes between the previous position and the control code.
        str.Append(Encoding.ASCII.GetString(array, i, j - i));
    }

    str.Append(controlCodes[array[j]]);
    i = j + 1;
}

return str.ToString();

Nicholas Carey · Answer

The C# ASCII converter junks any character outside of the 7-bit ASCII range (0x00–0x7F).

If you have code points outside that range, then you don't have ASCII. Use the appropriate encoding (like UTF-8).

If you know that your byte array is ASCII (meaning that it only contains the code points 0x00–0x7F), then you don't need the converter at all. Something like this will do you:

/// <summary>
/// Renders a byte array as text: control codes (0x00-0x1F and 0x7F) become
/// bracketed mnemonics such as <c>&lt;STX&gt;</c>, bytes above 0x7F become a
/// <c>\uXXXX</c> escape, and all other bytes become the character with the
/// same code.
/// </summary>
/// <param name="bytes">Octets to render.</param>
/// <returns>The rendered string.</returns>
static string asciiOctets2String( byte[] bytes )
{
  StringBuilder text = new StringBuilder(bytes.Length);
  foreach ( byte octet in bytes )
  {
    string mnemonic = controlMnemonic( octet ) ;
    if ( mnemonic != null )
    {
      text.Append( mnemonic ) ;
    }
    else if ( octet > 0x7F )
    {
      // in ASCII, any octet in the range 0x80-0xFF doesn't have a character glyph associated with it
      text.AppendFormat( @"\u{0:X4}" , (ushort)octet ) ;
    }
    else
    {
      text.Append( (char)octet ) ;
    }
  }
  return text.ToString() ;
}

// Returns the bracketed mnemonic for a control code, or null for any other octet.
static string controlMnemonic( byte octet )
{
  switch ( octet )
  {
  case 0x00 : return "<NUL>" ;
  case 0x01 : return "<SOH>" ;
  case 0x02 : return "<STX>" ;
  case 0x03 : return "<ETX>" ;
  case 0x04 : return "<EOT>" ;
  case 0x05 : return "<ENQ>" ;
  case 0x06 : return "<ACK>" ;
  case 0x07 : return "<BEL>" ;
  case 0x08 : return "<BS>"  ;
  case 0x09 : return "<HT>"  ;
  case 0x0A : return "<LF>"  ;
  case 0x0B : return "<VT>"  ;
  case 0x0C : return "<FF>"  ;
  case 0x0D : return "<CR>"  ;
  case 0x0E : return "<SO>"  ;
  case 0x0F : return "<SI>"  ;
  case 0x10 : return "<DLE>" ;
  case 0x11 : return "<DC1>" ;
  case 0x12 : return "<DC2>" ;
  case 0x13 : return "<DC3>" ;
  case 0x14 : return "<DC4>" ;
  case 0x15 : return "<NAK>" ;
  case 0x16 : return "<SYN>" ;
  case 0x17 : return "<ETB>" ;
  case 0x18 : return "<CAN>" ;
  case 0x19 : return "<EM>"  ;
  case 0x1A : return "<SUB>" ;
  case 0x1B : return "<ESC>" ;
  case 0x1C : return "<FS>"  ;
  case 0x1D : return "<GS>"  ;
  case 0x1E : return "<RS>"  ;
  case 0x1F : return "<US>"  ;
  case 0x7F : return "<DEL>" ;
  default   : return null ;
  }
}

Or another approach, probably faster than a switch and probably faster than a dictionary-based approach, but not as readable:

// Mnemonics for the control codes 0x00-0x1F, indexed by the code's value.
private static string[] controlChars =
{
  "<NUL>" , "<SOH>" , "<STX>" , "<ETX>" , "<EOT>" , "<ENQ>" , "<ACK>" , "<BEL>" ,
  "<BS>"  , "<HT>"  , "<LF>"  , "<VT>"  , "<FF>"  , "<CR>"  , "<SO>"  , "<SI>"  ,
  "<DLE>" , "<DC1>" , "<DC2>" , "<DC3>" , "<DC4>" , "<NAK>" , "<SYN>" , "<ETB>" ,
  "<CAN>" , "<EM>"  , "<SUB>" , "<ESC>" , "<FS>"  , "<GS>"  , "<RS>"  , "<US>"  ,
} ;

/// <summary>
/// Renders a byte array as text: octets below 0x20 are looked up in
/// <c>controlChars</c>, 0x7F becomes <c>&lt;DEL&gt;</c>, octets above 0x7F
/// become a <c>\uXXXX</c> escape, and 0x20-0x7E are appended as characters.
/// </summary>
/// <param name="bytes">Octets to render.</param>
/// <returns>The rendered string.</returns>
static string asciiOctets2String( byte[] bytes )
{
  StringBuilder text = new StringBuilder(bytes.Length);
  foreach ( byte octet in bytes )
  {
    if ( octet >= 0x80 )
    {
      // Outside the 7-bit range: emit a numeric escape.
      text.AppendFormat( @"\u{0:X4}" , (ushort)octet ) ;
    }
    else if ( octet == 0x7F )
    {
      text.Append( "<DEL>" ) ;
    }
    else if ( octet >= 0x20 )
    {
      // 0x20-0x7E: printable, append as-is.
      text.Append( (char)octet ) ;
    }
    else
    {
      // 0x00-0x1F: the octet value is the table index.
      text.Append( controlChars[octet] ) ;
    }
  }
  return text.ToString() ;
}

Byte Array with control characters to string

Tags:

c#

Robert Snyder

2 Answers

p.s.w.g

Nicholas Carey

Recent Activity

Donate For Us

Byte Array with control characters to string

Tags:

c#

Robert Snyder

2 Answers

p.s.w.g

Nicholas Carey

Related questions

Recent Activity

Donate For Us