Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How would I go about parsing the Java class file constant pool?

According to https://en.wikipedia.org/wiki/Java_class_file#General_layout - the Java constant pool of a class file begins 10 bytes into the file.

So far, I've been able to parse everything before that (magic to check if it's a classfile, major/minor versions, constant pool size) but I still don't understand exactly how to parse the constant pool. Like, are there opcodes for specifying method refs and other things?

Is there any way I can reference each hex value before text is represented in hex to find out what the following value is?

Should I go about by splitting each set of entries by NOPs (0x00) and then parsing each byte that isn't a text value?

For example, how can I work out exactly what each of these values represents? enter image description here

like image 291
user3530525 Avatar asked Jan 08 '23 16:01

user3530525


1 Answers

The only relevant documentation for class files you need is the The Java® Virtual Machine Specification, especially Chapter 4. The class File Format and, if you are going to parse more than the constant pool, Chapter 6. The Java Virtual Machine Instruction Set.

The constant pool consists of variable length items whose first byte determines its type which in turn dictates the size. Most items consist of one or two indices pointing to other items. A simple parsing code which doesn’t need any 3rd party library may look like this:

public static final int HEAD=0xcafebabe;
// Constant pool types
public static final byte CONSTANT_Utf8               = 1;
public static final byte CONSTANT_Integer            = 3;
public static final byte CONSTANT_Float              = 4;
public static final byte CONSTANT_Long               = 5;
public static final byte CONSTANT_Double             = 6;
public static final byte CONSTANT_Class              = 7;
public static final byte CONSTANT_String             = 8;
public static final byte CONSTANT_FieldRef           = 9;
public static final byte CONSTANT_MethodRef          =10;
public static final byte CONSTANT_InterfaceMethodRef =11;
public static final byte CONSTANT_NameAndType        =12;
public static final byte CONSTANT_MethodHandle       =15;
public static final byte CONSTANT_MethodType         =16;
public static final byte CONSTANT_InvokeDynamic      =18;
public static final byte CONSTANT_Module             =19;
public static final byte CONSTANT_Package            =20;

static void parseRtClass(Class<?> clazz) throws IOException, URISyntaxException {
    URL url = clazz.getResource(clazz.getSimpleName()+".class");
    if(url==null) throw new IOException("can't access bytecode of "+clazz);
    Path p = Paths.get(url.toURI());
    if(!Files.exists(p))
        p = p.resolve("/modules").resolve(p.getRoot().relativize(p));
    parse(ByteBuffer.wrap(Files.readAllBytes(p)));
}
static void parseClassFile(Path path) throws IOException {
    ByteBuffer bb;
    try(FileChannel ch=FileChannel.open(path, StandardOpenOption.READ)) {
        bb=ch.map(FileChannel.MapMode.READ_ONLY, 0, ch.size());
    }
    parse(bb);
}
static void parse(ByteBuffer buf) {
    if(buf.order(ByteOrder.BIG_ENDIAN).getInt()!=HEAD) {
        System.out.println("not a valid class file");
        return;
    }
    int minor=buf.getChar(), ver=buf.getChar();
    System.out.println("version "+ver+'.'+minor);
    for(int ix=1, num=buf.getChar(); ix<num; ix++) {
        String s; int index1=-1, index2=-1;
        byte tag = buf.get();
        switch(tag) {
            default:
                System.out.println("unknown pool item type "+buf.get(buf.position()-1));
                return;
            case CONSTANT_Utf8: decodeString(ix, buf); continue;
            case CONSTANT_Class: case CONSTANT_String: case CONSTANT_MethodType:
            case CONSTANT_Module: case CONSTANT_Package:
                s="%d:\t%s ref=%d%n"; index1=buf.getChar();
                break;
            case CONSTANT_FieldRef: case CONSTANT_MethodRef:
            case CONSTANT_InterfaceMethodRef: case CONSTANT_NameAndType:
                s="%d:\t%s ref1=%d, ref2=%d%n";
                index1=buf.getChar(); index2=buf.getChar();
                break;
            case CONSTANT_Integer: s="%d:\t%s value="+buf.getInt()+"%n"; break;
            case CONSTANT_Float: s="%d:\t%s value="+buf.getFloat()+"%n"; break;
            case CONSTANT_Double: s="%d:\t%s value="+buf.getDouble()+"%n"; ix++; break;
            case CONSTANT_Long: s="%d:\t%s value="+buf.getLong()+"%n"; ix++; break;
            case CONSTANT_MethodHandle:
                s="%d:\t%s kind=%d, ref=%d%n"; index1=buf.get(); index2=buf.getChar();
                break;
             case CONSTANT_InvokeDynamic:
                s="%d:\t%s bootstrap_method_attr_index=%d, ref=%d%n";
                index1=buf.getChar(); index2=buf.getChar();
                break;
        }
        System.out.printf(s, ix, FMT[tag], index1, index2);
    }
}
private static String[] FMT= {
    null, "Utf8", null, "Integer", "Float", "Long", "Double", "Class",
    "String", "Field", "Method", "Interface Method", "Name and Type",
    null, null, "MethodHandle", "MethodType", null, "InvokeDynamic",
    "Module", "Package"
};

private static void decodeString(int poolIndex, ByteBuffer buf) {
    int size=buf.getChar(), oldLimit=buf.limit();
    buf.limit(buf.position()+size);
    StringBuilder sb=new StringBuilder(size+(size>>1)+16)
        .append(poolIndex).append(":\tUtf8 ");
    while(buf.hasRemaining()) {
        byte b=buf.get();
        if(b>0) sb.append((char)b);
        else
        {
            int b2 = buf.get();
            if((b&0xf0)!=0xe0)
                sb.append((char)((b&0x1F)<<6 | b2&0x3F));
            else
            {
                int b3 = buf.get();
                sb.append((char)((b&0x0F)<<12 | (b2&0x3F)<<6 | b3&0x3F));
            }
        }
    }
    buf.limit(oldLimit);
    System.out.println(sb);
}

Don’t get confused by the getChar() calls, I used them as a convenient way for getting an unsigned short, instead of getShort()&0xffff.

The code above simply prints the indices of references to other pool items. For decoding the items, you may first store the data of all items into a random access data structure, i.e. array or List as items may refer to items with a higher index number. And mind the starting at index 1

like image 70
Holger Avatar answered Feb 03 '23 02:02

Holger