I have an indented file that I need to parsed using java, I need some way to place this in a Section class as shown below
root
root1
text1
text1.1
text1.2
text2
text2.1
text2.2
root2
text1
text1.1
text1.2
text2
text2.1
text2.2.2
I have the class for putting the indented stuff it looks like
public class Section
{
private List<Section> children;
private String text;
private int depth;
public Section(String t)
{
text =t;
}
public List<Section> getChildren()
{
if (children == null)
{
children = new ArrayList<Section>();
}
return children;
}
public void setChildren(List<Section> newChildren)
{
if (newChildren == null) {
children = newChildren;
} else {
if (children == null) {
children = new ArrayList<Section>();
}
for (Section child : newChildren) {
this.addChild(child);
}
}
}
public void addChild(Section child)
{
if (children == null) {
children = new ArrayList<Section>();
}
if (child != null) {
children.add(child);
}
}
public String getText()
{
return text;
}
public void setText(String newText)
{
text =newText;
}
public String getDepth()
{
return depth;
}
public void setDepth(int newDepth)
{
depth = newDepth;
}
}
I need some way to parse the file and place it in expected result which us a Section object which would look like below
Section=
Text="Root"
Children
Child1: Text= "root1"
Child1: "text1"
Child1="Text 1.1"
Child2="Text 1.2"
Child2: "text2"
Child1="Text 2.1"
Child2="Text 2.2"
Children
Child2: Text= "root2"
Child1: "text1"
Child1="Text 1.1"
Child2="Text 1.2"
Child2: "text2"
Child1="Text 2.1"
Child2="Text 2.2"
Here is some code that I have started
int indentCount=0;
while(String text = reader.readline()
{
indentCount=countLeadingSpaces(String word);
//TODO create the section here
}
public static int countLeadingSpaces(String word)
{
int length=word.length();
int count=0;
for(int i=0;i<length;i++)
{
char first = word.charAt(i);
if(Character.isWhitespace(first))
{
count++;
}
else
{
return count;
}
}
return count;
}
I added a parent pointer as well. Maybe the text can be parsed without it, but parent pointers make it easier. First of all, you need to have more constructors:
static final int root_depth = 4; // assuming 4 whitespaces precede the tree root
public Section(String text, int depth) {
this.text = text;
this.depth = depth;
this.children = new ArrayList<Section>();
this.parent = null;
}
public Section(String text, int depth, Section parent) {
this.text = text;
this.depth = depth;
this.children = new ArrayList<Section>();
this.parent = parent;
}
Then, when you start parsing the file, read it line by line:
Section prev = null;
for (String line; (line = bufferedReader.readLine()) != null; ) {
if (prev == null && line begins with root_depth whitespaces) {
Section root = new Section(text_of_line, root_depth);
prev = root;
}
else {
int t_depth = no. of whitespaces at the beginning of this line;
if (t_depth > prev.getDepth())
// assuming that empty sections are not allowed
Section t_section = new Section(text_of_line, t_depth, prev);
prev.addChild(t_section);
}
else if (t_depth == prev.getDepth) {
Section t_section = new Section(text_of_line, t_depth, prev.getParent());
prev.getParent().addChild(t_section);
}
else {
while (t_depth < prev.getDepth()) {
prev = prev.getParent();
}
// at this point, (t_depth == prev.getDepth()) = true
Section t_section = new Section(text_of_line, t_depth, prev.getParent());
prev.getParent().addChild(t_section);
}
}
}
I have glossed over some finer points of the pseudo-code, but I think you get the overall idea of how to go about this parsing. Do remember to implement the methods addChild(), getDepth(), getParent(), etc.
Surprisingly complex problem... but here's a pseudocode
intialize a stack
push first line to stack
while (there are more lines to read) {
S1 = top of stack // do not pop off yet
S2 = read a line
if depth of S1 < depth of S2 {
add S2 as child of S1
push S2 into stack
}
else {
while (depth of S1 >= depth of S2 AND there are at least 2 elements in stack) {
pop stack
S1 = top of stack // do not pop
}
add S2 as child of S1
push S2 into stack
}
}
return bottom element of stack
where depth is the # leading whitespaces. You might have to either modify or wrap the Section class to store the depth of the line.
Based on this answer, I've created a C# solution.
It allows for multiple roots and assumes an input structure like:
Test
A
B
C
C1
C2
D
Something
One
Two
Three
An example usage of the code would be:
var lines = new[]
{
"Test",
"\tA",
"\tB",
"\tC",
"\t\tC1",
"\t\tC2",
"\tD",
"Something",
"\tOne",
"\tTwo",
"\tThree"
};
var roots = IndentedTextToTreeParser.Parse(lines, 0, '\t');
var dump = IndentedTextToTreeParser.Dump(roots);
Console.WriteLine(dump);
You can specify the root indention (zero by default) and also the idention character (a tab \t
by default).
The full code:
namespace MyNamespace
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
public static class IndentedTextToTreeParser
{
// https://stackoverflow.com/questions/21735468/parse-indented-text-tree-in-java
public static List<IndentTreeNode> Parse(IEnumerable<string> lines, int rootDepth = 0, char indentChar = '\t')
{
var roots = new List<IndentTreeNode>();
// --
IndentTreeNode prev = null;
foreach (var line in lines)
{
if (string.IsNullOrEmpty(line.Trim(indentChar)))
throw new Exception(@"Empty lines are not allowed.");
var currentDepth = countWhiteSpacesAtBeginningOfLine(line, indentChar);
if (currentDepth == rootDepth)
{
var root = new IndentTreeNode(line, rootDepth);
prev = root;
roots.Add(root);
}
else
{
if (prev == null)
throw new Exception(@"Unexpected indention.");
if (currentDepth > prev.Depth + 1)
throw new Exception(@"Unexpected indention (children were skipped).");
if (currentDepth > prev.Depth)
{
var node = new IndentTreeNode(line.Trim(), currentDepth, prev);
prev.AddChild(node);
prev = node;
}
else if (currentDepth == prev.Depth)
{
var node = new IndentTreeNode(line.Trim(), currentDepth, prev.Parent);
prev.Parent.AddChild(node);
prev = node;
}
else
{
while (currentDepth < prev.Depth) prev = prev.Parent;
// at this point, (currentDepth == prev.Depth) = true
var node = new IndentTreeNode(line.Trim(indentChar), currentDepth, prev.Parent);
prev.Parent.AddChild(node);
}
}
}
// --
return roots;
}
public static string Dump(IEnumerable<IndentTreeNode> roots)
{
var sb = new StringBuilder();
foreach (var root in roots)
{
doDump(root, sb, @"");
}
return sb.ToString();
}
private static int countWhiteSpacesAtBeginningOfLine(string line, char indentChar)
{
var lengthBefore = line.Length;
var lengthAfter = line.TrimStart(indentChar).Length;
return lengthBefore - lengthAfter;
}
private static void doDump(IndentTreeNode treeNode, StringBuilder sb, string indent)
{
sb.AppendLine(indent + treeNode.Text);
foreach (var child in treeNode.Children)
{
doDump(child, sb, indent + @" ");
}
}
}
[DebuggerDisplay(@"{Depth}: {Text} ({Children.Count} children)")]
public class IndentTreeNode
{
public IndentTreeNode(string text, int depth = 0, IndentTreeNode parent = null)
{
Text = text;
Depth = depth;
Parent = parent;
}
public string Text { get; }
public int Depth { get; }
public IndentTreeNode Parent { get; }
public List<IndentTreeNode> Children { get; } = new List<IndentTreeNode>();
public void AddChild(IndentTreeNode child)
{
if (child != null) Children.Add(child);
}
}
}
I've also included an method Dump()
to convert the tree back to a string in order to better debug the algorithm itself.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With