I have to as input data an array with hashes, each hash is description of an html tag (open and end position in the text and type of the tag). I need to generate another array where tags are put in order.
For example:
input = [
{start_p: 0, end_p: 100, start_t: '<p>', end_t: '</p>'},
{start_p: 10, end_p: 50, start_t: '<p>', end_t: '</p>'},
{start_p: 0, end_p: 100, start_t: '<span>', end_t: '</span>'},
{start_p: 20, end_p: 30, start_t: '<em>', end_t: '</em>'},
{start_p: 40, end_p: 50, start_t: '<em>', end_t: '</em>'},
{start_p: 50, end_p: 60, start_t: '<em>', end_t: '</em>'},
{start_p: 70, end_p: 80, start_t: '<em>', end_t: '</em>'},
{start_p: 8, end_p: 99, start_t: '<strong>', end_t: '</strong>'}
]
expected_output: [<p><span><strong><p><em></em><em></em></p><em></em><em></em></strong></span></p>]
instead of just tags in output, each tag should be a Hash with position and tag, like:
{position: 0, tag: '<p>'}
The most important thing it to be ordered in right order, respecting HTML rules of not having intersecting tags (if more than one tags are ending on the same position, the one opened last should go first, if one ends and other opens on the same position, end will be first, etc).
This is part of a legacy system and input and output can't be changed at the moment. Also, the input can be very big (hundreds of thousands of elements)
Any better solution than just brute force recursive ?
input.group_by { |h| h[:start_p] }.
values.
flat_map do |a|
x = 1.0
a.flat_map do |h|
x /= 2.0
[[h[:start_p] += x, h[:start_t]], [h[:end_p] -= x, h[:end_t]]]
end
end.sort_by(&:first).map(&:last).join
#=> "<span><p><strong><p><em></em><em></p></em><em></em><em></em></strong></p></span>"
The steps are as follows.
b = input.group_by { |h| h[:start_p] }
#=> { 0=>[{:start_p=>0, :end_p=>100, :start_t=>"<p>", :end_t=>"</p>"},
# {:start_p=>0, :end_p=>100, :start_t=>"<span>", :end_t=>"</span>"}],
# 10=>[{:start_p=>10, :end_p=>50, :start_t=>"<p>", :end_t=>"</p>"}],
# 20=>[{:start_p=>20, :end_p=>30, :start_t=>"<em>", :end_t=>"</em>"}],
# 40=>[{:start_p=>40, :end_p=>50, :start_t=>"<em>", :end_t=>"</em>"}],
# 50=>[{:start_p=>50, :end_p=>60, :start_t=>"<em>", :end_t=>"</em>"}],
# 70=>[{:start_p=>70, :end_p=>80, :start_t=>"<em>", :end_t=>"</em>"}],
# 8=>[{:start_p=> 8, :end_p=>99, :start_t=>"<strong>", :end_t=>"</strong>"}]}
c = b.values
#=> [[{:start_p=>0, :end_p=>100, :start_t=>"<p>", :end_t=>"</p>"},
# {:start_p=>0, :end_p=>100, :start_t=>"<span>", :end_t=>"</span>"}],
# [{:start_p=>10, :end_p=>50, :start_t=>"<p>", :end_t=>"</p>"}],
# ...
# [{:start_p=>8, :end_p=>99, :start_t=>"<strong>", :end_t=>"</strong>"}]]
d = c.flat_map do |a|
x = 1.0
a.flat_map do |h|
x /= 2.0
[[h[:start_p] += x, h[:start_t]], [h[:end_p] -= x, h[:end_t]]]
end
end
#=> [[0.5, "<p>"], [99.5, "</p>"], [0.25, "<span>"], [99.75, "</span>"],
# [10.5, "<p>"], [49.5, "</p>"], [20.5, "<em>"], [29.5, "</em>"],
# [40.5, "<em>"], [49.5, "</em>"], [50.5, "<em>"], [59.5, "</em>"],
# [70.5, "<em>"], [79.5, "</em>"], [8.5, "<strong>"], [98.5, "</strong>"]]
The first four element of d (tuples) are the most important to understanding the approach I've taken.
e = d.sort_by(&:first)
#=> [[0.25, "<span>"], [0.5, "<p>"], [8.5, "<strong>"], [10.5, "<p>"],
# [20.5, "<em>"], [29.5, "</em>"], [40.5, "<em>"], [49.5, "</p>"],
# [49.5, "</em>"], [50.5, "<em>"], [59.5, "</em>"], [70.5, "<em>"],
# [79.5, "</em>"], [98.5, "</strong>"], [99.5, "</p>"], [99.75, "</span>"]]
f = e.map(&:last)
#=> ["<span>", "<p>", "<strong>", "<p>", "<em>", "</em>", "<em>", "</p>",
# "</em>", "<em>", "</em>", "<em>", "</em>", "</strong>", "</p>", "</span>"]
f.join
#=> "<span><p><strong><p><em></em><em></p></em><em></em><em></em></strong></p></span>"
I will elaborate the calculation of d above if requested to do so.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With