Im using the below code to extract sizes from excel cells, which has 7 different patterns..
The below code is for extracting OZ sizes however, i need to do the same ml and gram (g). Hence, the way i wrote the code will be very lengthy.
| Value | Result |
|---|---|
| Size:0.028 oz x 5 | 0.028 oz x 5 |
| Size:6x0.04 oz + 30 oz | 6 x0.04 oz |
| Size:8 x 0.03 oz | 8 x 0.03 oz |
| Size:2 x 0.07 oz | 2 x 0.07 oz |
| Size:5 x 0.028 oz | 5 x 0.028 oz |
| Size:0.028 ozx5 | 0.028 ozx5 |
| Size:0.028 oz | 0.028 oz |
| Size:30.00 oz | 30 .00 oz |
| Size:2 * 0.07 oz | 2 * 0.07 oz |
| Size:0.028 oz * 5 | 0.028 oz * 5 |
| Size:2*0.07 oz | 2 *0.07 oz |
| Size:0.028 oz*5 | 0.028 oz*5 |
| Size:3.00 oz | 3 .00 oz |
| Size:3.00 oz | 3 .00 oz |
| Color:01UNIVERSA#||#Size:1.00 oz | 1 .00 oz |
| Lancôme Effacil Eye Makeup Remover, 4.2 Fl. Oz. | 0 . oz |
| Confidence In A Cleanser, 1 fl. oz, Travel Size | 1 fl. oz |
Function getozv1(str As String)
Dim n As Long, unit As String, nums() As Variant
Static rgx As Object, cmat As Object
'with rgx as static, it only has to be created once; beneficial when filling a long column with this UDF
If rgx Is Nothing Then
Set rgx = CreateObject("VBScript.RegExp")
End If
getsize = vbNullString
cnt = 0
With rgx
.Global = True
.MultiLine = False
.Pattern = "[0-9]{1,2}\sx\s[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)" 'for Size:2 x 0.5 oz
If .test(str) Then
Set cmat = .Execute(str)
'resize the nums array to accept the matches
ReDim nums(cmat.Count - 1)
'get measurement unit
unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare)
'populate the nums array with the matches
For n = LBound(nums) To UBound(nums)
nums(n) = Val(cmat.Item(n))
cnt = cnt + 1
Next n
If cnt > 1 Then
getozv1 = "Set"
Else
'convert the nums array to a subtotal with unit suffix
getozv1 = LCase(Application.Sum(nums) & " " & unit)
End If
End If
If Not .test(str) Then
.Pattern = "[0-9]{1,2}x[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)" ''for no space Size:2x0.5 oz
If .test(str) Then
Set cmat = .Execute(str)
'resize the nums array to accept the matches
ReDim nums(cmat.Count - 1)
'get measurement unit
unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare)
'populate the nums array with the matches
For n = LBound(nums) To UBound(nums)
nums(n) = Val(cmat.Item(n))
cnt = cnt + 1
Next n
If cnt > 1 Then
getozv1 = "Set"
Else
'convert the nums array to a subtotal with unit suffix
getozv1 = LCase(Application.Sum(nums) & " " & unit)
End If
End If
End If
If Not .test(str) Then
.Pattern = "[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)\sx\s[0-9]{1,2}" 'for Size:0.028 oz x 5/ 0.8g
'pattern2 without space [0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)x[0-9]{1,2}
If .test(str) Then
Set cmat = .Execute(str)
'resize the nums array to accept the matches
ReDim nums(cmat.Count - 1)
'get measurement unit
unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare)
'populate the nums array with the matches
For n = LBound(nums) To UBound(nums)
nums(n) = Val(cmat.Item(n))
cnt = cnt + 1
Next n
If cnt > 1 Then
getozv1 = "Set"
Else
'convert the nums array to a subtotal with unit suffix
getozv1 = LCase(Application.Sum(nums) & " " & unit)
End If
End If
End If
If Not .test(str) Then
.Pattern = "[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)x[0-9]{1,2}" 'for without Size:0.028 oz x 5/ 0.8g
If .test(str) Then
Set cmat = .Execute(str)
'resize the nums array to accept the matches
ReDim nums(cmat.Count - 1)
'get measurement unit
unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare)
'populate the nums array with the matches
For n = LBound(nums) To UBound(nums)
nums(n) = Val(cmat.Item(n))
cnt = cnt + 1
Next n
If cnt > 1 Then
getozv1 = "Set"
Else
'convert the nums array to a subtotal with unit suffix
getozv1 = LCase(Application.Sum(nums) & " " & unit)
End If
End If
End If
If Not .test(str) Then
.Pattern = "[0-9]{1,2}\s\*\s[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)" 'Size:2 * 0.07 oz
If .test(str) Then
Set cmat = .Execute(str)
'resize the nums array to accept the matches
ReDim nums(cmat.Count - 1)
'get measurement unit
unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare)
'populate the nums array with the matches
For n = LBound(nums) To UBound(nums)
nums(n) = Val(cmat.Item(n))
cnt = cnt + 1
Next n
If cnt > 1 Then
getozv1 = "Set"
Else
'convert the nums array to a subtotal with unit suffix
getozv1 = LCase(Application.Sum(nums) & " " & unit)
End If
End If
End If
If Not .test(str) Then
.Pattern = "[0-9]{1,2}\*[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)" 'no space Size:2*0.07 oz
If .test(str) Then
Set cmat = .Execute(str)
'resize the nums array to accept the matches
ReDim nums(cmat.Count - 1)
'get measurement unit
unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare)
'populate the nums array with the matches
For n = LBound(nums) To UBound(nums)
nums(n) = Val(cmat.Item(n))
cnt = cnt + 1
Next n
If cnt > 1 Then
getozv1 = "Set"
Else
'convert the nums array to a subtotal with unit suffix
getozv1 = LCase(Application.Sum(nums) & " " & unit)
End If
End If
End If
If Not .test(str) Then
.Pattern = "[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)\s\*\s[0-9]{1,2}" 'Size:0.028 oz * 5
If .test(str) Then
Set cmat = .Execute(str)
'resize the nums array to accept the matches
ReDim nums(cmat.Count - 1)
'get measurement unit
unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare)
'populate the nums array with the matches
For n = LBound(nums) To UBound(nums)
nums(n) = Val(cmat.Item(n))
cnt = cnt + 1
Next n
If cnt > 1 Then
getozv1 = "Set"
Else
'convert the nums array to a subtotal with unit suffix
getozv1 = LCase(Application.Sum(nums) & " " & unit)
End If
End If
End If
If Not .test(str) Then
.Pattern = "[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)\*[0-9]{1,2}" 'Size:0.028 oz*5
If .test(str) Then
Set cmat = .Execute(str)
'resize the nums array to accept the matches
ReDim nums(cmat.Count - 1)
'get measurement unit
unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare)
'populate the nums array with the matches
For n = LBound(nums) To UBound(nums)
nums(n) = Val(cmat.Item(n))
cnt = cnt + 1
Next n
If cnt > 1 Then
getozv1 = "Set"
Else
'convert the nums array to a subtotal with unit suffix
getozv1 = LCase(Application.Sum(nums) & " " & unit)
End If
End If
End If
If Not .test(str) Then
.Pattern = "([0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)){1,3}" 'base pattern
If .test(str) Then
Set cmat = .Execute(str)
'resize the nums array to accept the matches
ReDim nums(cmat.Count - 1)
'get measurement unit
unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare)
'populate the nums array with the matches
For n = LBound(nums) To UBound(nums)
nums(n) = Val(cmat.Item(n))
cnt = cnt + 1
Next n
If cnt > 1 Then
getozv1 = "Set"
Else
'convert the nums array to a subtotal with unit suffix
getozv1 = LCase(Application.Sum(nums) & " " & unit)
End If
End If
End If
End With
End Function
How to do we do this in a efficient way? can we combine these 7 patterns without impacting result/accuracy?
Or is there better way to do this?
Few examples for gram(g) ml
| Data | Expected result | Note |
|---|---|---|
| MamaDerma Stretch Mark Repair Cream - 100 ml | 100 ml | |
| MamaDerma Stretch Mark Prevention Oil - 60 ml | 60 ml | |
| Supermood Egoboost Moisture Kick Serum, 30ml | 30ml | |
| Size:15 mL x3 | 15 ml x3 | |
| Size:Mini Size - 7 g | 7 g | |
| Type: 4 g Ampoule 40 g Rubber™ Mask with Moisturizing Hyaluronic Acid | Set | multiple sizes |
| Size:Standard Size- 5.0 g-golden pink | 5.0 g | |
| Size:Mini Size - 7 g | 7 g | |
| Size:Standard Size - 21 g | 21 g | |
| Size:Mini Size Translucent - 5.4 g | 5.4 g | |
| 05浅色/陶瓷白 7g | 7 g | |
| Size: 28ml*4 | 28ml*4 | |
| Size:20g*10 | 20g*10 | |
| Size: (2ml+3ml)×4ml | Set | multiple sizes |
| Size4*18ml | 4*18ml | |
| Size: 27g*5ea | 27g*5 | |
| Size 4.6ml × 4 | 4.6ml × 4 | |
| Size 4 x 4.6ml | 4 x 4.6ml | |
| Size 4.6ml×4 | 4.6ml×4 | |
| Size 4x4.6ml | 4x4.6ml | |
| Size: 28ml * 4 | 28ml * 4 | |
| Size:20g * 10 | 20g * 10 | |
| 27mlx3 片 | 27mlx3 | |
| Size: 12 x 1.5g | 12 x 1.5g | |
| Size: 12x1.5g | 12x1.5g |
It is not too difficult to develop a single regex that works for all of your examples.
I suggest:
Option Explicit
'set reference to Microsoft VBcript Regular Expressions 5.5
Function getAmt(S As String) As String
Dim RE As RegExp, MC As MatchCollection
Const sPat As String = "(?:\d+\s*[x*]\s*)?\d+(?:\.\d+)?\s*(?=(g|ml|(?:fl)?\.?\s*oz))\1(?:\s*[×x*]\s*\d+)?"
Set RE = New RegExp
With RE
.Global = True
.IgnoreCase = True
.Pattern = sPat
Set MC = .Execute(S)
Select Case MC.Count
Case 0
getAmt = "no amount"
Case 1
getAmt = MC(0)
Case Is > 1
getAmt = "Set"
End Select
End With
End Function

It works on all of your examples (except for the obvious typos)
Here is a detailed explanation of the regex:
(?:\d+\s*[x*]\s*)?\d+(?:\.\d+)?\s*(?=(g|ml|(?:fl)?\.?\s*oz))\1(?:\s*[×x*]\s*\d+)?
Options: Case insensitive
(?:\d+\s*[x*]\s*)?
?\d+
+\s*
*[x*]\s*
*\d+
+(?:\.\d+)?
?\.\d+
+\s*
*(?=(g|ml|(?:fl)?\.?\s*oz))
(g|ml|(?:fl)?\.?\s*oz)
g
gml
ml(?:fl)?\.?\s*oz
(?:fl)?
?fl\.?
?\s*
*oz\1(?:\s*[×x*]\s*\d+)?
?\s*
*[×x*]\s*
*\d+
+Created with RegexBuddy
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With