Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

concatenate multi values in one record without duplication

Tags:

python

vba

I have a dbf table like below which is the result of one to many join from two tables. I want to have unique zone values from one Taxlot id field.

table name: input table
tid ----- zone
1 ------ A
1 ------ A
1 ------ B
1 ------ C
2 ------ D
2 ------ E
3 ------ C

Desirable output table table name: input table
tid ----- zone
1 ------ A, B, C
2 ------ D, E
3 ------ C

I got some help but couldn't make it to work.

inputTbl = r"C:\temp\input.dbf"
taxIdZoningDict = {}
searchRows = gp.searchcursor(inputTbl)
searchRow = searchRows.next()
while searchRow:
   if searchRow.TID in taxIdZoningDict:
      taxIdZoningDict[searchRow.TID].add(searchRow.ZONE)
   else:
      taxIdZoningDict[searchRow.TID] = set() #a set prevents dulpicates!
      taxIdZoningDict[searchRow.TID].add(searchRow.ZONE)
   searchRow = searchRows.next()

outputTbl = r"C:\temp\output.dbf"
gp.CreateTable_management(r"C:\temp", "output.dbf")
gp.AddField_management(outputTbl, "TID", "LONG")
gp.AddField_management(outputTbl, "ZONES", "TEXT", "", "", "20")
tidList = taxIdZoningDict.keys()
tidList.sort() #sorts in ascending order
insertRows = gp.insertcursor(outputTbl)
for tid in tidList:
   concatString = ""
   for zone in taxIdZoningDict[tid]
      concatString = concatString + zone + ","
   insertRow = insertRows.newrow()
   insertRow.TID = tid
   insertRow.ZONES = concatString[:-1]
   insertRows.insertrow(insertRow)
del insertRow
del insertRows
like image 722
user295216 Avatar asked Mar 17 '10 16:03

user295216


2 Answers

I would use my dbf module and defaultdict to greatly simplify that code:

import dbf
from collections import defaltdict

inputTbl = dbf.Table(r'c:\temp\input.dbf')
taxIdZoning = defaultdict(set)

for record in inputTbl:
    taxIdZoning[record.tid].add(record.zone)
inputTbl.close()

outputTbl = dbf.Table(r'c:\temp\output.dbf', 'tid N(17.0), zones C(20)')
for tid in sorted(taxIdZoning):
    record = outputTbl.append()
    record.tid = tid
    record.zones = ','.join(sorted(taxIdZoning[tid]))
outputTbl.close()

NOTE: field names are lowercase, and I wasn't sure exactly how to represent LONG, but hopefully 17 digits is enough. :) My apologies for any bugs -- hard to test without input files.

like image 171
Ethan Furman Avatar answered Sep 29 '22 20:09

Ethan Furman


This worked for me using both Microsoft Access VBA and Microsoft Excel VBA. It isn't very efficient code, but it works. I was able to open the resultant file in both Access and Excel.

Set the sDBF* and sOutDBF* variables to tailor this to your own custom paths.

Sub VBASolution()
    Dim oRS
    Dim sConn
    Dim sDBFPath, sOutDBFPath
    Dim sDBFName, sOutDBFName
    Dim oDict
    Dim curTID, curZone, sZones
    Dim oConn
    Dim oFS
    Dim sTableName

    sDBFPath = "C:\Path\To\DBFs\"
    sDBFName = "Input.dbf"

    sOutDBFPath = "C:\Path\To\DBFs\"
    sOutDBFName = "RESULTS.dbf"

    sConn = "Driver={Microsoft dBASE Driver (*.dbf)}; DriverID=277; Dbq="
    Set oRS = CreateObject("ADODB.Recordset")


    oRS.Open "SELECT DISTINCT tid, zone FROM " & sDBFName, sConn & sDBFPath

    Set oDict = CreateObject("Scripting.Dictionary")

    Do While Not oRS.EOF
        curTID = oRS.Fields("tid").Value
        curZone = oRS.Fields("zone").Value
        If Not oDict.Exists(curTID) Then
            oDict.Add curTID, CreateObject("Scripting.Dictionary")
        End If
        If Not oDict(curTID).Exists(curZone) Then
            oDict(curTID).Add curZone, curZone
        End If
        oRS.MoveNext
    Loop
    oRS.Close

    Set oRS = Nothing

    Set oConn = CreateObject("ADODB.Connection")
    oConn.Open sConn & sOutDBFPath

    'Delete the resultant DBF file if it already exists.
    Set oFS = CreateObject("Scripting.FileSystemObject")
    With oFS
        If .FileExists(sOutDBFPath & "\" & sOutDBFName) Then
            .DeleteFile sOutDBFPath & "\" & sOutDBFName
        End If
    End With

    sTableName = oFS.GetBaseName(sOutDBFName)

    oConn.Execute "CREATE TABLE " & sTableName & " (tid int, zone varchar(80))"

    Dim i, j
    For Each i In oDict.Keys
        curTID = i
        sZones = ""
        For Each j In oDict(i)
            sZones = sZones & "," & j
        Next
        sZones = Mid(sZones, 2)
        oConn.Execute "INSERT INTO " & sTableName & " (tid, zone) VALUES ('" & curTID & "','" & sZones & "')"
    Next
    oConn.Close

    Set oConn = Nothing
    Set oDict = Nothing
    Set oFS = Nothing
End Sub

EDIT: For what it's worth, this also worked for me by inserting it into a VBScript .VBS file (text) in Windows XP and adding this line to the bottom of the file:

Call VBASolution()

I do not know whether or not it requires Office to be installed or if the appropriate dbf drivers come with Windows.

like image 43
transistor1 Avatar answered Sep 29 '22 20:09

transistor1