Use regex to strip disallowed characters from headers and values when building SQL VALUES; add optional header quoting

This commit is contained in:
Paul Trowbridge 2020-01-29 13:30:53 -05:00
parent 022240f8d3
commit 99905a9341

View File

@ -2274,7 +2274,7 @@ Function MISCe_col_to_letter(ByRef x As Long) As String
End Function End Function
Public Function SQLp_build_sql_values(ByRef tbl() As String, trim As Boolean, headers As Boolean, syntax As SQLsyntax) As String Public Function SQLp_build_sql_values(ByRef tbl() As String, trim As Boolean, headers As Boolean, syntax As SQLsyntax, ByRef quote_headers As Boolean) As String
Dim i As Long Dim i As Long
@ -2284,6 +2284,17 @@ Public Function SQLp_build_sql_values(ByRef tbl() As String, trim As Boolean, he
Dim type_flag() As String Dim type_flag() As String
Dim col_name As String Dim col_name As String
Dim start_row As Long Dim start_row As Long
Dim rx As Object
Dim strip_text As String
Dim strip_num As String
Dim strip_date As String
Set rx = CreateObject("vbscript.regexp")
rx.Global = True
strip_text = "[^a-zA-Z0-9 \-\_\,\#\""]"
strip_num = "[^0-9\.]"
strip_date = "[^0-9\\\-\:\.]"
ReDim type_flag(UBound(tbl, 1)) ReDim type_flag(UBound(tbl, 1))
For j = 0 To UBound(tbl, 1) For j = 0 To UBound(tbl, 1)
@ -2306,16 +2317,22 @@ Public Function SQLp_build_sql_values(ByRef tbl() As String, trim As Boolean, he
End If End If
Next j Next j
rx.Pattern = strip_text
If headers Then If headers Then
start_row = 1 start_row = 1
For i = 0 To UBound(tbl, 1) For i = 0 To UBound(tbl, 1)
If i > 0 Then col_name = col_name & "," If i > 0 Then col_name = col_name & ","
col_name = col_name & tbl(i, 0) If quote_headers Then
col_name = col_name & """" & rx.Replace(tbl(i, 0), "") & """"
Else
col_name = col_name & rx.Replace(tbl(i, 0), "")
End If
Next i Next i
Else Else
start_row = 0 start_row = 0
End If End If
For i = start_row To UBound(tbl, 2) For i = start_row To UBound(tbl, 2)
rec = "" rec = ""
If i <> start_row Then sql = sql & "," & vbCrLf If i <> start_row Then sql = sql & "," & vbCrLf
@ -2324,35 +2341,40 @@ Public Function SQLp_build_sql_values(ByRef tbl() As String, trim As Boolean, he
If j <> 0 Then rec = rec & "," If j <> 0 Then rec = rec & ","
Select Case type_flag(j) Select Case type_flag(j)
Case "N" '-------N = numeric but should probably be N for numeric---- Case "N" '-------N = numeric but should probably be N for numeric----
rx.Pattern = strip_num
If tbl(j, i) = "" Then If tbl(j, i) = "" Then
rec = rec & "CAST(NULL AS NUMERIC)" rec = rec & "CAST(NULL AS NUMERIC)"
Else Else
rec = rec & Replace(Replace(tbl(j, i), "'", "''"), ",", "") rec = rec & rx.Replace(tbl(j, i), "")
End If End If
Case "S" '-------S = string------------------------------------------ Case "S" '-------S = string------------------------------------------
rx.Pattern = strip_text
If LTrim(RTrim(tbl(j, i))) = "" Then If LTrim(RTrim(tbl(j, i))) = "" Then
rec = rec & "CAST(NULL AS VARCHAR(255))" rec = rec & "CAST(NULL AS VARCHAR(255))"
Else Else
If trim Then If trim Then
rec = rec & "'" & LTrim(RTrim(Replace(tbl(j, i), "'", "''"))) & "'" rec = rec & "'" & LTrim(RTrim(rx.Replace(tbl(j, i), ""))) & "'"
Else Else
rec = rec & "'" & Replace(tbl(j, i), "'", "''") & "'" rec = rec & "'" & rx.Replace(tbl(j, i), "") & "'"
End If End If
End If End If
Case "D" '-------D = date--------------------------------------------- Case "D" '-------D = date---------------------------------------------
rx.Pattern = strip_date
If LTrim(RTrim(tbl(j, i))) = "" Then If LTrim(RTrim(tbl(j, i))) = "" Then
rec = rec & "CAST(NULL AS DATE)" rec = rec & "CAST(NULL AS DATE)"
Else Else
rec = rec & "CAST('" & tbl(j, i) & "' AS DATE)" rec = rec & "CAST('" & rx.Replace(tbl(j, i), "") & "' AS DATE)"
End If End If
Case Else '-------Assume text------------------------------------------ Case Else '-------Assume text------------------------------------------
rx.Pattern = strip_text
If LTrim(RTrim(tbl(j, i))) = "" Then If LTrim(RTrim(tbl(j, i))) = "" Then
rec = rec & "CAST(NULL AS VARCHAR(255))" rec = rec & "CAST(NULL AS VARCHAR(255))"
Else Else
If trim Then If trim Then
rec = rec & "'" & LTrim(RTrim(Replace(tbl(j, i), "'", "''"))) & "'" rec = rec & "'" & LTrim(RTrim(rx.Replace(tbl(j, i), ""))) & "'"
Else Else
rec = rec & "'" & Replace(tbl(j, i), "'", "''") & "'" rec = rec & "'" & rx.Replace(tbl(j, i), "") & "'"
End If End If
End If End If
End Select End Select