Option Explicit
'обрабатываемый файл
Const opt_ProcessingFile = "H:\_VBS, WSH\DuplicatesChecker\test.txt"
'кодировка файла
Const opt_Charset = "utf-8"
'минимальное число повторов для отображения
Const opt_minDuplicates = 2
Private Function GetRandomColor() As Long
Static prevIndex As Long
Dim rndIndex As Long
Dim listColors 'наиболее заметные цвета для глаза, с комфортно читаемым текстом на их фоне (не слишком светлые и не слишком тёмные)
listColors = Array(3, 4, 6, 7, 8, 10, 12, 14, 16, 17, 18, 20, 22, 24, 26, 27, 28, 33, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 50)
rndIndex = Int(rnd() * (UBound(listColors) + 1))
If rndIndex = prevIndex Then 'предотвратить повторение цвета
GetRandomColor = GetRandomColor()
Else
GetRandomColor = listColors(rndIndex)
prevIndex = rndIndex
End If
End Function
Function ReadTextFile(path As String, charset As String, out_Text As String) As Boolean
On Error GoTo ErrH:
Dim objStream As Object
Set objStream = CreateObject("ADODB.Stream")
objStream.charset = charset
objStream.Open
objStream.LoadFromFile (path)
out_Text = objStream.ReadText()
objStream.Close
ReadTextFile = True
Exit Function
ErrH:
MsgBox "Error: " & Err.Number & " - " & Err.Description
End Function
Function PrintFile(path As String, charset As String, out_NumLines As Long) As Boolean
Dim sContent As String
If (ReadTextFile(path, charset, sContent)) Then
Dim arr, i&
sContent = Replace$(sContent, vbCr, vbNullString)
arr = Split(sContent, vbLf)
For i = 0 To UBound(arr)
With Cells(i + 1, 1)
.NumberFormat = "@"
.Value = arr(i)
End With
Next
out_NumLines = UBound(arr) + 1
PrintFile = True
End If
End Function
Function SanitizeLex(lex As String) As String
SanitizeLex = Trim(lex)
End Function
Private Sub CreateHyperlinks(y As Long, ColumnStartIdx As Long, aNumLines)
Dim i As Long, x As Long, nLineNumber As Long
For i = 0 To UBound(aNumLines)
x = ColumnStartIdx + i
nLineNumber = aNumLines(i)
Dim txt As String
txt = "{" & CStr(nLineNumber) & "}"
ActiveSheet.Hyperlinks.Add Anchor:=Cells(y, x), Address:="", SubAddress:= _
Cells(nLineNumber, 1).Address, TextToDisplay:=txt
Next
End Sub
Public Sub ShowDuplicates()
Const ColumnLinksStart As String = "H"
Const SplitterNumLines As String = "*"
If (MsgBox("Информация с текущей страницы Excel будет стерта" & vbNewLine & "Продолжить?", vbQuestion + vbYesNo) = vbNo) Then Exit Sub
Cells.Clear
'читаем файл
Dim numLines As Long
If (Not PrintFile(opt_ProcessingFile, opt_Charset, numLines)) Then Exit Sub
Dim oDict As Object
Set oDict = CreateObject("Scripting.Dictionary")
'парсинг
Dim y As Long, sLex As String
For y = 1 To numLines
sLex = SanitizeLex(Cells(y, 1))
If (Len(sLex) <> 0) Then
If Not oDict.Exists(sLex) Then 'поиск повторов
oDict.Add sLex, CStr(y) 'запись номера строки
Else
oDict(sLex) = oDict(sLex) & SplitterNumLines & CStr(y)
End If
End If
Next
' отбор и выделение результатов
Dim vKey, vNumLines, iNumLines&, iColor&, i&, k&, x&, RA As Range, ColumnStartIdx As Long
ColumnStartIdx = Columns(ColumnLinksStart).Column
For Each vKey In oDict.Keys
vNumLines = Split(oDict(vKey), SplitterNumLines)
iNumLines = UBound(vNumLines) + 1
If iNumLines >= opt_minDuplicates Then
iColor = GetRandomColor()
For i = 0 To UBound(vNumLines)
Set RA = Cells(CLng(vNumLines(i)), 1)
' подсвечиваем дубликаты одним цветом
RA.Interior.ColorIndex = iColor
' создаём якорные гиперссылки на строки дубликатов
Call CreateHyperlinks(RA.Row, ColumnStartIdx, vNumLines)
Next
End If
Next
End Sub