パイプ区切りのCSVファイルで、特定の列に重複した値があるかどうかを確認する方法

Question 1

データセット用の特別なツール（例えばCSVツール）がほとんど使えない環境でも、次のようにawkで処理できます。

awk -F"|" '
    # Print every line whose combination of fields w, x, y and z occurs more
    # than once in the input. The first line of each duplicate group is held
    # back and emitted just before the second one; later repeats are printed
    # as they are read.
    #
    # Presence is tracked by the seen[] counter rather than by the stored line
    # text, so lines that read as "", "0" or "1" are handled like any other.
    {
        key = $w SUBSEP $x SUBSEP $y SUBSEP $z   # composite key built from the 4 chosen fields
    }
    seen[key]++ {                                # post-increment is 0 (false) on first sight only
        if (key in first) {                      # first repetition: the original line is still pending
            print first[key]
            delete first[key]                    # emit it once and release the stored copy
        }
        print                                    # the current line is a duplicate too
        next                                     # skip the store rule below
    }
    {
        first[key] = $0                          # first occurrence: keep it until a repeat shows up
    }
    ' w="$ColumnNo1" x="$ColumnNo2" y="$ColumnNo3" z="$ColumnNo4" file.csv

Answer

データセット用の特別なツール（例えばCSVツール）がほとんど使えない環境でも、次のようにawkで処理できます。

awk -F"|" '
    # Report all lines that share the same values in fields w, x, y and z.
    # The first member of each group is buffered and printed right before the
    # second member; every further member is printed when it is read.
    #
    # A separate counter array decides whether a key was seen before, so the
    # content of a line (empty, "0", "1", ...) never affects the detection.
    {
        key = $w SUBSEP $x SUBSEP $y SUBSEP $z   # index made of the 4 selected fields
    }
    seen[key]++ {                                # true from the second occurrence onwards
        if (key in first) {                      # buffered first line not printed yet
            print first[key]
            delete first[key]                    # print it only once, then drop the copy
        }
        print                                    # print the current (repeated) line
        next                                     # do not fall through to the store rule
    }
    {
        first[key] = $0                          # remember the first line for this key
    }
    ' w="$ColumnNo1" x="$ColumnNo2" y="$ColumnNo3" z="$ColumnNo4" file.csv

Question 2

次の方法を試してください（シェル変数 ColumnNo1〜ColumnNo4 には、列番号を表す整数を設定しておく必要があります）。

awk -F'|' \
    -v C1="$ColumnNo1" -v C2="$ColumnNo2" -v C3="$ColumnNo3" -v C4="$ColumnNo4" '
    # Print the header for one column, then every value counted more than once.
    # col   - column number shown in the header
    # tally - array mapping each value of that column to its occurrence count
    function report(col, tally,    val) {
        printf "Non-unique entries in column %d\n", col
        for (val in tally)
            if (tally[val] > 1)
                print val
    }

    # Count how often each value appears in the four selected columns.
    {
        a1[$C1]++
        a2[$C2]++
        a3[$C3]++
        a4[$C4]++
    }

    # After the whole file is read, list the repeated values column by column.
    END {
        report(C1, a1)
        report(C2, a2)
        report(C3, a3)
        report(C4, a4)
    }' <myfile.csv

Answer

次の方法を試してください（シェル変数 ColumnNo1〜ColumnNo4 には、列番号を表す整数を設定しておく必要があります）。

awk -F'|' \
    -v C1="$ColumnNo1" -v C2="$ColumnNo2" -v C3="$ColumnNo3" -v C4="$ColumnNo4" '
    # Emit the header line for a column followed by each of its values whose
    # occurrence count is greater than one.
    # col    - column number printed in the header
    # counts - array of occurrence counts indexed by column value
    function list_repeats(col, counts,    v) {
        printf "Non-unique entries in column %d\n", col
        for (v in counts) {
            if (counts[v] > 1) {
                print v
            }
        }
    }

    # Per input line: bump the counter of the value found in each chosen column.
    {
        a1[$C1]++
        a2[$C2]++
        a3[$C3]++
        a4[$C4]++
    }

    # At end of input: one report per column, in the order C1..C4.
    END {
        list_repeats(C1, a1)
        list_repeats(C2, a2)
        list_repeats(C3, a3)
        list_repeats(C4, a4)
    }' <myfile.csv

パイプ区切りのCSVファイルで、特定の列に重複した値があるかどうかを確認する方法

答え1

答え2

関連情報