複数の変数(col1)があり、一部のレベル(col2)には3つの値(col 3,4,5)があります。
mango123 L1 1 2 3
mango123 L2 4 5 6
squash378 L1 7 8 9
squash378 L5 10 12 13
squash378 L15 0 1 3
orange765 L1 11 1 3
mango123 L11 11 12 23
変数ペアのリストが与えられたら、レベルごとにペアの値(col 3,4,5)を合計し、ペアの一方の変数がそのレベルに存在しない場合は、存在しない変数の名前を最後の列に出力したいと思います。ペアのいずれかの変数に存在するすべてのレベルについて合計する必要があります。
与えられた入力
mango123 squash378
squash378 orange765
pear546 lime436
最後の列にどの変数にどのレベルのデータが含まれていないかを示す出力が必要です。
mango123 squash378 L1 8 10 12 None
mango123 squash378 L2 4 5 6 squash378
mango123 squash378 L11 11 12 23 squash378
mango123 squash378 L5 10 12 13 mango123
mango123 squash378 L15 0 1 3 mango123
squash378 orange765 L15 0 1 3 orange765
squash378 orange765 L1 18 9 12 None
squash378 orange765 L5 10 12 13 orange765
pear546 lime436 Na Na Na Na both
私の試みは次のとおりです。これではまったくうまくいきません。
# Attempt from the question: load columns 3-5 of file1 into arrays a/b/c keyed
# by $1, then try to fill or sum fields 3-5 (and set a status in $6) for each
# pair read from file2.
# NOTE(review): "NR=FNR" is an assignment, not the comparison "NR==FNR", so the
#   first rule and its "next" apply to every record of both files.
# NOTE(review): the "if" statements are not enclosed in a { } action block.
# NOTE(review): "$4+ = b[$1]" has a space inside the "+=" operator.
# NOTE(review): a/b/c are keyed by $1 only, so the level ($2) is not tracked and
#   later records for the same $1 overwrite earlier ones.
# NOTE(review): the second "if" reads a[$1] although its condition says $1 is
#   not in a - presumably a[$2] was intended; confirm.
awk 'NR=FNR{ a[$1]=$3; b[$1]=$4; c[$1]=$5; next }
if (($1 in a) && !($2 in a )) { $3=a[$1] ; $4 = b[$1]; $5=c[$1];$6=$2}
if (!($1 in a) && ($2 in a )) { $3=a[$1] ; $4 = b[$1]; $5=c[$1];$6=$1}
if (($1 in a) && ($2 in a )) { $3+=a[$1] ; $4+ = b[$1]; $5+=c[$1];$6="None"}
if (!($1 in a) && !($2 in a )) { $3="Na" ; $4 = "Na"; $5="Na";$6="Both"}1' file1 file2
答え1
awk '# For each fruit pair listed in "pairs_file", sum fields 3-5 of "main_file"
# per level and report which fruit of the pair has no record for that level.
# Output per line: fruit1 fruit2 level sum3 sum4 sum5 missing
#   where "missing" is a fruit name, "None", or "both" (pair has no data at all).
#
# Pass 1: process "pairs_file". Build the "pair" array (indexed 1..pz).
# Each element has a leading and trailing space (" fruit1 fruit2 ") so that a
# field value can be searched for unambiguously via index().
# FILENAME is checked in addition to NR==FNR so that an empty "pairs_file" does
# not cause "main_file" records to be read as pairs.
# "pz" (size of "pair") is counted here, not derived from NR in pass 2, so END
# still reports every pair when "main_file" has no records.
# Lines with fewer than two fields (e.g. blank lines) are skipped.
NR==FNR && FILENAME==ARGV[1]{ if( NF>=2 ) pair[++pz]=" "$1" "$2" "; next }
# Pass 2: process "main_file" (fields: fruit level value3 value4 value5).
# For each record, check whether $1 is a member of each "pair" element.
{ for( pi=1; pi<=pz; pi++ ){
    p = pair[pi]                  # Note that "p" has a leading and trailing space
    pix = index( p, " "$1" " )    # 1-based position of " "$1" " in "p"; 0 when absent
    if( pix ){
      pl=p$2" "; plevel[pl]       # Register the (pair, level) key
      # Build a "data" array keyed by "pl" plus a numeric suffix:
      #   suffix 1 and 2 flag the presence of the 1st and 2nd fruit,
      #   suffix 3, 4 and 5 accumulate input fields $3, $4 and $5.
      if( pix>1 ){ data[pl 2]=1 } # Match is past position 1, so it is the 2nd fruit
      else { data[pl 1]=1 }       # Match at position 1, so it is the 1st fruit
      for( v=3; v<=5; v++ ) data[pl v]+=$v
    }
  }
}
END{# Process by pair, in "pairs_file" order
  for(pi=1; pi<=pz; pi++){
    p=pair[pi]
    na=1 # Flag for when the pair has no data for any level
    # Process by level
    for( plk in plevel ){
      # Keys of "plevel" start with the pair they belong to.
      # The traversal order of "for ... in" is unspecified, so levels are unsorted.
      if( index( plk, p ) == 1 ){
        na=0
        printf "%s", substr(plk,2)   # Drop the leading space of the key
        for( v=3; v<=5; v++ ) printf "%s ", data[plk v]
        # The presence flags differ when exactly one fruit has this level.
        if( data[plk 1] != data[plk 2] ){
          split(p,u)
          if ( data[plk 1] ) printf "%s", u[2]   # 1st fruit present, 2nd is missing
          else printf "%s", u[1]                 # 2nd fruit present, 1st is missing
        }
        else { printf "None" }
        print ""
      }
    }
    if( na ) print substr(p,2) "Na Na Na Na both"
  }
}' pairs_file main_file | column -t -s' '
「ペア」グループを「レベル」で並べ替えるには、次の2行を置き換えます。
# Process by level
for( plk in plevel ){
次の行に置き換えてください。
# Sort by level
# Collect, one per line, the "plevel" keys that start with the current pair "p".
plk=""; for( pl in plevel )
if( index( pl, p ) == 1 ) plk = plk sprintf( "%s\n", pl )
# Build a shell command that prints the collected keys and pipes them to "sort -V".
# NOTE(review): "plk" is embedded inside a double-quoted shell string, so keys
# containing shell metacharacters would be interpreted by the shell - confirm
# that the input names are plain words.
cmd = "printf \"%s\" \""plk"\" | sort -V"
# Read the sorted keys back, one per line, into "pplkeys" (indexed from 1).
ppli=0; while( ( cmd | getline pplk) > 0 )
pplkeys[++ppli] = pplk
close(cmd) # Close the pipe so the next pair runs a fresh command
pplz=ppli # "pplz" is the number of sorted keys for this pair; entries beyond it are ignored
# Process by level
# This loop replaces "for( plk in plevel ){"; its closing brace is the one that
# closed the original loop.
for(ppli=1; ppli<=pplz; ppli++){
plk = pplkeys[ppli]
テーブル形式の出力が必要な場合は、次のようにパイプします。
awk '...' pairs_file main_file | column -t -s' '
出力: ペアグループごとにレベル順で並べ替え、column でテーブル形式に整形したもの
mango123 squash378 L1 8 10 12 None
mango123 squash378 L2 4 5 6 squash378
mango123 squash378 L5 10 12 13 mango123
mango123 squash378 L11 11 12 23 squash378
mango123 squash378 L15 0 1 3 mango123
squash378 orange765 L1 18 9 12 None
squash378 orange765 L5 10 12 13 orange765
squash378 orange765 L15 0 1 3 orange765
pear546 lime436 Na Na Na Na both