特定の基準に基づいて行を並べ替え、他の行をマージします。

Question 1

次のGNU awkスクリプトは、私の環境では期待どおりに動作します。

#! /usr/local/bin/awk -f
# Reorders the postings inside each transaction block of the input:
#   * the first line of a block (the transaction header) is printed as-is;
#   * postings whose amount field does not contain "-" are printed at once;
#   * postings whose amount contains "-" are held back and printed when the
#     block ends, in the order in which their accounts first appeared;
#   * the first account seen more than once among the held-back postings is
#     collapsed into a single amount-less line, which is printed last.
# Blocks are terminated by a blank line or by end-of-file.
#
# Global state: txn (current header line), post[] (held-back posting text
# keyed by account), order[1..nacct] (accounts in first-seen order),
# merged (name of the collapsed account, or "").

# Fields are separated by runs of two or more whitespace characters, so an
# indented posting line has an empty $1, the account in $2 and the amount
# (if present) in $3.
BEGIN { FS = "[[:space:]][[:space:]]+" }

# Print everything held back for the current transaction, then reset the
# per-transaction state.  i and acct are function-local variables.
function dump(    i, acct) {
    # Walk the accounts in first-seen order.  A plain "for (acct in post)"
    # visits elements in an unspecified order, which could shuffle the
    # postings relative to the input.
    for (i = 1; i <= nacct; i++) {
        acct = order[i]
        if (post[acct])     # the merged account was blanked, skip it here
            print post[acct]
    }
    if (merged) {           # dump merged posting, if any (account only)
        printf "    %s\n", merged
    }
    merged = ""             # clear variables for next round
    delete post
    delete order
    nacct = 0
    txn = ""
}

!NF && txn {        # blank line, end of transaction
    dump()
    print
    next
}

END {               # end-of-file, flush the postings of the last transaction
    dump()
}

!txn {              # first line after a reset: new transaction header
    txn = $0
    print
    next
}

{                   # every remaining line is treated as a posting
    acct = $2
    amt = $3
}

amt ~ /-/ {         # negative amounts, keep for later
    if (acct in post) {     # duplicate entry
        if (!merged || merged == acct) {
            # only merge and clear one duplicate account per transaction
            post[acct] = ""
            merged = acct
        }
        else                # tack on to existing record without merging
            post[acct] = post[acct] "\n" $0
    }
    else {
        post[acct] = $0
        order[++nacct] = acct   # remember first-seen position for dump()
    }
    next
}

1                   # anything else is printed unchanged

実行中：

~ ./foo.awk foo
2019/05/31 (MMEX948) Gürmar
    Expenses:Food:Groceries:Meat              ₺28,14
    Expenses:Food:Groceries:Meat              ₺28,14
    Expenses:Food:Groceries:Basic              ₺3,45
    Expenses:Food:Groceries:Produce           ₺15,00
    Assets:Cash:Marina

2019/06/01 (MMEX932) A101
    Expenses:Food:Groceries:Basic          $5.50
    Assets:Cash:Marina                    $-2.50
    Assets:Cash:Caleb                     $-3.00

2019/06/01 (MMEX931) Şemikler Pazar Yeri
    Expenses:Food:Groceries:Basic             ₺24,00
    Expenses:Food:Groceries:Meat              ₺31,00
    Expenses:Food:Groceries:Produce           ₺65,00
    Assets:Cash:Marina

Answer

次のGNU awkスクリプトは、私の環境では期待どおりに動作します。

#! /usr/local/bin/awk -f
# Reorders the postings inside each transaction block of the input:
#   * the first line of a block (the transaction header) is printed as-is;
#   * postings whose amount field does not contain "-" are printed at once;
#   * postings whose amount contains "-" are held back and printed when the
#     block ends, in the order in which their accounts first appeared;
#   * the first account seen more than once among the held-back postings is
#     collapsed into a single amount-less line, which is printed last.
# Blocks are terminated by a blank line or by end-of-file.
#
# Global state: txn (current header line), post[] (held-back posting text
# keyed by account), order[1..nacct] (accounts in first-seen order),
# merged (name of the collapsed account, or "").

# Fields are separated by runs of two or more whitespace characters, so an
# indented posting line has an empty $1, the account in $2 and the amount
# (if present) in $3.
BEGIN { FS = "[[:space:]][[:space:]]+" }

# Print everything held back for the current transaction, then reset the
# per-transaction state.  i and acct are function-local variables.
function dump(    i, acct) {
    # Walk the accounts in first-seen order.  A plain "for (acct in post)"
    # visits elements in an unspecified order, which could shuffle the
    # postings relative to the input.
    for (i = 1; i <= nacct; i++) {
        acct = order[i]
        if (post[acct])     # the merged account was blanked, skip it here
            print post[acct]
    }
    if (merged) {           # dump merged posting, if any (account only)
        printf "    %s\n", merged
    }
    merged = ""             # clear variables for next round
    delete post
    delete order
    nacct = 0
    txn = ""
}

!NF && txn {        # blank line, end of transaction
    dump()
    print
    next
}

END {               # end-of-file, flush the postings of the last transaction
    dump()
}

!txn {              # first line after a reset: new transaction header
    txn = $0
    print
    next
}

{                   # every remaining line is treated as a posting
    acct = $2
    amt = $3
}

amt ~ /-/ {         # negative amounts, keep for later
    if (acct in post) {     # duplicate entry
        if (!merged || merged == acct) {
            # only merge and clear one duplicate account per transaction
            post[acct] = ""
            merged = acct
        }
        else                # tack on to existing record without merging
            post[acct] = post[acct] "\n" $0
    }
    else {
        post[acct] = $0
        order[++nacct] = acct   # remember first-seen position for dump()
    }
    next
}

1                   # anything else is printed unchanged

実行中：

~ ./foo.awk foo
2019/05/31 (MMEX948) Gürmar
    Expenses:Food:Groceries:Meat              ₺28,14
    Expenses:Food:Groceries:Meat              ₺28,14
    Expenses:Food:Groceries:Basic              ₺3,45
    Expenses:Food:Groceries:Produce           ₺15,00
    Assets:Cash:Marina

2019/06/01 (MMEX932) A101
    Expenses:Food:Groceries:Basic          $5.50
    Assets:Cash:Marina                    $-2.50
    Assets:Cash:Caleb                     $-3.00

2019/06/01 (MMEX931) Şemikler Pazar Yeri
    Expenses:Food:Groceries:Basic             ₺24,00
    Expenses:Food:Groceries:Meat              ₺31,00
    Expenses:Food:Groceries:Produce           ₺65,00
    Assets:Cash:Marina

Question 2

gensub()、配列の配列、sorted_inを利用するため、GNU awkを使用します：

$ cat tst.awk
# Sums the amounts per account inside each blank-line-separated record and
# prints one total per account, largest total first.  The first line of a
# record is echoed unchanged as its header.
# Requires GNU awk: gensub() and PROCINFO["sorted_in"] are gawk extensions.
BEGIN {
    RS = ""; FS = "\n"      # one record per paragraph, one field per line
    localeDecPt = "."       # decimal point used when awk converts text to numbers
    PROCINFO["sorted_in"] = "@val_num_desc"   # for-in: descending numeric value
}
{
    delete sum
    inputDecPt = ""         # decimal separator seen in this record, if any
    print $1

    # Currency symbol: the last character outside [0-9.,-] that is still
    # followed by something, taken from the first posting line.
    denom = gensub(/.*([^0-9.,-]).+$/,"\\1",1,$2)

    for (i=2; i<=NF; i++) {
        # Account: the line with its last whitespace-separated word removed.
        account = gensub(/[[:space:]]+[^[:space:]]+$/,"",1,$i)
        # Amount: whatever follows the last character outside [0-9.,-].
        amount  = gensub(/.*[^0-9.,-](.+)$/,"\\1",1,$i)
        # What is left after removing digits and "-" is the separator.
        decPt = gensub(/[0-9-]+/,"","g",amount)
        if (decPt != "") {
            # Only build the bracket expression when there is something to
            # put in it: an amount such as "5" would otherwise produce the
            # invalid regexp "[]" and abort gawk with a fatal error.
            inputDecPt = decPt
            amount = gensub("["decPt"]",localeDecPt,"g",amount)
        }
        sum[account] += amount
    }

    for (account in sum) {
        amount = sprintf("%0.2f",sum[account])
        if (inputDecPt != "")   # convert back to the input's separator
            amount = gensub("["localeDecPt"]",inputDecPt,"g",amount)
        # Column widths are hard-coded: 40 for the account, 10 for the amount.
        printf "%-*s%*s\n", 40, account, 10, denom amount
    }

    print ""
}

。

$ awk -f tst.awk file
2019/05/31 (MMEX948) Gürmar
    Expenses:Food:Groceries:Meat            ₺56,28
    Expenses:Food:Groceries:Produce         ₺15,00
    Expenses:Food:Groceries:Basic            ₺3,45
    Assets:Cash:Marina                     ₺-74,73

2019/06/01 (MMEX932) A101
    Expenses:Food:Groceries:Basic            $5.50
    Assets:Cash:Marina                      $-2.50
    Assets:Cash:Caleb                       $-3.00

2019/06/01 (MMEX931) Şemikler Pazar Yeri
    Expenses:Food:Groceries:Produce         ₺65,00
    Expenses:Food:Groceries:Meat            ₺31,00
    Expenses:Food:Groceries:Basic           ₺24,00
    Assets:Cash:Marina                    ₺-120,00

「.」がお使いのロケールの小数点でない場合は、localeDecPt="." をそのロケールの小数点に変更してください。入力金額に千単位の区切り文字としてカンマが含まれていると、公開されているコードは機能しません。そのような値を扱いたい場合は、テストに使える値を含む入力を提示してください。出力フィールドの幅は40と10にハードコードしました。各フィールドの最大幅を計算して使う、タブをOFSとして使う、出力をcolumnにパイプする、といった方法も簡単にできますが、いずれも必須ではありません。

正直なところ、どの項目をマージし、どの項目を重複とみなすのかという要件を理解できていません。そのため、このスクリプトでは重複している項目の金額をすべて合算し、重複していない項目の金額はそのまま維持しています。これで期待どおりにならない場合は、質問の要件を明確にしてください。

Answer

gensub()、配列の配列、sorted_inを利用するため、GNU awkを使用します：

$ cat tst.awk
# Sums the amounts per account inside each blank-line-separated record and
# prints one total per account, largest total first.  The first line of a
# record is echoed unchanged as its header.
# Requires GNU awk: gensub() and PROCINFO["sorted_in"] are gawk extensions.
BEGIN {
    RS = ""; FS = "\n"      # one record per paragraph, one field per line
    localeDecPt = "."       # decimal point used when awk converts text to numbers
    PROCINFO["sorted_in"] = "@val_num_desc"   # for-in: descending numeric value
}
{
    delete sum
    inputDecPt = ""         # decimal separator seen in this record, if any
    print $1

    # Currency symbol: the last character outside [0-9.,-] that is still
    # followed by something, taken from the first posting line.
    denom = gensub(/.*([^0-9.,-]).+$/,"\\1",1,$2)

    for (i=2; i<=NF; i++) {
        # Account: the line with its last whitespace-separated word removed.
        account = gensub(/[[:space:]]+[^[:space:]]+$/,"",1,$i)
        # Amount: whatever follows the last character outside [0-9.,-].
        amount  = gensub(/.*[^0-9.,-](.+)$/,"\\1",1,$i)
        # What is left after removing digits and "-" is the separator.
        decPt = gensub(/[0-9-]+/,"","g",amount)
        if (decPt != "") {
            # Only build the bracket expression when there is something to
            # put in it: an amount such as "5" would otherwise produce the
            # invalid regexp "[]" and abort gawk with a fatal error.
            inputDecPt = decPt
            amount = gensub("["decPt"]",localeDecPt,"g",amount)
        }
        sum[account] += amount
    }

    for (account in sum) {
        amount = sprintf("%0.2f",sum[account])
        if (inputDecPt != "")   # convert back to the input's separator
            amount = gensub("["localeDecPt"]",inputDecPt,"g",amount)
        # Column widths are hard-coded: 40 for the account, 10 for the amount.
        printf "%-*s%*s\n", 40, account, 10, denom amount
    }

    print ""
}

。

$ awk -f tst.awk file
2019/05/31 (MMEX948) Gürmar
    Expenses:Food:Groceries:Meat            ₺56,28
    Expenses:Food:Groceries:Produce         ₺15,00
    Expenses:Food:Groceries:Basic            ₺3,45
    Assets:Cash:Marina                     ₺-74,73

2019/06/01 (MMEX932) A101
    Expenses:Food:Groceries:Basic            $5.50
    Assets:Cash:Marina                      $-2.50
    Assets:Cash:Caleb                       $-3.00

2019/06/01 (MMEX931) Şemikler Pazar Yeri
    Expenses:Food:Groceries:Produce         ₺65,00
    Expenses:Food:Groceries:Meat            ₺31,00
    Expenses:Food:Groceries:Basic           ₺24,00
    Assets:Cash:Marina                    ₺-120,00

「.」がお使いのロケールの小数点でない場合は、localeDecPt="." をそのロケールの小数点に変更してください。入力金額に千単位の区切り文字としてカンマが含まれていると、公開されているコードは機能しません。そのような値を扱いたい場合は、テストに使える値を含む入力を提示してください。出力フィールドの幅は40と10にハードコードしました。各フィールドの最大幅を計算して使う、タブをOFSとして使う、出力をcolumnにパイプする、といった方法も簡単にできますが、いずれも必須ではありません。

正直なところ、どの項目をマージし、どの項目を重複とみなすのかという要件を理解できていません。そのため、このスクリプトでは重複している項目の金額をすべて合算し、重複していない項目の金額はそのまま維持しています。これで期待どおりにならない場合は、質問の要件を明確にしてください。

特定の基準に基づいて行を並べ替え、他の行をマージします。

答え1

答え2

関連情報