awkを使用して2つの日時の差を取得します。

Question 1

GNU awkを使う：

gawk '
  # Convert a "yy?mm?dd" date and an "HH?MM?SS" time (fixed-width fields,
  # any single-character separators) to seconds since the epoch via mktime.
  function dt2epoch(date, time,      spec) {
    spec = sprintf("20%s %s %s %s %s %s",
                   substr(date, 1, 2), substr(date, 4, 2), substr(date, 7, 2),
                   substr(time, 1, 2), substr(time, 4, 2), substr(time, 7, 2))
    return mktime(spec)
  }
  # Render a number of seconds as HH:MM:SS; the third strftime argument
  # requests UTC so the local timezone offset is not added.
  function epoch2hms(t) {
    return strftime("%H:%M:%S", t, 1)
  }
  # Absolute value of a number.
  function abs(n) {
    if (n < 0)
      return -n
    return n
  }
  # Skip the first line, then print the gap between the timestamp in
  # columns 5-6 and the one in columns 1-2.
  NR > 1 {
    second = dt2epoch($5, $6)
    first  = dt2epoch($1, $2)
    print epoch2hms(abs(second - first))
  }
' file

出力

00:01:20
00:01:09
00:01:20
00:01:08
00:02:00
00:01:37
00:01:39
00:01:33

Perlの場合は、DateTimeエコシステムを使用します：

perl -MDateTime::Format::Strptime -lane '
    # Build the parser once, before any input is read.
    BEGIN {
        $parser = DateTime::Format::Strptime->new(pattern => "%y-%m-%d %H:%M:%S");
    }
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # The first date column uses "/" separators; convert them to "-" so
    # that both timestamps match the single parser pattern.
    $F[0] =~ tr{/}{-};
    $first  = $parser->parse_datetime("$F[0] $F[1]");
    $second = $parser->parse_datetime("$F[4] $F[5]");
    $gap = $first->subtract_datetime($second);
    printf "%02d:%02d:%02d\n", $gap->hours, $gap->minutes, $gap->seconds;
' file

非コアモジュールを必要としない、より高速なPerl版：

perl -MTime::Piece -lane '
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # Columns 1-2 use "/" in the date, columns 5-6 use "-".
    $start = Time::Piece->strptime("$F[0] $F[1]", "%y/%m/%d %H:%M:%S");
    $stop  = Time::Piece->strptime("$F[4] $F[5]", "%y-%m-%d %H:%M:%S");
    # Feed the absolute gap in seconds back through gmtime so that it can
    # be formatted with ->hms.
    $gap = gmtime(abs($start->epoch - $stop->epoch));
    print $gap->hms;
' file

または、別の出力形式：

$ perl -MTime::Piece -lane '
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # Columns 1-2 use "/" in the date, columns 5-6 use "-".
    $start = Time::Piece->strptime("$F[0] $F[1]", "%y/%m/%d %H:%M:%S");
    $stop  = Time::Piece->strptime("$F[4] $F[5]", "%y-%m-%d %H:%M:%S");
    # Subtracting two Time::Piece objects yields a duration object whose
    # ->pretty method renders it in words.
    $elapsed = abs($start - $stop);
    print $elapsed->pretty;
' file
1 minutes, 20 seconds
1 minutes, 9 seconds
1 minutes, 20 seconds
1 minutes, 8 seconds
2 minutes, 0 seconds
1 minutes, 37 seconds
1 minutes, 39 seconds
1 minutes, 33 seconds

Answer

GNU awkを使う：

gawk '
  # Convert a "yy?mm?dd" date and an "HH?MM?SS" time (fixed-width fields,
  # any single-character separators) to seconds since the epoch via mktime.
  function dt2epoch(date, time,      spec) {
    spec = sprintf("20%s %s %s %s %s %s",
                   substr(date, 1, 2), substr(date, 4, 2), substr(date, 7, 2),
                   substr(time, 1, 2), substr(time, 4, 2), substr(time, 7, 2))
    return mktime(spec)
  }
  # Render a number of seconds as HH:MM:SS; the third strftime argument
  # requests UTC so the local timezone offset is not added.
  function epoch2hms(t) {
    return strftime("%H:%M:%S", t, 1)
  }
  # Absolute value of a number.
  function abs(n) {
    if (n < 0)
      return -n
    return n
  }
  # Skip the first line, then print the gap between the timestamp in
  # columns 5-6 and the one in columns 1-2.
  NR > 1 {
    second = dt2epoch($5, $6)
    first  = dt2epoch($1, $2)
    print epoch2hms(abs(second - first))
  }
' file

出力

00:01:20
00:01:09
00:01:20
00:01:08
00:02:00
00:01:37
00:01:39
00:01:33

Perlの場合は、DateTimeエコシステムを使用します：

perl -MDateTime::Format::Strptime -lane '
    # Build the parser once, before any input is read.
    BEGIN {
        $parser = DateTime::Format::Strptime->new(pattern => "%y-%m-%d %H:%M:%S");
    }
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # The first date column uses "/" separators; convert them to "-" so
    # that both timestamps match the single parser pattern.
    $F[0] =~ tr{/}{-};
    $first  = $parser->parse_datetime("$F[0] $F[1]");
    $second = $parser->parse_datetime("$F[4] $F[5]");
    $gap = $first->subtract_datetime($second);
    printf "%02d:%02d:%02d\n", $gap->hours, $gap->minutes, $gap->seconds;
' file

非コアモジュールを必要としない、より高速なPerl版：

perl -MTime::Piece -lane '
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # Columns 1-2 use "/" in the date, columns 5-6 use "-".
    $start = Time::Piece->strptime("$F[0] $F[1]", "%y/%m/%d %H:%M:%S");
    $stop  = Time::Piece->strptime("$F[4] $F[5]", "%y-%m-%d %H:%M:%S");
    # Feed the absolute gap in seconds back through gmtime so that it can
    # be formatted with ->hms.
    $gap = gmtime(abs($start->epoch - $stop->epoch));
    print $gap->hms;
' file

または、別の出力形式：

$ perl -MTime::Piece -lane '
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # Columns 1-2 use "/" in the date, columns 5-6 use "-".
    $start = Time::Piece->strptime("$F[0] $F[1]", "%y/%m/%d %H:%M:%S");
    $stop  = Time::Piece->strptime("$F[4] $F[5]", "%y-%m-%d %H:%M:%S");
    # Subtracting two Time::Piece objects yields a duration object whose
    # ->pretty method renders it in words.
    $elapsed = abs($start - $stop);
    print $elapsed->pretty;
' file
1 minutes, 20 seconds
1 minutes, 9 seconds
1 minutes, 20 seconds
1 minutes, 8 seconds
2 minutes, 0 seconds
1 minutes, 37 seconds
1 minutes, 39 seconds
1 minutes, 33 seconds

Question 2

bashとawkを組み合わせて使用する：

$ awk 'NR>1 {print $1,$2,$5,$6}' input  | while read -r d1 t1 d2 t2; do
  # Convert each timestamp with its own date column, so that pairs which
  # straddle midnight are handled correctly.
  i1=$(date -u -d "20$d1 $t1" +%s)
  i2=$(date -u -d "20$d2 $t2" +%s)
  delta=$((i1-i2))
  # ${delta#-} drops a leading minus sign (absolute value); -u keeps the
  # local timezone offset out of the formatted minutes.
  date -u -d "@${delta#-}" +%M:%S
done
01:20
01:09
01:20
01:08
02:00
01:37
01:39
01:33

Answer

bashとawkを組み合わせて使用する：

$ awk 'NR>1 {print $1,$2,$5,$6}' input  | while read -r d1 t1 d2 t2; do
  # Convert each timestamp with its own date column, so that pairs which
  # straddle midnight are handled correctly.
  i1=$(date -u -d "20$d1 $t1" +%s)
  i2=$(date -u -d "20$d2 $t2" +%s)
  delta=$((i1-i2))
  # ${delta#-} drops a leading minus sign (absolute value); -u keeps the
  # local timezone offset out of the formatted minutes.
  date -u -d "@${delta#-}" +%M:%S
done
01:20
01:09
01:20
01:08
02:00
01:37
01:39
01:33

Question 3

ベンチマーク：サンプルデータを何度も繰り返して入力ファイルを作りました。

$ wc -l file
131073 file

計測結果（所要時間）は次のとおりです。

$ time  awk 'NR>1 {print $1,$2,$5,$6}' file |
while read -r d1 t1 d2 t2; do
  # Convert each timestamp with its own date column, so that pairs which
  # straddle midnight are handled correctly.
  i1=$(date -u -d "20$d1 $t1" +%s)
  i2=$(date -u -d "20$d2 $t2" +%s)
  delta=$((i1-i2))
  # ${delta#-} drops a leading minus sign (absolute value); -u keeps the
  # local timezone offset out of the formatted minutes.
  date -u -d "@${delta#-}" +%M:%S
done > /dev/null

real    8m55.533s
user    5m46.956s
sys     1m33.726s

$ time perl -MDateTime::Format::Strptime -lane '
    # Build the parser once, before any input is read.
    BEGIN {
        $parser = DateTime::Format::Strptime->new(pattern => "%y-%m-%d %H:%M:%S");
    }
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # The first date column uses "/" separators; convert them to "-" so
    # that both timestamps match the single parser pattern.
    $F[0] =~ tr{/}{-};
    $first  = $parser->parse_datetime("$F[0] $F[1]");
    $second = $parser->parse_datetime("$F[4] $F[5]");
    $gap = $first->subtract_datetime($second);
    printf "%02d:%02d:%02d\n", $gap->hours, $gap->minutes, $gap->seconds;
' file > /dev/null

real    0m37.684s
user    0m33.168s
sys     0m0.058s

$ time gawk '
  # Convert a "yy?mm?dd" date and an "HH?MM?SS" time (fixed-width fields,
  # any single-character separators) to seconds since the epoch via mktime.
  function dt2epoch(date, time,      spec) {
    spec = sprintf("20%s %s %s %s %s %s",
                   substr(date, 1, 2), substr(date, 4, 2), substr(date, 7, 2),
                   substr(time, 1, 2), substr(time, 4, 2), substr(time, 7, 2))
    return mktime(spec)
  }
  # Render a number of seconds as HH:MM:SS; the third strftime argument
  # requests UTC so the local timezone offset is not added.
  function epoch2hms(t) {
    return strftime("%H:%M:%S", t, 1)
  }
  # Absolute value of a number.
  function abs(n) {
    if (n < 0)
      return -n
    return n
  }
  # Skip the first line, then print the gap between the timestamp in
  # columns 5-6 and the one in columns 1-2.
  NR > 1 {
    second = dt2epoch($5, $6)
    first  = dt2epoch($1, $2)
    print epoch2hms(abs(second - first))
  }
' file > /dev/null

real    0m1.074s
user    0m0.610s
sys     0m0.366s

時間関数を組み込みで備えたGNU awkが、多くの文字列操作を行っているにもかかわらず、明らかに最速です。

更新：新しいPerl実装を追加しました。gawkよりはまだ遅いものの、高機能だが重量級のDateTimeモジュールを使うバージョンよりはるかに高速です。

$ time perl -MTime::Piece -lane '
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # Columns 1-2 use "/" in the date, columns 5-6 use "-".
    $start = Time::Piece->strptime("$F[0] $F[1]", "%y/%m/%d %H:%M:%S");
    $stop  = Time::Piece->strptime("$F[4] $F[5]", "%y-%m-%d %H:%M:%S");
    # Feed the absolute gap in seconds back through gmtime so that it can
    # be formatted with ->hms.
    $gap = gmtime(abs($start->epoch - $stop->epoch));
    print $gap->hms;
' file > /dev/null

real    0m2.631s
user    0m2.231s
sys     0m0.170s

Answer

ベンチマーク：サンプルデータを何度も繰り返して入力ファイルを作りました。

$ wc -l file
131073 file

計測結果（所要時間）は次のとおりです。

$ time  awk 'NR>1 {print $1,$2,$5,$6}' file |
while read -r d1 t1 d2 t2; do
  # Convert each timestamp with its own date column, so that pairs which
  # straddle midnight are handled correctly.
  i1=$(date -u -d "20$d1 $t1" +%s)
  i2=$(date -u -d "20$d2 $t2" +%s)
  delta=$((i1-i2))
  # ${delta#-} drops a leading minus sign (absolute value); -u keeps the
  # local timezone offset out of the formatted minutes.
  date -u -d "@${delta#-}" +%M:%S
done > /dev/null

real    8m55.533s
user    5m46.956s
sys     1m33.726s

$ time perl -MDateTime::Format::Strptime -lane '
    # Build the parser once, before any input is read.
    BEGIN {
        $parser = DateTime::Format::Strptime->new(pattern => "%y-%m-%d %H:%M:%S");
    }
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # The first date column uses "/" separators; convert them to "-" so
    # that both timestamps match the single parser pattern.
    $F[0] =~ tr{/}{-};
    $first  = $parser->parse_datetime("$F[0] $F[1]");
    $second = $parser->parse_datetime("$F[4] $F[5]");
    $gap = $first->subtract_datetime($second);
    printf "%02d:%02d:%02d\n", $gap->hours, $gap->minutes, $gap->seconds;
' file > /dev/null

real    0m37.684s
user    0m33.168s
sys     0m0.058s

$ time gawk '
  # Convert a "yy?mm?dd" date and an "HH?MM?SS" time (fixed-width fields,
  # any single-character separators) to seconds since the epoch via mktime.
  function dt2epoch(date, time,      spec) {
    spec = sprintf("20%s %s %s %s %s %s",
                   substr(date, 1, 2), substr(date, 4, 2), substr(date, 7, 2),
                   substr(time, 1, 2), substr(time, 4, 2), substr(time, 7, 2))
    return mktime(spec)
  }
  # Render a number of seconds as HH:MM:SS; the third strftime argument
  # requests UTC so the local timezone offset is not added.
  function epoch2hms(t) {
    return strftime("%H:%M:%S", t, 1)
  }
  # Absolute value of a number.
  function abs(n) {
    if (n < 0)
      return -n
    return n
  }
  # Skip the first line, then print the gap between the timestamp in
  # columns 5-6 and the one in columns 1-2.
  NR > 1 {
    second = dt2epoch($5, $6)
    first  = dt2epoch($1, $2)
    print epoch2hms(abs(second - first))
  }
' file > /dev/null

real    0m1.074s
user    0m0.610s
sys     0m0.366s

時間関数を組み込みで備えたGNU awkが、多くの文字列操作を行っているにもかかわらず、明らかに最速です。

更新：新しいPerl実装を追加しました。gawkよりはまだ遅いものの、高機能だが重量級のDateTimeモジュールを使うバージョンよりはるかに高速です。

$ time perl -MTime::Piece -lane '
    # Skip the first input line (presumably a header).
    $. == 1 and next;
    # Columns 1-2 use "/" in the date, columns 5-6 use "-".
    $start = Time::Piece->strptime("$F[0] $F[1]", "%y/%m/%d %H:%M:%S");
    $stop  = Time::Piece->strptime("$F[4] $F[5]", "%y-%m-%d %H:%M:%S");
    # Feed the absolute gap in seconds back through gmtime so that it can
    # be formatted with ->hms.
    $gap = gmtime(abs($start->epoch - $stop->epoch));
    print $gap->hms;
' file > /dev/null

real    0m2.631s
user    0m2.231s
sys     0m0.170s

awkを使用して2つの日時の差を取得します。

答え1

答え2

答え3

関連情報