URLのリストを含む2つのファイルがあります。これらを old と new と呼びます。
これら2つのファイルを比較し、old ファイルにない URL が new ファイルにある場合は、それらを extra_urls ファイルに出力したいと思います。
diff コマンドの使い方を少し調べてみましたが、私が理解したところでは、diff は情報の順序も考慮します。順序が出力に影響してほしくありません。2つのファイル内での並び順に関係なく、new にだけ存在する追加の URL を extra_urls ファイルに出力したいと思います。
どうすればいいですか?
答え1
答え2
私なら単純に grep を使います:
# Print every line of new that is not a line of old (input order is irrelevant).
# -x makes each pattern match a whole line only; without it a URL from old
# would also suppress any longer URL in new that merely contains it.
grep -vxFf old new > extra_urls
説明:
-f: grep に検索パターンをファイル(この場合は old)から読み込むように指示します。
-v: grep に一致を反転し、一致しない行のみを出力するように指示します。
-F: grep に検索パターンを正規表現ではなく固定文字列として解釈するように指示します。これにより、URL 中の . は文字どおりの . として照合されます。
これらを組み合わせると、grep は new の行のうち old に存在しないものをすべて出力します。ファイル内の URL の順序は関係ありません。
答え3
順序が重要なので、次のように awk を使用してください。
# Print the lines of new whose first field does not occur as a first field in old.
awk '
  # First file (old): remember every URL. Comparing FILENAME with ARGV[1]
  # instead of the usual NR == FNR keeps this correct when old is empty
  # (with NR == FNR an empty old makes new look like the first file).
  FILENAME == ARGV[1] { old[$1] = 1; next }
  # Second file (new): print the lines whose URL was not seen in old.
  !($1 in old)
' old new > extra
答え4
以下は、URL 以外のテキストも含むファイルから URL を見つけて比較する、より一般的なソリューションです。
#!/bin/sh
# diffl.sh
# DIFF with Links - a "diff utility"-like .sh script
# (dash, bash, zsh compatible) that can find missing
# web links in one file compared to a group of files
# Please note that: for simplicity, in this script, only
# URLs containing "://" are taken into consideration,
# although there can be URLs that do not contain it
# (such as mailto:user@example.com)
# Store a coarse operating-system name in the variable named by the caller.
# Arguments: $1 - name of the variable that receives the result
# Result:    "Linux", "Mac", "Windows", "unknown" (uname printed nothing)
#            or "other" (any kernel name not listed here)
# Globals:   OS_kernel_name is set to the raw output of `uname -s`
GetOS () {
  OS_kernel_name=$(uname -s)
  if [ -z "$OS_kernel_name" ]; then
    eval $1="unknown"
  elif [ "$OS_kernel_name" = "Linux" ]; then
    eval $1="Linux"
  elif [ "$OS_kernel_name" = "Darwin" ]; then
    eval $1="Mac"
  else
    # The Windows POSIX layers append version details to the kernel name,
    # so they are matched by prefix.
    case "$OS_kernel_name" in
      CYGWIN*|MSYS*|MINGW*) eval $1="Windows" ;;
      *) eval $1="other" ;;
    esac
  fi
}
# Store the name of the running shell in the variable named by the caller.
# Arguments: $1 - name of the variable that receives the result
# Result:    "bash" when BASH_VERSION is non-empty, else "zsh" when
#            ZSH_VERSION is non-empty, else "dash" when PS1 is exactly '$ ',
#            otherwise "undetermined"
DetectShell () {
  # Start from an empty value, then overwrite it with the first match.
  eval $1=\"\"
  if [ -n "$BASH_VERSION" ]; then
    eval $1=\"bash\"
    return
  fi
  if [ -n "$ZSH_VERSION" ]; then
    eval $1=\"zsh\"
    return
  fi
  # No version variable to test for dash; the prompt string is used as a hint.
  if [ "$PS1" = '$ ' ]; then
    eval $1=\"dash\"
    return
  fi
  eval $1=\"undetermined\"
}
# Write "ESC ] 0 ; <text> BEL" to stdout, the terminal escape sequence
# conventionally used to set the window title.
# Arguments: $1 - title text (passed through %s, so it is never treated as a format)
PrintInTitle () {
  printf '\033]0;%s\007' "$1"
}
# Set the terminal title by writing directly to the controlling terminal, so
# the escape sequence never ends up in the script's (possibly piped) stdout.
# Arguments: $1 - title text
# Returns:   always 0. The title is cosmetic, so when /dev/tty cannot be
#            opened (cron, CI, no controlling terminal) the failed redirection
#            is silenced instead of printing an error on every call.
PrintJustInTitle () {
  { PrintInTitle "$1" >/dev/tty; } 2>/dev/null || :
}
# Signal handler installed for INT (CTRL-C) and TSTP (CTRL-Z): undo the
# terminal/IFS changes, tell the user the run was aborted, and stop the script.
# Exits with status 130 (128 + SIGINT), the conventional status for an
# interrupted command, so callers do not mistake an aborted run for success.
trap1 () {
  CleanUp
  # Prefer the terminal so the message is not swallowed by a redirected
  # stdout; fall back to stderr when no controlling terminal is available.
  { printf "\nAborted.\n" >/dev/tty; } 2>/dev/null || printf "\nAborted.\n" >&2
  exit 130
}
# Undo the run-time changes made by the script: signal traps, terminal title
# and IFS.
CleanUp () {
  # Put "INTERRUPT" (CTRL-C) and "TERMINAL STOP" (CTRL-Z) back to their
  # default handling.
  trap - INT TSTP
  # An empty title clears whatever progress text was shown.
  PrintJustInTitle ""
  # IFS is unset (which gives the shell's default field splitting) rather
  # than restored from old_IFS.
  unset IFS
}
# Print the usage/help text to stdout (one argument per output line).
# Takes no arguments.
DisplayHelp () {
  printf '%s\n' \
    "" \
    "diffl - DIFF by URL web Links" \
    "" \
    " What it does:" \
    " - compares the URL web links in the two provided files (<file1> and <file2>) and shows the missing web links that are found in one but not in the other" \
    " Syntax:" \
    " <caller_shell> '/path/to/diffl.sh' <file1> <file2> ... <fileN> [flags]" \
    " - where:" \
    " - <caller_shell> can be any of the shells: dash, bash, zsh, or any other shell compatible with the \"dash\" shell syntax" \
    " - '/path/to/diffl.sh' represents the path of this script" \
    " - <file1> and <file2> represent the files whose web links are to be compared" \
    " - if more than two files are provided as parameters (<file1>, <file2>, ..., <fileN>): the web links in <file1> are compared with all the web links in <file2>, ... <fileN>" \
    " - [flags] can be:" \
    " --help or -h" \
    " Displays this help information" \
    " Output:" \
    " - lines starting with '<' signify web links from <file1>" \
    " - lines starting with '>' signify web links from <file2>, ..., <fileN>" \
    " Notes:" \
    " - for simplicity, in this script, only URLs containing \"://\" are taken into consideration, although there can be URLs that do not contain it (such as mailto:user@example.com)" \
    ""
}
# Record the operating system name in OS.
GetOS OS
#################################################################################
## Uncomment the next line if your OS is not Linux or Mac (and eventually      ##
## modify the commands used (sed, sort, uniq) according to your system):       ##
#################################################################################
#OS="userdefined"
# Record the running shell in current_shell and warn when it is not recognised.
DetectShell current_shell
case "$current_shell" in
  "undetermined")
    printf "\nWarning: This script was designed to work with dash, bash and zsh shells.\n\n">/dev/tty
    ;;
esac
# Copy the positional parameters into the emulated array params_1..params_N
# (plain sh has no arrays); params_0 holds the element count.
params_count=0
for i in "$@"; do
  params_count=$((params_count + 1))
  eval "params_${params_count}=\"\$i\""
done
params_0=$params_count
# Called without any parameter: show the help text and leave successfully.
if [ "$params_0" -eq 0 ]; then
  DisplayHelp
  if CleanUp; then
    exit 0
  fi
fi
# Split the parameters into flags and ordinary arguments.
# help_flag becomes "1" when --help or -h is present; every other parameter
# is collected, in order, into selected_params_1..selected_params_N.
help_flag="0"
i=1
j=0
while [ "$i" -le "$((params_0))" ]; do
  # Indirect read of params_<i> into params_i.
  eval "params_i=\"\${params_$i}\""
  if [ "$params_i" = "--help" ] || [ "$params_i" = "-h" ]; then
    help_flag="1"
  else
    j=$((j + 1))
    eval "selected_params_$j=\"\$params_i\""
  fi
  i=$((i + 1))
done
selected_params_0=$j
# Rebuild the params array so that it only holds the non-flag parameters.
for i in $(seq 1 $selected_params_0); do
  eval "params_$i=\"\${selected_params_$i}\""
done
params_0=$selected_params_0
# Main entry: show the help text when a help flag was given; otherwise extract
# the web links from every input file and print those that occur on only one
# side (<file1> versus <file2>..<fileN>), prefixed with '<' or '>'.
if [ "$help_flag" = "1" ]; then
  DisplayHelp
else #Run program:
  # printf '%s' does not interpret escapes, so NL holds the four literal
  # characters \n\n (not a newline). With the extra backslash added below, the
  # replacement text handed to sed is \\n\n followed by \1. The value is kept
  # as-is because the extraction pipeline depends on it.
  # NOTE(review): this appears to rely on sed treating \n in the replacement
  # as a newline (a GNU sed extension) - confirm before porting.
  NL=$(printf '%s' "\n\n");

  # Probe the external tools the pipeline needs.
  error1="false"
  error2="false"
  error3="false"
  { sed --help >/dev/null 2>/dev/null; } || { error1="true"; }
  { sort --help >/dev/null 2>/dev/null; } || { error2="true"; }
  { uniq --help >/dev/null 2>/dev/null; } || { error3="true"; }
  if [ "$error1" = "true" ] || [ "$error2" = "true" ] || [ "$error3" = "true" ]; then
    {
      printf "\n"
      # One message per line, so several missing tools do not run together.
      if [ "$error1" = "true" ]; then printf '%s\n' "ERROR: Could not run \"sed\" (necessary in order for this script to function correctly)!"; fi
      if [ "$error2" = "true" ]; then printf '%s\n' "ERROR: Could not run \"sort\" (necessary in order for this script to function correctly)"; fi
      if [ "$error3" = "true" ]; then printf '%s\n' "ERROR: Could not run \"uniq\" (necessary in order for this script to function correctly)"; fi
    } >&2
    # A missing tool is a failure: report it through the exit status too.
    exit 1
  fi

  if [ "$OS" = "Linux" ] || [ "$OS" = "Mac" ] || [ "$OS" = "userdefined" ]; then
    # The commands are stored as strings and run through eval further down.
    # command1: puts every "<letters>://" occurrence at the start of its own line.
    # command1: sed -E 's/([a-zA-Z]*\:\/\/)/\\${NL}\1/g'
    sed_command1='sed -E '"'"'s/([a-zA-Z]*\:\/\/)/'"\\${NL}"'\1/g'"'";
    # command2: prints, for each line containing "://", the link up to the
    # first space or tab.
    # command2: sed -n 's/\(\(.*\([^a-zA-Z+]\)\|\([a-zA-Z]\)\)\)\(\([a-zA-Z]\)*\:\/\/\)\([^ \t]*\).*/\4\5\7/p'
    sed_command2='sed -n '"'"'s/\(\(.*\([^a-zA-Z+]\)\|\([a-zA-Z]\)\)\)\(\([a-zA-Z]\)*\:\/\/\)\([^ \t]*\).*/\4\5\7/p'"'"
    # command3: drops the zero-padded sequence number (second field).
    # command3: sed -E 's/(.) [0-9]* (.*)/\1 \2/g'
    sed_command3='sed -E '"'"'s/(.) [0-9]* (.*)/\1 \2/g'"'";
    # command4: turns the leading group digit into the diff-like marker.
    # command4: sed -E 's/^1/>/g;s/^0/</g'
    sed_command4='sed -E '"'"'s/^1/>/g;s/^0/</g'"'"
  else
    printf '\n%s\n\n' "Error: Unsupported OS!" >&2
    exit 1
  fi

  #Get the program parameters into the array "files":
  count=0
  for i; do
    count=$((count+1))
    eval files_$count=\"\$i\"
  done
  files_0=$((count))

  error="false"
  if [ "$files_0" -lt "2" ]; then
    printf '\n%s\n' "ERROR: Please provide at least two parameters!" >&2
    error="true"
  fi
  if [ "$error" = "true" ]; then
    # Diagnostics (including the trailing blank line) go to stderr only, so a
    # redirected stdout stays empty on failure.
    printf "\n" >&2
    exit 1
  fi

  error="false"
  for i in $(seq 1 $files_0); do
    eval current_file=\"\$files_$i\"
    # -f is already false for a path that does not exist.
    if [ ! -f "$current_file" ]; then
      printf '\n%s\n' "ERROR: File \"$current_file\" does not exist or is not a regular file!" >&2
      error="true"
    fi
  done
  if [ "$error" = "true" ]; then
    printf "\n" >&2
    exit 1
  fi

  #Proceed to finding and comparing links:

  #Trap "INTERRUPT" (CTRL-C) and "TERMINAL STOP" (CTRL-Z) signals:
  trap 'trap1' INT
  trap 'trap1' TSTP

  old_IFS="$IFS" #Store initial IFS value
  # Split command output on newlines only: one link per line.
  IFS="
"
  # The unquoted $(...) and $sed_command* expansions below must be split into
  # fields but never pathname-expanded (links may contain ?, * or [).
  set -f

  {
    # Group 0: links of <file1>, emitted as "0 <zero-padded number> <link>".
    PrintJustInTitle "Searching for links [1]..."
    mask="00000000000000000000"
    {
      count=0
      for link in $(\
        cat "$files_1" |\
        eval $sed_command1 |\
        eval $sed_command2\
      ); do
        count_prev=$count
        count=$((count+1))
        # Drop one padding zero whenever the counter gains a digit, so every
        # number has the same width.
        if [ "${#count_prev}" -lt "${#count}" ]; then
          mask="${mask%?}"
        fi
        number="$mask$count"
        printf '%s\n' "0 $number $link"
        PrintJustInTitle "Links found [1]: $((count))..."
      done;
      PrintJustInTitle "Sorting results [1]..."
    }|sort -u -k 3

    # Group 1: links of <file2>..<fileN>, emitted as "1 <number> <link>".
    PrintJustInTitle "Searching for links [2]..."
    mask="00000000000000000000"
    {
      count=0
      for i in $(seq 2 $files_0); do
        eval current_file=\"\$files_$i\"
        for link in $(\
          cat "$current_file" |\
          eval $sed_command1 |\
          eval $sed_command2\
        ); do
          count_prev=$count
          count=$((count+1))
          if [ "${#count_prev}" -lt "${#count}" ]; then
            mask="${mask%?}"
          fi
          number="$mask$count"
          printf '%s\n' "1 $number $link"
          PrintJustInTitle "Links found [2]: $((count))..."
        done
      done
      PrintJustInTitle "Sorting results [2]..."
    }|sort -u -k 3

    PrintJustInTitle "Searching for unique links [3]..."
  } | {
    # Keep the links that occur in only one group (uniq ignores the first two
    # fields), sort by group and number, then strip the number and turn the
    # group digit into '<' or '>'.
    sort -k 3|uniq -u -f 2|sort|eval $sed_command3|eval $sed_command4
    PrintJustInTitle "Done";
  }

  set +f
  CleanUp
fi
- 構文:
<caller_shell> '/path/to/diffl.sh' <file1> <file2> ... <fileN>
- できること:
<file1> とファイルグループ <file2>, ..., <fileN> を比較し、両者に共通していない URL Web リンクを表示します。
- メモ:
- 簡単にするために、このスクリプトでは "://" を含む URL のみを考慮します。
- 簡単にするために、このスクリプトでは