#!/usr/bin/perl
# Author: Pete Whitelock
# Start Date: 15.5.94
# simple KWIC (keyword-in-context) concordancer
#
# Reads lines from the named file(s), or from standard input when no file is
# given, and prints every match of a pattern aligned in a column, with a
# configurable amount of left and right context around it.

use strict;
use warnings;
use Getopt::Std;

my $usage = q!
Usage: kwic [-ikb] [-c int|-l int -r int] pattern (filename)
prints pattern in $opt_c characters of context on either side
or L,R characters of context on left or right respectively
(defaults to 50)
-i means case-insensitive
-k means print a tab-separated initial key in output as well
-b means print &nbsp; instead of spaces for html to browser
set -c to 0 and pipe to sus (sort|uniq -c|sort -nr) to count instances of string in file
!;

# Run as a program only when executed directly, so the helper subs below can
# be loaded and exercised on their own.
main() unless caller;

# Entry point: parse options, compile the pattern and print every hit.
sub main {
    my %opt;
    getopts('hikbc:l:r:', \%opt) or die $usage;
    die $usage if $opt{h};

    my $left_context  = context_width($opt{l}, $opt{c});
    my $right_context = context_width($opt{r}, $opt{c});

    my $pattern = shift @ARGV;
    if (!defined $pattern || $pattern eq '') {
        print "$usage\n";
        return;
    }

    # The pattern is interpolated as a variable, so slashes inside it need no
    # escaping; compile once, honouring -i.
    my $regex = eval { $opt{i} ? qr/$pattern/i : qr/$pattern/ };
    die "Invalid pattern '$pattern': $@" unless defined $regex;

    my @sources = @ARGV ? @ARGV : ('-');
    for my $source (@sources) {
        my $in;
        if ($source eq '-') {
            $in = \*STDIN;
        }
        else {
            open($in, '<', $source)
                or die "Couldn't open file $source: $!\n";
        }

        while (my $line = <$in>) {
            my $key = '';
            ($key, $line) = split_key($line) if $opt{k};

            # updated this 9.12.99 to display tabs correctly
            $line =~ s/\t/ /g;

            while ($line =~ /$regex/g) {
                # Copy the match offsets before any other regex can run.
                my ($start, $end) = ($-[0], $+[0]);
                printf "%-20s", $key if length $key;
                print format_hit($line, $start, $end,
                                 $left_context, $right_context, $opt{b});
            }
        }

        close $in if $source ne '-';
    }
    return;
}

# Pick the context width: the side-specific option (-l / -r) wins over the
# common one (-c); 50 is used when neither was given.  An explicit 0 is
# honoured, which is what makes "-c 0" usable for counting matches.
sub context_width {
    my ($specific, $common) = @_;
    for my $value ($specific, $common) {
        next unless defined $value;
        die "Context width must be a non-negative integer, got '$value'\n"
            unless $value =~ /^\d+$/;
        return $value + 0;
    }
    return 50;
}

# Split a leading tab-terminated key off a line.
# Returns (key, rest_of_line); the key is '' when the line contains no tab.
sub split_key {
    my ($line) = @_;
    my $key = '';
    if ($line =~ s/^([^\t]*\t)//) {
        $key = $1;
        # Drop the corpus path prefix from the key.
        $key =~ s#//P/Corpora and Linguistic Tools/BNCTagged/./../##;
        $key =~ s/\t/ /g;
    }
    return ($key, $line);
}

# Build one output line for the match occupying [$start, $end) of $text:
# left context (padded on the left to exactly $left_context columns), the
# match itself, then up to $right_context characters of right context.
# Context never crosses a newline.  With $html true the padding is made of
# &nbsp; entities rather than spaces.
sub format_hit {
    my ($text, $start, $end, $left_context, $right_context, $html) = @_;

    my $before = substr($text, 0, $start);
    my $match  = substr($text, $start, $end - $start);
    my $after  = substr($text, $end);

    # Left context: the last $left_context characters of the final line
    # segment before the match (a trailing newline is ignored).
    $before =~ s/\n\z//;
    my $cut = rindex($before, "\n");
    $before = substr($before, $cut + 1) if $cut >= 0;
    my $left = length($before) > $left_context
        ? substr($before, length($before) - $left_context)
        : $before;

    # Right context: stop at the end of the line.
    $cut = index($after, "\n");
    $after = substr($after, 0, $cut) if $cut >= 0;
    my $right = substr($after, 0, $right_context);

    my $filler = $html ? '&nbsp;' : ' ';
    my $pad    = $filler x ($left_context - length $left);

    return $pad . $left . $match . $right . "\n";
}