word.tcl (4860B)
1 # word.tcl -- 2 # 3 # This file defines various procedures for computing word boundaries in 4 # strings. This file is primarily needed so Tk text and entry widgets behave 5 # properly for different platforms. 6 # 7 # Copyright (c) 1996 by Sun Microsystems, Inc. 8 # Copyright (c) 1998 by Scritpics Corporation. 9 # 10 # See the file "license.terms" for information on usage and redistribution 11 # of this file, and for a DISCLAIMER OF ALL WARRANTIES. 12 13 # The following variables are used to determine which characters are 14 # interpreted as white space. 15 16 if {$::tcl_platform(platform) eq "windows"} { 17 # Windows style - any but a unicode space char 18 if {![info exists ::tcl_wordchars]} { 19 set ::tcl_wordchars {\S} 20 } 21 if {![info exists ::tcl_nonwordchars]} { 22 set ::tcl_nonwordchars {\s} 23 } 24 } else { 25 # Motif style - any unicode word char (number, letter, or underscore) 26 if {![info exists ::tcl_wordchars]} { 27 set ::tcl_wordchars {\w} 28 } 29 if {![info exists ::tcl_nonwordchars]} { 30 set ::tcl_nonwordchars {\W} 31 } 32 } 33 34 # Arrange for caches of the real matcher REs to be kept, which enables the REs 35 # themselves to be cached for greater performance (and somewhat greater 36 # clarity too). 37 38 namespace eval ::tcl { 39 variable WordBreakRE 40 array set WordBreakRE {} 41 42 proc UpdateWordBreakREs args { 43 # Ignores the arguments 44 global tcl_wordchars tcl_nonwordchars 45 variable WordBreakRE 46 47 # To keep the RE strings short... 48 set letter $tcl_wordchars 49 set space $tcl_nonwordchars 50 51 set WordBreakRE(after) "$letter$space|$space$letter" 52 set WordBreakRE(before) "^.*($letter$space|$space$letter)" 53 set WordBreakRE(end) "$space*$letter+$space" 54 set WordBreakRE(next) "$letter*$space+$letter" 55 set WordBreakRE(previous) "$space*($letter+)$space*\$" 56 } 57 58 # Initialize the cache 59 UpdateWordBreakREs 60 trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs 61 trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs 62 } 63 64 # tcl_wordBreakAfter -- 65 # 66 # This procedure returns the index of the first word boundary after the 67 # starting point in the given string, or -1 if there are no more boundaries in 68 # the given string. The index returned refers to the first character of the 69 # pair that comprises a boundary. 70 # 71 # Arguments: 72 # str - String to search. 73 # start - Index into string specifying starting point. 74 75 proc tcl_wordBreakAfter {str start} { 76 variable ::tcl::WordBreakRE 77 set result {-1 -1} 78 regexp -indices -start $start -- $WordBreakRE(after) $str result 79 return [lindex $result 1] 80 } 81 82 # tcl_wordBreakBefore -- 83 # 84 # This procedure returns the index of the first word boundary before the 85 # starting point in the given string, or -1 if there are no more boundaries in 86 # the given string. The index returned refers to the second character of the 87 # pair that comprises a boundary. 88 # 89 # Arguments: 90 # str - String to search. 91 # start - Index into string specifying starting point. 92 93 proc tcl_wordBreakBefore {str start} { 94 variable ::tcl::WordBreakRE 95 set result {-1 -1} 96 regexp -indices -- $WordBreakRE(before) [string range $str 0 $start] result 97 return [lindex $result 1] 98 } 99 100 # tcl_endOfWord -- 101 # 102 # This procedure returns the index of the first end-of-word location after a 103 # starting index in the given string. An end-of-word location is defined to be 104 # the first whitespace character following the first non-whitespace character 105 # after the starting point. Returns -1 if there are no more words after the 106 # starting point. 107 # 108 # Arguments: 109 # str - String to search. 110 # start - Index into string specifying starting point. 111 112 proc tcl_endOfWord {str start} { 113 variable ::tcl::WordBreakRE 114 set result {-1 -1} 115 regexp -indices -start $start -- $WordBreakRE(end) $str result 116 return [lindex $result 1] 117 } 118 119 # tcl_startOfNextWord -- 120 # 121 # This procedure returns the index of the first start-of-word location after a 122 # starting index in the given string. A start-of-word location is defined to 123 # be a non-whitespace character following a whitespace character. Returns -1 124 # if there are no more start-of-word locations after the starting point. 125 # 126 # Arguments: 127 # str - String to search. 128 # start - Index into string specifying starting point. 129 130 proc tcl_startOfNextWord {str start} { 131 variable ::tcl::WordBreakRE 132 set result {-1 -1} 133 regexp -indices -start $start -- $WordBreakRE(next) $str result 134 return [lindex $result 1] 135 } 136 137 # tcl_startOfPreviousWord -- 138 # 139 # This procedure returns the index of the first start-of-word location before 140 # a starting index in the given string. 141 # 142 # Arguments: 143 # str - String to search. 144 # start - Index into string specifying starting point. 145 146 proc tcl_startOfPreviousWord {str start} { 147 variable ::tcl::WordBreakRE 148 set word {-1 -1} 149 regexp -indices -- $WordBreakRE(previous) [string range $str 0 $start-1] \ 150 result word 151 return [lindex $word 0] 152 }