| 
									
										
										
										
											2010-01-12 19:52:03 +01:00
										 |  |  | " Script to extract tables from Unicode .txt files, to be used in src/mbyte.c. | 
					
						
							|  |  |  | " The format of the UnicodeData.txt file is explained here: | 
					
						
							|  |  |  | " http://www.unicode.org/Public/5.1.0/ucd/UCD.html | 
					
						
							|  |  |  | " For the other files see the header. | 
					
						
							|  |  |  | " | 
					
						
							| 
									
										
										
										
											2017-06-22 15:27:37 +02:00
										 |  |  | " Might need to update the URL to the emoji-data.txt | 
					
						
							| 
									
										
										
										
											2010-01-12 19:52:03 +01:00
										 |  |  | " Usage: Vim -S <this-file> | 
					
						
							|  |  |  | " | 
					
						
							|  |  |  | " Author: Bram Moolenaar | 
					
						
							| 
									
										
										
										
											2020-08-30 17:20:20 +02:00
										 |  |  | " Last Update: 2020 Aug 24 | 
					
						
							| 
									
										
										
										
											2010-01-12 19:52:03 +01:00
										 |  |  | 
 | 
					
						
							" Parse the lines of UnicodeData.txt in the current buffer.
							" Every line is split on ';' (white space around the separator is dropped,
							" empty fields are kept) and the resulting list is appended to s:dataprops.
							" Gives an error message and stops at the first line that does not have
							" exactly 15 fields.
							func! ParseDataToProps()
							  let s:dataprops = []
							  for lnum in range(1, line('$'))
							    let fields = split(getline(lnum), '\s*;\s*', 1)
							    if len(fields) == 15
							      call add(s:dataprops, fields)
							      continue
							    endif
							    echoerr 'Found ' . len(fields) . ' items in line ' . lnum . ', expected 15'
							    return
							  endfor
							endfunc
					
						
							|  |  |  | 
 | 
					
						
							" Parse the lines of CaseFolding.txt in the current buffer.
							" Lines starting with '#' and blank lines are skipped.  Every other line
							" is split on ';' into a list that is appended to s:foldprops.
							" Gives an error message and stops at the first data line that does not
							" have exactly 4 fields.
							func! ParseFoldProps()
							  let s:foldprops = []
							  for lnum in range(1, line('$'))
							    let text = getline(lnum)
							    " Comment lines and empty lines carry no data.
							    if text =~ '^#' || text =~ '^\s*$'
							      continue
							    endif
							    let fields = split(text, '\s*;\s*', 1)
							    if len(fields) != 4
							      echoerr 'Found ' . len(fields) . ' items in line ' . lnum . ', expected 4'
							      return
							    endif
							    call add(s:foldprops, fields)
							  endfor
							endfunc
					
						
							|  |  |  | 
 | 
					
						
							" Parse the lines of EastAsianWidth.txt in the current buffer.
							" Lines starting with '#' and blank lines are skipped.  Every other line
							" is split on ';' into a list that is appended to s:widthprops.
							" Gives an error message and stops at the first data line that does not
							" have exactly 2 fields.
							func! ParseWidthProps()
							  let s:widthprops = []
							  for lnum in range(1, line('$'))
							    let text = getline(lnum)
							    " Comment lines and empty lines carry no data.
							    if text =~ '^#' || text =~ '^\s*$'
							      continue
							    endif
							    let fields = split(text, '\s*;\s*', 1)
							    if len(fields) != 2
							      echoerr 'Found ' . len(fields) . ' items in line ' . lnum . ', expected 2'
							      return
							    endif
							    call add(s:widthprops, fields)
							  endfor
							endfunc
					
						
							|  |  |  | 
 | 
					
						
							" Build the toLower or toUpper table in a new buffer.
							" Uses s:dataprops.
							"   name   suffix of the table, the buffer and C array are named "to{name}"
							"   index  index of the field in a s:dataprops entry that holds the mapped
							"          character (hexadecimal); entries where it is empty are skipped
							" Consecutive entries that have the same offset to the mapped character and
							" a constant distance between them are merged into one range.
							func! BuildCaseTable(name, index)
							  let ranges = []
							  let first = -1
							  let last = -1
							  let stride = 0
							  let delta = -1
							  for prop in s:dataprops
							    if prop[a:index] == ''
							      continue
							    endif
							    let code = ('0x' . prop[0]) + 0
							    let mapped = ('0x' . prop[a:index]) + 0
							    let extends = first >= 0 && delta == mapped - code
							          \ && (stride == 0 || code - last == stride)
							    if extends
							      " continue with same range.
							      let stride = code - last
							    else
							      if first >= 0
							        " produce previous range
							        call Range(ranges, first, last, stride, delta)
							      endif
							      let first = code
							      let stride = 0
							      let delta = mapped - code
							    endif
							    let last = code
							  endfor
							  if first >= 0
							    call Range(ranges, first, last, stride, delta)
							  endif
							  " New buffer to put the result in.
							  new
							  execute "file to" . a:name
							  call setline(1, ["static convertStruct to" . a:name . "[] =", "{"])
							  call append('$', ranges)
							  " remove last comma
							  call setline('$', getline('$')[:-2])
							  call append('$', "};")
							  wincmd p
							endfunc
					
						
							|  |  |  | 
 | 
					
						
							" Build the foldCase table in a new buffer.
							" Uses s:foldprops.  Only entries with status 'C' or 'S' are used; field 0
							" is the character and field 2 the character it folds to (hexadecimal).
							" Consecutive entries that have the same offset to the folded character and
							" a constant distance between them are merged into one range.
							func! BuildFoldTable()
							  let ranges = []
							  let first = -1
							  let last = -1
							  let stride = 0
							  let delta = -1
							  for prop in s:foldprops
							    if prop[1] != 'C' && prop[1] != 'S'
							      continue
							    endif
							    let code = ('0x' . prop[0]) + 0
							    let folded = ('0x' . prop[2]) + 0
							    let extends = first >= 0 && delta == folded - code
							          \ && (stride == 0 || code - last == stride)
							    if extends
							      " continue with same range.
							      let stride = code - last
							    else
							      if first >= 0
							        " produce previous range
							        call Range(ranges, first, last, stride, delta)
							      endif
							      let first = code
							      let stride = 0
							      let delta = folded - code
							    endif
							    let last = code
							  endfor
							  if first >= 0
							    call Range(ranges, first, last, stride, delta)
							  endif
							  " New buffer to put the result in.
							  new
							  file foldCase
							  call setline(1, ["static convertStruct foldCase[] =", "{"])
							  call append('$', ranges)
							  " remove last comma
							  call setline('$', getline('$')[:-2])
							  call append('$', "};")
							  wincmd p
							endfunc
					
						
							|  |  |  | 
 | 
					
						
							" Append one convertStruct initializer line to the list "ranges".
							" The line has the form "{0x<start>,0x<end>,<step>,<add>}," preceded by a
							" tab.  A step of zero (a range with a single character) is written as -1.
							func! Range(ranges, start, end, step, add)
							  let step = a:step == 0 ? -1 : a:step
							  call add(a:ranges, printf("\t{0x%x,0x%x,%d,%d},", a:start, a:end, step, a:add))
							endfunc
					
						
							|  |  |  | 
 | 
					
						
							" Build the combining table in a new buffer named "combining".
							" Uses s:dataprops: characters with general category 'Mn' or 'Me' are
							" included, adjacent characters are merged into one {first, last} interval.
							func! BuildCombiningTable()
							  let ranges = []
							  let first = -1
							  let last = -1
							  for prop in s:dataprops
							    " 'Mc' is not included: it was removed because such characters do take
							    " up space.
							    if prop[2] != 'Mn' && prop[2] != 'Me'
							      continue
							    endif
							    let code = ('0x' . prop[0]) + 0
							    if first < 0 || last + 1 != code
							      if first >= 0
							        " produce previous range
							        call add(ranges, printf("\t{0x%04x, 0x%04x},", first, last))
							      endif
							      let first = code
							    endif
							    " Either starts a new range or continues the current one.
							    let last = code
							  endfor
							  if first >= 0
							    call add(ranges, printf("\t{0x%04x, 0x%04x},", first, last))
							  endif
							  " New buffer to put the result in.
							  new
							  file combining
							  call setline(1, ["    static struct interval combining[] =", "    {"])
							  call append('$', ranges)
							  " remove last comma
							  call setline('$', getline('$')[:-2])
							  call append('$', "    };")
							  wincmd p
							endfunc
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-01-27 18:29:26 +01:00
										 " Build the double width or ambiguous width table in a new buffer.
										 " Uses s:widthprops and s:dataprops.  The data table is only scanned
										 " forward, thus both lists are expected to be in ascending character order.
										 "   pattern    regexp matched against the first character of the width
										 "              property; 'A' selects the layout of the ambiguous table
										 "   tableName  name of the new buffer and of the C array
										 " Every interval that is produced is also added to s:ambitable (when
										 " pattern is 'A') or s:doubletable (otherwise).
										 func! BuildWidthTable(pattern, tableName)
										   let start = -1
										   let end = -1
										   let ranges = []
										   let dataidx = 0
										   let datalen = len(s:dataprops)
										   " Account for indentation differences between ambiguous and doublewidth
										   " table in mbyte.c
										   let is_ambi = a:pattern == 'A'
										   let spc = is_ambi ? '    ' : "\t"
										   for p in s:widthprops
										     if p[1][0] !~ a:pattern
										       continue
										     endif
										     if p[0] =~ '\.\.'
										       " It is a range.  We don't check for composing char then.
										       let rng = split(p[0], '\.\.')
										       if len(rng) != 2
										         echoerr "Cannot parse range: '" . p[0] . "' in width table"
										         " Skip this entry, rng[1] may not exist.
										         continue
										       endif
										       let n = ('0x' . rng[0]) + 0
										       let n_last = ('0x' . rng[1]) + 0
										     else
										       let n = ('0x' . p[0]) + 0
										       let n_last = n
										     endif
										     " Find this char in the data table.  Stop at the end of the table: an
										     " unbounded scan would fail with E684 on every iteration and never
										     " finish when the character is beyond the last data entry.
										     let dn = -1
										     while dataidx < datalen
										       let dn = ('0x' . s:dataprops[dataidx][0]) + 0
										       if dn >= n
										         break
										       endif
										       let dataidx += 1
										     endwhile
										     if dn != n && n_last == n
										       echoerr "Cannot find character " . n . " in data table"
										     endif
										     " Only use the char when it's not a composing char.
										     " But use all chars from a range.
										     " Without a data entry the char is taken as not composing.
										     let composing = 0
										     if dataidx < datalen
										       let cat = s:dataprops[dataidx][2]
										       let composing = cat == 'Mn' || cat == 'Mc' || cat == 'Me'
										     endif
										     if n_last > n || !composing
										       if start < 0 || end + 1 != n
										         if start >= 0
										           " produce previous range
										           call add(ranges, printf("%s{0x%04x, 0x%04x},", spc, start, end))
										           if is_ambi
										             call add(s:ambitable, [start, end])
										           else
										             call add(s:doubletable, [start, end])
										           endif
										         endif
										         let start = n
										       endif
										       " Either starts a new range or continues the current one.
										       let end = n_last
										     endif
										   endfor
										   if start >= 0
										     call add(ranges, printf("%s{0x%04x, 0x%04x},", spc, start, end))
										     if is_ambi
										       call add(s:ambitable, [start, end])
										     else
										       call add(s:doubletable, [start, end])
										     endif
										   endif
										   " New buffer to put the result in.
										   new
										   exe "file " . a:tableName
										   " The ambiguous table is not indented, the doublewidth table is.
										   let indent = is_ambi ? "" : "    "
										   call setline(1, [indent . "static struct interval " . a:tableName . "[] =", indent . "{"])
										   if !empty(ranges)
										     call append('$', ranges)
										     " remove last comma; without ranges this would remove the "{"
										     call setline('$', getline('$')[:-2])
										   endif
										   call append('$', indent . "};")
										   wincmd p
										 endfunc
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-30 17:20:20 +02:00
										 |  |  | 
 | 
					
						
							" Add the characters found in "lines" to the dictionary "chardict".
							" Each line starts with a hexadecimal character "12ab .." or with a range
							" "12ab..56cd ...".  For every character (all characters of a range,
							" including both ends) the entry chardict[number] is set to 1.
							func AddLinesToCharDict(lines, chardict)
							  for text in a:lines
							    let tokens = split(text, '\.\.')
							    let lo = str2nr(tokens[0], 16)
							    " A single character is a range of one.
							    let hi = len(tokens) == 1 ? lo : str2nr(tokens[1], 16)
							    for code in range(lo, hi)
							      let a:chardict[code] = 1
							    endfor
							  endfor
							endfunc
					
						
							| 
									
										
										
										
											2016-03-21 22:09:44 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-30 17:20:20 +02:00
										 " Self test for AddLinesToCharDict(): single characters, a range and a
										 " character that is already covered by a range.
										 " Returns 1 and gives an error message when v:errors is not empty
										 " afterwards, 0 otherwise.
										 func Test_AddLinesToCharDict()
										   let input = [
										         \ '1234 blah blah',
										         \ '1235 blah blah',
										         \ '12a0..12a2 blah blah',
										         \ '12a1 blah blah',
										         \ ]
										   let expected = {0x1234: 1, 0x1235: 1,
										         \ 0x12a0: 1, 0x12a1: 1, 0x12a2: 1,
										         \ }
										   let actual = {}
										   call AddLinesToCharDict(input, actual)
										   call assert_equal(expected, actual)
										   if v:errors == []
										     return 0
										   endif
										   echoerr 'AddLinesToCharDict' v:errors
										   return 1
										 endfunc
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							" Turn dictionary "chardict", which has a key for every character number,
							" into a list of [low, high] pairs sorted by character number.  Each pair
							" covers one run of consecutive character numbers.
							" Returns an empty list when "chardict" is empty.
							func CharDictToPairList(chardict)
							  let result = []
							  let keys = keys(a:chardict)->map('str2nr(v:val)')->sort('N')
							  " Without this check keys[0] below fails with E684.
							  if empty(keys)
							    return result
							  endif
							  let low = keys[0]
							  let high = low
							  for key in keys[1:]
							    if key > high + 1
							      " There is a gap: finish the current pair and start a new one.
							      call add(result, [low, high])
							      let low = key
							    endif
							    let high = key
							  endfor
							  call add(result, [low, high])
							  return result
							endfunc
					
						
							| 
									
										
										
										
											2016-03-21 22:09:44 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-30 17:20:20 +02:00
										 " Self test for CharDictToPairList(): a run of three characters, two
										 " isolated characters and a run of two.
										 " Returns 1 and gives an error message when v:errors is not empty
										 " afterwards, 0 otherwise.
										 func Test_CharDictToPairList()
										   let input = {0x1020: 1, 0x1021: 1, 0x1022: 1,
										         \ 0x1024: 1,
										         \ 0x2022: 1,
										         \ 0x2024: 1, 0x2025: 1}
										   let expected = [
										         \ [0x1020, 0x1022],
										         \ [0x1024, 0x1024],
										         \ [0x2022, 0x2022],
										         \ [0x2024, 0x2025],
										         \ ]
										   call assert_equal(expected, CharDictToPairList(input))
										   if v:errors == []
										     return 0
										   endif
										   echoerr 'CharDictToPairList' v:errors
										   return 1
										 endfunc
					
						
							| 
									
										
										
										
											2016-03-24 18:24:58 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-21 22:09:44 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-30 17:20:20 +02:00
										 " Build the emoji tables from the current buffer, each in a new buffer:
										 " - "emoji_all": all characters with the Emoji property
										 " - "emoji_wide": characters with the Emoji_Presentation or
										 "   Emoji_Modifier_Base property that are not in s:ambitable or
										 "   s:doubletable
										 " Only lines that start with a digit 1-9 are used.
										 func BuildEmojiTable()
										   " First make the table for all emojis.
										   let wanted = '; Emoji\s\+#\s'
										   let codes = getline(1, '$')
										         \ ->filter({_, v -> v =~ "^[1-9]" && v =~ wanted})
										         \ ->map({_, v -> matchstr(v, "^\\S\\+")})
										   " Make a dictionary with an entry for each character.
										   let chardict = {}
										   call AddLinesToCharDict(codes, chardict)
										   let allranges = CharDictToPairList(chardict)
										         \ ->map({_, pair -> printf("    {0x%04x, 0x%04x},", pair[0], pair[1])})
										   " New buffer to put the result in.
										   new
										   file emoji_all
										   call setline(1, ["static struct interval emoji_all[] =", "{"])
										   call append('$', allranges)
										   " remove last comma
										   call setline('$', getline('$')[:-2])
										   call append('$', "};")
										   wincmd p
										   " Make the table for wide emojis.
										   let wanted = '; Emoji_\(Presentation\|Modifier_Base\)\s\+#\s'
										   let codes = getline(1, '$')
										         \ ->filter({_, v -> v =~ "^[1-9]" && v =~ wanted})
										         \ ->map({_, v -> matchstr(v, "^\\S\\+")})
										   " Make a dictionary with an entry for each character.
										   let chardict = {}
										   call AddLinesToCharDict(codes, chardict)
										   " exclude characters that are in the "ambiguous" or "doublewidth" table
										   for [lo, hi] in s:ambitable + s:doubletable
										     for code in range(lo, hi)
										       if has_key(chardict, code)
										         call remove(chardict, code)
										       endif
										     endfor
										   endfor
										   let wide_ranges = CharDictToPairList(chardict)
										         \ ->map({_, pair -> printf("\t{0x%04x, 0x%04x},", pair[0], pair[1])})
										   " New buffer to put the result in.
										   new
										   file emoji_wide
										   call setline(1, ["    static struct interval emoji_wide[] =", "    {"])
										   call append('$', wide_ranges)
										   " remove last comma
										   call setline('$', getline('$')[:-2])
										   call append('$', "    };")
										   wincmd p
										 endfunc
					
						
							| 
									
										
										
										
											2010-01-12 19:52:03 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-30 17:20:20 +02:00
										 |  |  | " First test a few things | 
					
						
							|  |  |  | let v:errors = [] | 
					
						
							|  |  |  | if Test_AddLinesToCharDict() || Test_CharDictToPairList() | 
					
						
							|  |  |  |   finish | 
					
						
							|  |  |  | endif | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-21 14:22:00 +02:00
										 |  |  | " Try to avoid hitting E36 | 
					
						
							|  |  |  | set equalalways | 
					
						
							| 
									
										
										
										
											2010-01-12 19:52:03 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | " Edit the Unicode text file.  Requires the netrw plugin. | 
					
						
							|  |  |  | edit http://unicode.org/Public/UNIDATA/UnicodeData.txt | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Parse each line, create a list of lists. | 
					
						
							|  |  |  | call ParseDataToProps() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Build the toLower table. | 
					
						
							|  |  |  | call BuildCaseTable("Lower", 13) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Build the toUpper table. | 
					
						
							|  |  |  | call BuildCaseTable("Upper", 12) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Build the ranges of composing chars. | 
					
						
							|  |  |  | call BuildCombiningTable() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Edit the case folding text file.  Requires the netrw plugin. | 
					
						
							|  |  |  | edit http://www.unicode.org/Public/UNIDATA/CaseFolding.txt | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Parse each line, create a list of lists. | 
					
						
							|  |  |  | call ParseFoldProps() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Build the foldCase table. | 
					
						
							|  |  |  | call BuildFoldTable() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Edit the width text file.  Requires the netrw plugin. | 
					
						
							|  |  |  | edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Parse each line, create a list of lists. | 
					
						
							|  |  |  | call ParseWidthProps() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-01-27 18:29:26 +01:00
										 |  |  | " Build the double width table. | 
					
						
							| 
									
										
										
										
											2016-03-21 22:09:44 +01:00
										 |  |  | let s:doubletable = [] | 
					
						
							| 
									
										
										
										
											2010-01-27 18:29:26 +01:00
										 |  |  | call BuildWidthTable('[WF]', 'doublewidth') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | " Build the ambiguous width table. | 
					
						
							| 
									
										
										
										
											2016-03-21 22:09:44 +01:00
										 |  |  | let s:ambitable = [] | 
					
						
							| 
									
										
										
										
											2010-01-27 18:29:26 +01:00
										 |  |  | call BuildWidthTable('A', 'ambiguous') | 
					
						
							| 
									
										
										
										
											2016-03-19 18:42:29 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | " Edit the emoji text file.  Requires the netrw plugin. | 
					
						
							| 
									
										
										
										
											2022-09-25 19:25:51 +01:00
										 |  |  | " commented out, because it drops too many characters | 
					
						
							|  |  |  | "edit https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt | 
					
						
							|  |  |  | " | 
					
						
							|  |  |  | "" Build the emoji table. Ver. 1.0 - 6.0 | 
					
						
							|  |  |  | "" Must come after the "ambiguous" and "doublewidth" tables | 
					
						
							|  |  |  | "call BuildEmojiTable() |