unicode

package unicode

import "unicode"

unicode 包提供了用于测试 Unicode 码点某些属性的数据和函数。

Example (Is)

Functions starting with "Is" can be used to inspect which range table a rune belongs to. Note that a rune may fit into more than one range.

package main

import (
	"fmt"
	"unicode"
)

// main walks every rune of a mixed sample string and reports each
// classification predicate from package unicode that the rune satisfies.
// A rune may satisfy several predicates, so one rune can yield several lines.
func main() {
	// Sample text containing runes of several different kinds.
	const mixed = "\b5Ὂg̀9! ℃ᾭG"

	// The checks are applied in this fixed order, which determines the
	// order of the lines printed for each rune.
	checks := []struct {
		holds func(rune) bool
		line  string
	}{
		{unicode.IsControl, "\tis control rune"},
		{unicode.IsDigit, "\tis digit rune"},
		{unicode.IsGraphic, "\tis graphic rune"},
		{unicode.IsLetter, "\tis letter rune"},
		{unicode.IsLower, "\tis lower case rune"},
		{unicode.IsMark, "\tis mark rune"},
		{unicode.IsNumber, "\tis number rune"},
		{unicode.IsPrint, "\tis printable rune"},
		// The negated printable check is reported as its own line.
		{func(r rune) bool { return !unicode.IsPrint(r) }, "\tis not printable rune"},
		{unicode.IsPunct, "\tis punct rune"},
		{unicode.IsSpace, "\tis space rune"},
		{unicode.IsSymbol, "\tis symbol rune"},
		{unicode.IsTitle, "\tis title case rune"},
		{unicode.IsUpper, "\tis upper case rune"},
	}

	for _, r := range mixed {
		fmt.Printf("For %q:\n", r)
		for _, check := range checks {
			if check.holds(r) {
				fmt.Println(check.line)
			}
		}
	}
}

Output:

For '\b':
	is control rune
	is not printable rune
For '5':
	is digit rune
	is graphic rune
	is number rune
	is printable rune
For 'Ὂ':
	is graphic rune
	is letter rune
	is printable rune
	is upper case rune
For 'g':
	is graphic rune
	is letter rune
	is lower case rune
	is printable rune
For '̀':
	is graphic rune
	is mark rune
	is printable rune
For '9':
	is digit rune
	is graphic rune
	is number rune
	is printable rune
For '!':
	is graphic rune
	is printable rune
	is punct rune
For ' ':
	is graphic rune
	is printable rune
	is space rune
For '℃':
	is graphic rune
	is printable rune
	is symbol rune
For 'ᾭ':
	is graphic rune
	is letter rune
	is printable rune
	is title case rune
For 'G':
	is graphic rune
	is letter rune
	is printable rune
	is upper case rune

Index

Constants
Variables
func In(r rune, ranges ...*RangeTable) bool
func Is(rangeTab *RangeTable, r rune) bool
func IsControl(r rune) bool
func IsDigit(r rune) bool
func IsGraphic(r rune) bool
func IsLetter(r rune) bool
func IsLower(r rune) bool
func IsMark(r rune) bool
func IsNumber(r rune) bool
func IsOneOf(ranges []*RangeTable, r rune) bool
func IsPrint(r rune) bool
func IsPunct(r rune) bool
func IsSpace(r rune) bool
func IsSymbol(r rune) bool
func IsTitle(r rune) bool
func IsUpper(r rune) bool
func SimpleFold(r rune) rune
func To(_case int, r rune) rune
func ToLower(r rune) rune
func ToTitle(r rune) rune
func ToUpper(r rune) rune
type CaseRange
type Range16
type Range32
type RangeTable
type SpecialCase
- func (special SpecialCase) ToLower(r rune) rune
- func (special SpecialCase) ToTitle(r rune) rune
- func (special SpecialCase) ToUpper(r rune) rune

Constants

// Frequently used Unicode code point limits and the replacement character.
const (
	MaxRune         = '\U0010FFFF' // Maximum valid Unicode code point.
	ReplacementChar = '\uFFFD'     // Represents invalid code points.
	MaxASCII        = '\u007F'     // Maximum ASCII value.
	MaxLatin1       = '\u00FF'     // Maximum Latin-1 value.
)

// Indices into the Delta arrays inside CaseRanges for case mapping.
const (
	UpperCase = iota // 0
	LowerCase        // 1
	TitleCase        // 2
	MaxCase          // 3; one past the last case index
)

CaseRanges 中 Delta 数组的索引，用于大小写映射。

const (
	UpperLower = MaxRune + 1 // （不可能是有效的 delta 值。）
)

如果 CaseRange 的 Delta 字段为 UpperLower，则表示该 CaseRange 表示如下形式的序列（例如）： Upper Lower Upper Lower。

const Version = "15.0.0"

Version is the Unicode edition from which the tables are derived.

Variables
 var (	Cc     	Cf     	Cn     	Co     	Cs     	Digit 	Nd     	LC     	Letter 	L      	Lm     	Lo     	Lower 	Ll     	Mark 	M      	Mc     	Me     	Mn     	Nl     	No     	Number 	N      	Other 	C      	Pc     	Pd     	Pe     	Pf     	Pi     	Po     	Ps     	Punct 	P      	Sc     	Sk     	Sm     	So     	Space 	Z      	Symbol 	S      	Title 	Lt     	Upper 	Lu     	Zl     	Zp     	Zs     )
 These variables have type *RangeTable. 
var (	Adlam 	Ahom 	Anatolian_Hieroglyphs	Arabic 	Armenian 	Avestan 	Balinese 	Bamum 	Bassa_Vah 	Batak 	Bengali 	Bhaiksuki 	Bopomofo 	Brahmi 	Braille 	Buginese 	Buhid 	Canadian_Aboriginal	Carian 	Caucasian_Albanian	Chakma 	Cham 	Cherokee 	Chorasmian 	Common 	Coptic 	Cuneiform 	Cypriot 	Cypro_Minoan 	Cyrillic 	Deseret 	Devanagari 	Dives_Akuru 	Dogra 	Duployan 	Egyptian_Hieroglyphs	Elbasan 	Elymaic 	Ethiopic 	Georgian 	Glagolitic 	Gothic 	Grantha 	Greek 	Gujarati 	Gunjala_Gondi 	Gurmukhi 	Han 	Hangul 	Hanifi_Rohingya 	Hanunoo 	Hatran 	Hebrew 	Hiragana 	Imperial_Aramaic 	Inherited 	Inscriptional_Pahlavi	Inscriptional_Parthian	Javanese 	Kaithi 	Kannada 	Katakana 	Kawi 	Kayah_Li 	Kharoshthi 	Khitan_Small_Script	Khmer 	Khojki 	Khudawadi 	Lao 	Latin 	Lepcha 	Limbu 	Linear_A 	Linear_B 	Lisu 	Lycian 	Lydian 	Mahajani 	Makasar 	Malayalam 	Mandaic 	Manichaean 	Marchen 	Masaram_Gondi 	Medefaidrin 	Meetei_Mayek 	Mende_Kikakui 	Meroitic_Cursive 	Meroitic_Hieroglyphs	Miao 	Modi 	Mongolian 	Mro 	Multani 	Myanmar 	Nabataean 	Nag_Mundari 	Nandinagari 	New_Tai_Lue 	Newa 	Nko 	Nushu 	Nyiakeng_Puachue_Hmong	Ogham 	Ol_Chiki 	Old_Hungarian 	Old_Italic 	Old_North_Arabian	Old_Permic 	Old_Persian 	Old_Sogdian 	Old_South_Arabian	Old_Turkic 	Old_Uyghur 	Oriya 	Osage 	Osmanya 	Pahawh_Hmong 	Palmyrene 	Pau_Cin_Hau 	Phags_Pa 	Phoenician 	Psalter_Pahlavi 	Rejang 	Runic 	Samaritan 	Saurashtra 	Sharada 	Shavian 	Siddham 	SignWriting 	Sinhala 	Sogdian 	Sora_Sompeng 	Soyombo 	Sundanese 	Syloti_Nagri 	Syriac 	Tagalog 	Tagbanwa 	Tai_Le 	Tai_Tham 	Tai_Viet 	Takri 	Tamil 	Tangsa 	Tangut 	Telugu 	Thaana 	Thai 	Tibetan 	Tifinagh 	Tirhuta 	Toto 	Ugaritic 	Vai 	Vithkuqi 	Wancho 	Warang_Citi 	Yezidi 	Yi 	Zanabazar_Square )
 These variables have type *RangeTable. 
var (	ASCII_Hex_Digit 	Bidi_Control 	Dash 	Deprecated 	Diacritic 	Extender 	Hex_Digit 	Hyphen 	IDS_Binary_Operator	IDS_Trinary_Operator	Ideographic 	Join_Control 	Logical_Order_Exception	Noncharacter_Code_Point	Other_Alphabetic 	Other_Default_Ignorable_Code_Point	Other_Grapheme_Extend	Other_ID_Continue	Other_ID_Start 	Other_Lowercase 	Other_Math 	Other_Uppercase 	Pattern_Syntax 	Pattern_White_Space	Prepended_Concatenation_Mark	Quotation_Mark 	Radical 	Regional_Indicator	STerm 	Sentence_Terminal	Soft_Dotted 	Terminal_Punctuation	Unified_Ideograph	Variation_Selector	White_Space )
 These variables have type *RangeTable. 
var CaseRanges = _CaseRanges

CaseRanges is the table describing case mappings for all letters with non-self mappings.

var  	"C": 	"Cc": 	"Cf": 	"Cn": 	"Co": 	"Cs": 	"L": 	"LC": 	"Ll": 	"Lm": 	"Lo": 	"Lt": 	"Lu": 	"M": 	"Mc": 	"Me": 	"Mn": 	"N": 	"Nd": 	"Nl": 	"No": 	"P": 	"Pc": 	"Pd": 	"Pe": 	"Pf": 	"Pi": 	"Po": 	"Ps": 	"S": 	"Sc": 	"Sk": 	"Sm": 	"So": 	"Z": 	"Zl": 	"Zp": 	"Zs": }
 Categories is the set of Unicode category tables. 
var 	"Cased_Letter": 	"Close_Punctuation": 	"Combining_Mark": 	"Connector_Punctuation":	"Control": 	"Currency_Symbol": 	"Dash_Punctuation": 	"Decimal_Number": 	"Enclosing_Mark": 	"Final_Punctuation": 	"Format": 	"Initial_Punctuation":	"Letter": 	"Letter_Number": 	"Line_Separator": 	"Lowercase_Letter": 	"Mark": 	"Math_Symbol": 	"Modifier_Letter": 	"Modifier_Symbol": 	"Nonspacing_Mark": 	"Number": 	"Open_Punctuation": 	"Other": 	"Other_Letter": 	"Other_Number": 	"Other_Punctuation": 	"Other_Symbol": 	"Paragraph_Separator":	"Private_Use": 	"Punctuation": 	"Separator": 	"Space_Separator": 	"Spacing_Mark": 	"Surrogate": 	"Symbol": 	"Titlecase_Letter": 	"Unassigned": 	"Uppercase_Letter": 	"cntrl": 	"digit": 	"punct": }
 CategoryAliases maps category aliases to standard category names. 
var 	"L": 	"Ll": 	"Lt": 	"Lu": 	"M": 	"Mn": }
 FoldCategory maps a category name to a table of code points outside the category that are equivalent under simple case folding to code points inside the category. If there is no entry for a category name, there are no such points. 
var  	"Common": 	"Greek": 	"Inherited":}
 FoldScript maps a script name to a table of code points outside the script that are equivalent under simple case folding to code points inside the script. If there is no entry for a script name, there are no such points. 
var GraphicRanges = []*RangeTable{
	L, M, N, P, S, Zs,
}

GraphicRanges 定义了按 Unicode 标准划分的图形字符集合。

var PrintRanges = []*RangeTable{
	L, M, N, P, S,
}

PrintRanges 定义了按 Go 标准划分的可打印字符集合。ASCII 空格 U+0020 单独处理。

var  	"ASCII_Hex_Digit": 	"Bidi_Control": 	"Dash": 	"Deprecated": 	"Diacritic": 	"Extender": 	"Hex_Digit": 	"Hyphen": 	"IDS_Binary_Operator":	"IDS_Trinary_Operator":	"Ideographic": 	"Join_Control": 	"Logical_Order_Exception":	"Noncharacter_Code_Point":	"Other_Alphabetic": 	"Other_Default_Ignorable_Code_Point"	"Other_Grapheme_Extend":	"Other_ID_Continue": 	"Other_ID_Start": 	"Other_Lowercase": 	"Other_Math": 	"Other_Uppercase": 	"Pattern_Syntax": 	"Pattern_White_Space":	"Prepended_Concatenation_Mark":	"Quotation_Mark": 	"Radical": 	"Regional_Indicator": 	"Sentence_Terminal": 	"STerm": 	"Soft_Dotted": 	"Terminal_Punctuation":	"Unified_Ideograph": 	"Variation_Selector": 	"White_Space": }
 Properties is the set of Unicode property tables. 
var Scr 	"Adlam": 	"Ahom": 	"Anatolian_Hieroglyphs":	"Arabic": 	"Armenian": 	"Avestan": 	"Balinese": 	"Bamum": 	"Bassa_Vah": 	"Batak": 	"Bengali": 	"Bhaiksuki": 	"Bopomofo": 	"Brahmi": 	"Braille": 	"Buginese": 	"Buhid": 	"Canadian_Aboriginal":	"Carian": 	"Caucasian_Albanian": 	"Chakma": 	"Cham": 	"Cherokee": 	"Chorasmian": 	"Common": 	"Coptic": 	"Cuneiform": 	"Cypriot": 	"Cypro_Minoan": 	"Cyrillic": 	"Deseret": 	"Devanagari": 	"Dives_Akuru": 	"Dogra": 	"Duployan": 	"Egyptian_Hieroglyphs":	"Elbasan": 	"Elymaic": 	"Ethiopic": 	"Georgian": 	"Glagolitic": 	"Gothic": 	"Grantha": 	"Greek": 	"Gujarati": 	"Gunjala_Gondi": 	"Gurmukhi": 	"Han": 	"Hangul": 	"Hanifi_Rohingya": 	"Hanunoo": 	"Hatran": 	"Hebrew": 	"Hiragana": 	"Imperial_Aramaic": 	"Inherited": 	"Inscriptional_Pahlavi":	"Inscriptional_Parthian":	"Javanese": 	"Kaithi": 	"Kannada": 	"Katakana": 	"Kawi": 	"Kayah_Li": 	"Kharoshthi": 	"Khitan_Small_Script":	"Khmer": 	"Khojki": 	"Khudawadi": 	"Lao": 	"Latin": 	"Lepcha": 	"Limbu": 	"Linear_A": 	"Linear_B": 	"Lisu": 	"Lycian": 	"Lydian": 	"Mahajani": 	"Makasar": 	"Malayalam": 	"Mandaic": 	"Manichaean": 	"Marchen": 	"Masaram_Gondi": 	"Medefaidrin": 	"Meetei_Mayek": 	"Mende_Kikakui": 	"Meroitic_Cursive": 	"Meroitic_Hieroglyphs":	"Miao": 	"Modi": 	"Mongolian": 	"Mro": 	"Multani": 	"Myanmar": 	"Nabataean": 	"Nag_Mundari": 	"Nandinagari": 	"New_Tai_Lue": 	"Newa": 	"Nko": 	"Nushu": 	"Nyiakeng_Puachue_Hmong":	"Ogham": 	"Ol_Chiki": 	"Old_Hungarian": 	"Old_Italic": 	"Old_North_Arabian": 	"Old_Permic": 	"Old_Persian": 	"Old_Sogdian": 	"Old_South_Arabian": 	"Old_Turkic": 	"Old_Uyghur": 	"Oriya": 	"Osage": 	"Osmanya": 	"Pahawh_Hmong": 	"Palmyrene": 	"Pau_Cin_Hau": 	"Phags_Pa": 	"Phoenician": 	"Psalter_Pahlavi": 	"Rejang": 	"Runic": 	"Samaritan": 	"Saurashtra": 	"Sharada": 	"Shavian": 	"Siddham": 	"SignWriting": 	"Sinhala": 	"Sogdian": 	"Sora_Sompeng": 	"Soyombo": 	"Sundanese": 	"Syloti_Nagri": 	"Syriac": 	"Tagalog": 	"Tagbanwa": 	"Tai_Le": 	"Tai_Tham": 	"Tai_Viet": 	"Takri": 	
"Tamil": 	"Tangsa": 	"Tangut": 	"Telugu": 	"Thaana": 	"Thai": 	"Tibetan": 	"Tifinagh": 	"Tirhuta": 	"Toto": 	"Ugaritic": 	"Vai": 	"Vithkuqi": 	"Wancho": 	"Warang_Citi": 	"Yezidi": 	"Yi": 	"Zanabazar_Square": }
 Scripts is the set of Unicode script tables. 
Functions
func In

func In(r rune, ranges ...*RangeTable) bool

In 报告该 rune 是否属于某个范围的成员。

func Is

func Is(rangeTab *RangeTable, r rune) bool

Is 报告该 rune 是否在指定的范围表中。

func IsControl

func IsControl(r rune) bool

IsControl 报告该 rune 是否为控制字符。C（Other）Unicode 分类包含更多的码点，例如代理项；使用 Is(C, r) 来测试它们。

func IsDigit

func IsDigit(r rune) bool

IsDigit 报告该 rune 是否为十进制数字。

Example
 package main import ( 	"fmt" 	"unicode" )  func main()	fmt.Printf	fmt.Printf}
  Output:

class="w"> = MaxRune + 1 // （不可能是有效的 delta 值。） ersion = "15.0.0"
 class="w"> class="p">= _Cc // Cc is the set of Unicode characters in category Cc (Other, control). class="p">= _Cf // Cf is the set of Unicode characters in category Cf (Other, format). class="p">= _Cn // Cn is the set of Unicode characters in category Cn (Other, not assigned). class="p">= _Co // Co is the set of Unicode characters in category Co (Other, private use). class="p">= _Cs // Cs is the set of Unicode characters in category Cs (Other, surrogate). = _Nd // Digit is the set of Unicode characters with the "decimal digit" property. class="p">= _Nd // Nd is the set of Unicode characters in category Nd (Number, decimal digit). class="p">= _LC // LC is the set of Unicode characters in category LC (Letter, cased: Ll | Lt | Lu). = _L  // Letter/L is the set of Unicode letters, category L. class="p">= _L class="p">= _Lm // Lm is the set of Unicode characters in category Lm (Letter, modifier). class="p">= _Lo // Lo is the set of Unicode characters in category Lo (Letter, other). = _Ll // Lower is the set of Unicode lower case letters. class="p">= _Ll // Ll is the set of Unicode characters in category Ll (Letter, lowercase). = _M  // Mark/M is the set of Unicode mark characters, category M. class="p">= _M class="p">= _Mc // Mc is the set of Unicode characters in category Mc (Mark, spacing combining). class="p">= _Me // Me is the set of Unicode characters in category Me (Mark, enclosing). class="p">= _Mn // Mn is the set of Unicode characters in category Mn (Mark, nonspacing). class="p">= _Nl // Nl is the set of Unicode characters in category Nl (Number, letter). class="p">= _No // No is the set of Unicode characters in category No (Number, other). = _N  // Number/N is the set of Unicode number characters, category N. class="p">= _N = _C // Other/C is the set of Unicode control, special, and unassigned code points, category C. class="p">= _C class="p">= _Pc // Pc is the set of Unicode characters in category Pc (Punctuation, connector). 
class="p">= _Pd // Pd is the set of Unicode characters in category Pd (Punctuation, dash). class="p">= _Pe // Pe is the set of Unicode characters in category Pe (Punctuation, close). class="p">= _Pf // Pf is the set of Unicode characters in category Pf (Punctuation, final quote). class="p">= _Pi // Pi is the set of Unicode characters in category Pi (Punctuation, initial quote). class="p">= _Po // Po is the set of Unicode characters in category Po (Punctuation, other). class="p">= _Ps // Ps is the set of Unicode characters in category Ps (Punctuation, open). = _P  // Punct/P is the set of Unicode punctuation characters, category P. class="p">= _P class="p">= _Sc // Sc is the set of Unicode characters in category Sc (Symbol, currency). class="p">= _Sk // Sk is the set of Unicode characters in category Sk (Symbol, modifier). class="p">= _Sm // Sm is the set of Unicode characters in category Sm (Symbol, math). class="p">= _So // So is the set of Unicode characters in category So (Symbol, other). = _Z  // Space/Z is the set of Unicode space characters, category Z. class="p">= _Z = _S // Symbol/S is the set of Unicode symbol characters, category S. class="p">= _S = _Lt // Title is the set of Unicode title case letters. class="p">= _Lt // Lt is the set of Unicode characters in category Lt (Letter, titlecase). = _Lu // Upper is the set of Unicode upper case letters. class="p">= _Lu // Lu is the set of Unicode characters in category Lu (Letter, uppercase). class="p">= _Zl // Zl is the set of Unicode characters in category Zl (Separator, line). class="p">= _Zp // Zp is the set of Unicode characters in category Zp (Separator, paragraph). class="p">= _Zs // Zs is the set of Unicode characters in category Zs (Separator, space). class="w"> = _Adlam                  // Adlam is the set of Unicode characters in script Adlam. = _Ahom                   // Ahom is the set of Unicode characters in script Ahom. 
class="w">  = _Anatolian_Hieroglyphs  // Anatolian_Hieroglyphs is the set of Unicode characters in script Anatolian_Hieroglyphs. = _Arabic                 // Arabic is the set of Unicode characters in script Arabic. = _Armenian               // Armenian is the set of Unicode characters in script Armenian. = _Avestan                // Avestan is the set of Unicode characters in script Avestan. = _Balinese               // Balinese is the set of Unicode characters in script Balinese. = _Bamum                  // Bamum is the set of Unicode characters in script Bamum. = _Bassa_Vah              // Bassa_Vah is the set of Unicode characters in script Bassa_Vah. = _Batak                  // Batak is the set of Unicode characters in script Batak. = _Bengali                // Bengali is the set of Unicode characters in script Bengali. = _Bhaiksuki              // Bhaiksuki is the set of Unicode characters in script Bhaiksuki. = _Bopomofo               // Bopomofo is the set of Unicode characters in script Bopomofo. = _Brahmi                 // Brahmi is the set of Unicode characters in script Brahmi. = _Braille                // Braille is the set of Unicode characters in script Braille. = _Buginese               // Buginese is the set of Unicode characters in script Buginese. = _Buhid                  // Buhid is the set of Unicode characters in script Buhid. class="w">    = _Canadian_Aboriginal    // Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal. = _Carian                 // Carian is the set of Unicode characters in script Carian. class="w">     = _Caucasian_Albanian     // Caucasian_Albanian is the set of Unicode characters in script Caucasian_Albanian. = _Chakma                 // Chakma is the set of Unicode characters in script Chakma. = _Cham                   // Cham is the set of Unicode characters in script Cham. = _Cherokee               // Cherokee is the set of Unicode characters in script Cherokee. 
= _Chorasmian             // Chorasmian is the set of Unicode characters in script Chorasmian. = _Common                 // Common is the set of Unicode characters in script Common. = _Coptic                 // Coptic is the set of Unicode characters in script Coptic. = _Cuneiform              // Cuneiform is the set of Unicode characters in script Cuneiform. = _Cypriot                // Cypriot is the set of Unicode characters in script Cypriot. = _Cypro_Minoan           // Cypro_Minoan is the set of Unicode characters in script Cypro_Minoan. = _Cyrillic               // Cyrillic is the set of Unicode characters in script Cyrillic. = _Deseret                // Deseret is the set of Unicode characters in script Deseret. = _Devanagari             // Devanagari is the set of Unicode characters in script Devanagari. = _Dives_Akuru            // Dives_Akuru is the set of Unicode characters in script Dives_Akuru. = _Dogra                  // Dogra is the set of Unicode characters in script Dogra. = _Duployan               // Duployan is the set of Unicode characters in script Duployan. class="w">   = _Egyptian_Hieroglyphs   // Egyptian_Hieroglyphs is the set of Unicode characters in script Egyptian_Hieroglyphs. = _Elbasan                // Elbasan is the set of Unicode characters in script Elbasan. = _Elymaic                // Elymaic is the set of Unicode characters in script Elymaic. = _Ethiopic               // Ethiopic is the set of Unicode characters in script Ethiopic. = _Georgian               // Georgian is the set of Unicode characters in script Georgian. = _Glagolitic             // Glagolitic is the set of Unicode characters in script Glagolitic. = _Gothic                 // Gothic is the set of Unicode characters in script Gothic. = _Grantha                // Grantha is the set of Unicode characters in script Grantha. = _Greek                  // Greek is the set of Unicode characters in script Greek. 
= _Gujarati               // Gujarati is the set of Unicode characters in script Gujarati. = _Gunjala_Gondi          // Gunjala_Gondi is the set of Unicode characters in script Gunjala_Gondi. = _Gurmukhi               // Gurmukhi is the set of Unicode characters in script Gurmukhi. = _Han                    // Han is the set of Unicode characters in script Han. = _Hangul                 // Hangul is the set of Unicode characters in script Hangul. = _Hanifi_Rohingya        // Hanifi_Rohingya is the set of Unicode characters in script Hanifi_Rohingya. = _Hanunoo                // Hanunoo is the set of Unicode characters in script Hanunoo. = _Hatran                 // Hatran is the set of Unicode characters in script Hatran. = _Hebrew                 // Hebrew is the set of Unicode characters in script Hebrew. = _Hiragana               // Hiragana is the set of Unicode characters in script Hiragana. = _Imperial_Aramaic       // Imperial_Aramaic is the set of Unicode characters in script Imperial_Aramaic. = _Inherited              // Inherited is the set of Unicode characters in script Inherited. class="w">  = _Inscriptional_Pahlavi  // Inscriptional_Pahlavi is the set of Unicode characters in script Inscriptional_Pahlavi. class="w"> = _Inscriptional_Parthian // Inscriptional_Parthian is the set of Unicode characters in script Inscriptional_Parthian. = _Javanese               // Javanese is the set of Unicode characters in script Javanese. = _Kaithi                 // Kaithi is the set of Unicode characters in script Kaithi. = _Kannada                // Kannada is the set of Unicode characters in script Kannada. = _Katakana               // Katakana is the set of Unicode characters in script Katakana. = _Kawi                   // Kawi is the set of Unicode characters in script Kawi. = _Kayah_Li               // Kayah_Li is the set of Unicode characters in script Kayah_Li. = _Kharoshthi             // Kharoshthi is the set of Unicode characters in script Kharoshthi. 
class="w">    = _Khitan_Small_Script    // Khitan_Small_Script is the set of Unicode characters in script Khitan_Small_Script. = _Khmer                  // Khmer is the set of Unicode characters in script Khmer. = _Khojki                 // Khojki is the set of Unicode characters in script Khojki. = _Khudawadi              // Khudawadi is the set of Unicode characters in script Khudawadi. = _Lao                    // Lao is the set of Unicode characters in script Lao. = _Latin                  // Latin is the set of Unicode characters in script Latin. = _Lepcha                 // Lepcha is the set of Unicode characters in script Lepcha. = _Limbu                  // Limbu is the set of Unicode characters in script Limbu. = _Linear_A               // Linear_A is the set of Unicode characters in script Linear_A. = _Linear_B               // Linear_B is the set of Unicode characters in script Linear_B. = _Lisu                   // Lisu is the set of Unicode characters in script Lisu. = _Lycian                 // Lycian is the set of Unicode characters in script Lycian. = _Lydian                 // Lydian is the set of Unicode characters in script Lydian. = _Mahajani               // Mahajani is the set of Unicode characters in script Mahajani. = _Makasar                // Makasar is the set of Unicode characters in script Makasar. = _Malayalam              // Malayalam is the set of Unicode characters in script Malayalam. = _Mandaic                // Mandaic is the set of Unicode characters in script Mandaic. = _Manichaean             // Manichaean is the set of Unicode characters in script Manichaean. = _Marchen                // Marchen is the set of Unicode characters in script Marchen. = _Masaram_Gondi          // Masaram_Gondi is the set of Unicode characters in script Masaram_Gondi. = _Medefaidrin            // Medefaidrin is the set of Unicode characters in script Medefaidrin. 
= _Meetei_Mayek           // Meetei_Mayek is the set of Unicode characters in script Meetei_Mayek. = _Mende_Kikakui          // Mende_Kikakui is the set of Unicode characters in script Mende_Kikakui. = _Meroitic_Cursive       // Meroitic_Cursive is the set of Unicode characters in script Meroitic_Cursive. class="w">   = _Meroitic_Hieroglyphs   // Meroitic_Hieroglyphs is the set of Unicode characters in script Meroitic_Hieroglyphs. = _Miao                   // Miao is the set of Unicode characters in script Miao. = _Modi                   // Modi is the set of Unicode characters in script Modi. = _Mongolian              // Mongolian is the set of Unicode characters in script Mongolian. = _Mro                    // Mro is the set of Unicode characters in script Mro. = _Multani                // Multani is the set of Unicode characters in script Multani. = _Myanmar                // Myanmar is the set of Unicode characters in script Myanmar. = _Nabataean              // Nabataean is the set of Unicode characters in script Nabataean. = _Nag_Mundari            // Nag_Mundari is the set of Unicode characters in script Nag_Mundari. = _Nandinagari            // Nandinagari is the set of Unicode characters in script Nandinagari. = _New_Tai_Lue            // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue. = _Newa                   // Newa is the set of Unicode characters in script Newa. = _Nko                    // Nko is the set of Unicode characters in script Nko. = _Nushu                  // Nushu is the set of Unicode characters in script Nushu. class="w"> = _Nyiakeng_Puachue_Hmong // Nyiakeng_Puachue_Hmong is the set of Unicode characters in script Nyiakeng_Puachue_Hmong. = _Ogham                  // Ogham is the set of Unicode characters in script Ogham. = _Ol_Chiki               // Ol_Chiki is the set of Unicode characters in script Ol_Chiki. = _Old_Hungarian          // Old_Hungarian is the set of Unicode characters in script Old_Hungarian. 
= _Old_Italic             // Old_Italic is the set of Unicode characters in script Old_Italic. class="w">      = _Old_North_Arabian      // Old_North_Arabian is the set of Unicode characters in script Old_North_Arabian. = _Old_Permic             // Old_Permic is the set of Unicode characters in script Old_Permic. = _Old_Persian            // Old_Persian is the set of Unicode characters in script Old_Persian. = _Old_Sogdian            // Old_Sogdian is the set of Unicode characters in script Old_Sogdian. class="w">      = _Old_South_Arabian      // Old_South_Arabian is the set of Unicode characters in script Old_South_Arabian. = _Old_Turkic             // Old_Turkic is the set of Unicode characters in script Old_Turkic. = _Old_Uyghur             // Old_Uyghur is the set of Unicode characters in script Old_Uyghur. = _Oriya                  // Oriya is the set of Unicode characters in script Oriya. = _Osage                  // Osage is the set of Unicode characters in script Osage. = _Osmanya                // Osmanya is the set of Unicode characters in script Osmanya. = _Pahawh_Hmong           // Pahawh_Hmong is the set of Unicode characters in script Pahawh_Hmong. = _Palmyrene              // Palmyrene is the set of Unicode characters in script Palmyrene. = _Pau_Cin_Hau            // Pau_Cin_Hau is the set of Unicode characters in script Pau_Cin_Hau. = _Phags_Pa               // Phags_Pa is the set of Unicode characters in script Phags_Pa. = _Phoenician             // Phoenician is the set of Unicode characters in script Phoenician. = _Psalter_Pahlavi        // Psalter_Pahlavi is the set of Unicode characters in script Psalter_Pahlavi. = _Rejang                 // Rejang is the set of Unicode characters in script Rejang. = _Runic                  // Runic is the set of Unicode characters in script Runic. = _Samaritan              // Samaritan is the set of Unicode characters in script Samaritan. 
= _Saurashtra             // Saurashtra is the set of Unicode characters in script Saurashtra. = _Sharada                // Sharada is the set of Unicode characters in script Sharada. = _Shavian                // Shavian is the set of Unicode characters in script Shavian. = _Siddham                // Siddham is the set of Unicode characters in script Siddham. = _SignWriting            // SignWriting is the set of Unicode characters in script SignWriting. = _Sinhala                // Sinhala is the set of Unicode characters in script Sinhala. = _Sogdian                // Sogdian is the set of Unicode characters in script Sogdian. = _Sora_Sompeng           // Sora_Sompeng is the set of Unicode characters in script Sora_Sompeng. = _Soyombo                // Soyombo is the set of Unicode characters in script Soyombo. = _Sundanese              // Sundanese is the set of Unicode characters in script Sundanese. = _Syloti_Nagri           // Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri. = _Syriac                 // Syriac is the set of Unicode characters in script Syriac. = _Tagalog                // Tagalog is the set of Unicode characters in script Tagalog. = _Tagbanwa               // Tagbanwa is the set of Unicode characters in script Tagbanwa. = _Tai_Le                 // Tai_Le is the set of Unicode characters in script Tai_Le. = _Tai_Tham               // Tai_Tham is the set of Unicode characters in script Tai_Tham. = _Tai_Viet               // Tai_Viet is the set of Unicode characters in script Tai_Viet. = _Takri                  // Takri is the set of Unicode characters in script Takri. = _Tamil                  // Tamil is the set of Unicode characters in script Tamil. = _Tangsa                 // Tangsa is the set of Unicode characters in script Tangsa. = _Tangut                 // Tangut is the set of Unicode characters in script Tangut. = _Telugu                 // Telugu is the set of Unicode characters in script Telugu. 
= _Thaana                 // Thaana is the set of Unicode characters in script Thaana. = _Thai                   // Thai is the set of Unicode characters in script Thai. = _Tibetan                // Tibetan is the set of Unicode characters in script Tibetan. = _Tifinagh               // Tifinagh is the set of Unicode characters in script Tifinagh. = _Tirhuta                // Tirhuta is the set of Unicode characters in script Tirhuta. = _Toto                   // Toto is the set of Unicode characters in script Toto. = _Ugaritic               // Ugaritic is the set of Unicode characters in script Ugaritic. = _Vai                    // Vai is the set of Unicode characters in script Vai. = _Vithkuqi               // Vithkuqi is the set of Unicode characters in script Vithkuqi. = _Wancho                 // Wancho is the set of Unicode characters in script Wancho. = _Warang_Citi            // Warang_Citi is the set of Unicode characters in script Warang_Citi. = _Yezidi                 // Yezidi is the set of Unicode characters in script Yezidi. = _Yi                     // Yi is the set of Unicode characters in script Yi. = _Zanabazar_Square       // Zanabazar_Square is the set of Unicode characters in script Zanabazar_Square. class="w"> = _ASCII_Hex_Digit                    // ASCII_Hex_Digit is the set of Unicode characters with property ASCII_Hex_Digit. = _Bidi_Control                       // Bidi_Control is the set of Unicode characters with property Bidi_Control. = _Dash                               // Dash is the set of Unicode characters with property Dash. = _Deprecated                         // Deprecated is the set of Unicode characters with property Deprecated. = _Diacritic                          // Diacritic is the set of Unicode characters with property Diacritic. = _Extender                           // Extender is the set of Unicode characters with property Extender. 
= _Hex_Digit                          // Hex_Digit is the set of Unicode characters with property Hex_Digit. = _Hyphen                             // Hyphen is the set of Unicode characters with property Hyphen. class="w">                = _IDS_Binary_Operator                // IDS_Binary_Operator is the set of Unicode characters with property IDS_Binary_Operator. class="w">               = _IDS_Trinary_Operator               // IDS_Trinary_Operator is the set of Unicode characters with property IDS_Trinary_Operator. = _Ideographic                        // Ideographic is the set of Unicode characters with property Ideographic. = _Join_Control                       // Join_Control is the set of Unicode characters with property Join_Control. an>            = _Logical_Order_Exception            // Logical_Order_Exception is the set of Unicode characters with property Logical_Order_Exception. an>            = _Noncharacter_Code_Point            // Noncharacter_Code_Point is the set of Unicode characters with property Noncharacter_Code_Point. = _Other_Alphabetic                   // Other_Alphabetic is the set of Unicode characters with property Other_Alphabetic. norable_Code_Point = _Other_Default_Ignorable_Code_Point // Other_Default_Ignorable_Code_Point is the set of Unicode characters with property Other_Default_Ignorable_Code_Point. class="w">              = _Other_Grapheme_Extend              // Other_Grapheme_Extend is the set of Unicode characters with property Other_Grapheme_Extend. class="w">                  = _Other_ID_Continue                  // Other_ID_Continue is the set of Unicode characters with property Other_ID_Continue. = _Other_ID_Start                     // Other_ID_Start is the set of Unicode characters with property Other_ID_Start. = _Other_Lowercase                    // Other_Lowercase is the set of Unicode characters with property Other_Lowercase. 
= _Other_Math                         // Other_Math is the set of Unicode characters with property Other_Math. = _Other_Uppercase                    // Other_Uppercase is the set of Unicode characters with property Other_Uppercase. = _Pattern_Syntax                     // Pattern_Syntax is the set of Unicode characters with property Pattern_Syntax. class="w">                = _Pattern_White_Space                // Pattern_White_Space is the set of Unicode characters with property Pattern_White_Space. n_Mark       = _Prepended_Concatenation_Mark       // Prepended_Concatenation_Mark is the set of Unicode characters with property Prepended_Concatenation_Mark. = _Quotation_Mark                     // Quotation_Mark is the set of Unicode characters with property Quotation_Mark. = _Radical                            // Radical is the set of Unicode characters with property Radical. class="w">                 = _Regional_Indicator                 // Regional_Indicator is the set of Unicode characters with property Regional_Indicator. = _Sentence_Terminal                  // STerm is an alias for Sentence_Terminal. class="w">                  = _Sentence_Terminal                  // Sentence_Terminal is the set of Unicode characters with property Sentence_Terminal. = _Soft_Dotted                        // Soft_Dotted is the set of Unicode characters with property Soft_Dotted. class="w">               = _Terminal_Punctuation               // Terminal_Punctuation is the set of Unicode characters with property Terminal_Punctuation. class="w">                  = _Unified_Ideograph                  // Unified_Ideograph is the set of Unicode characters with property Unified_Ideograph. class="w">                 = _Variation_Selector                 // Variation_Selector is the set of Unicode characters with property Variation_Selector. = _White_Space                        // White_Space is the set of Unicode characters with property White_Space. CaseRanges = _CaseRanges
 Categories = map[string]*RangeTable{ C, Cc, Cf, Cn, Co, Cs, L, LC, Ll, Lm, Lo, Lt, Lu, M, Mc, Me, Mn, N, Nd, Nl, No, P, Pc, Pd, Pe, Pf, Pi, Po, Ps, S, Sc, Sk, Sm, So, Z, Zl, Zp, Zs, class="nx">CategoryAliases = map[string]string{ "LC", "Pe", "M", class="w"> "Pc", "Cc", "Sc", "Pd", "Nd", "Me", "Pf", "Cf", class="w">   "Pi", "L", "Nl", "Zl", "Ll", "M", "Sm", "Lm", "Sk", "Mn", "N", "Ps", "C", "Lo", "No", "Po", "So", class="w">   "Zp", "Co", "P", "Z", "Zs", "Mc", "Cs", "S", "Lt", "Cn", "Lu", "Cc", "Nd", "P", class="nx">FoldCategory = map[string]*RangeTable{ foldL, foldLl, foldLt, foldLu, foldM, foldMn, FoldScript = map[string]*RangeTable{ foldCommon, foldGreek, class="w"> foldInherited, class="nx">GraphicRanges = []*RangeTable{ class="w"> M, N, P, S, Zs, >PrintRanges = []*RangeTable{ class="w"> M, N, P, S, Properties = map[string]*RangeTable{ ASCII_Hex_Digit, Bidi_Control, Dash, Deprecated, Diacritic, Extender, Hex_Digit, Hyphen, class="w">                IDS_Binary_Operator, class="w">               IDS_Trinary_Operator, Ideographic, Join_Control, class="w">            Logical_Order_Exception, class="w">            Noncharacter_Code_Point, Other_Alphabetic, class="p">: Other_Default_Ignorable_Code_Point, class="w">              Other_Grapheme_Extend, Other_ID_Continue, Other_ID_Start, Other_Lowercase, Other_Math, Other_Uppercase, Pattern_Syntax, class="w">                Pattern_White_Space, class="w">       Prepended_Concatenation_Mark, Quotation_Mark, Radical, Regional_Indicator, Sentence_Terminal, Sentence_Terminal, Soft_Dotted, class="w">               Terminal_Punctuation, Unified_Ideograph, Variation_Selector, White_Space, ipts = map[string]*RangeTable{ Adlam, Ahom, class="w">  Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Balinese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi, Braille, Buginese, Buhid, class="w">    Canadian_Aboriginal, Carian, Caucasian_Albanian, Chakma, Cham, Cherokee, Chorasmian, Common, Coptic, Cuneiform, Cypriot, 
Cypro_Minoan, Cyrillic, Deseret, Devanagari, Dives_Akuru, Dogra, Duployan, class="w">   Egyptian_Hieroglyphs, Elbasan, Elymaic, Ethiopic, Georgian, Glagolitic, Gothic, Grantha, Greek, Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited, class="w">  Inscriptional_Pahlavi, class="w"> Inscriptional_Parthian, Javanese, Kaithi, Kannada, Katakana, Kawi, Kayah_Li, Kharoshthi, class="w">    Khitan_Small_Script, Khmer, Khojki, Khudawadi, Lao, Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Mahajani, Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi, Medefaidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, class="w">   Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar, Nabataean, Nag_Mundari, Nandinagari, New_Tai_Lue, Newa, Nko, Nushu, class="w"> Nyiakeng_Puachue_Hmong, Ogham, Ol_Chiki, Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sogdian, Old_South_Arabian, Old_Turkic, Old_Uyghur, Oriya, Osage, Osmanya, Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician, Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Shavian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham, Tai_Viet, Takri, Tamil, Tangsa, Tangut, Telugu, Thaana, Thai, Tibetan, Tifinagh, Tirhuta, Toto, Ugaritic, Vai, Vithkuqi, Wancho, Warang_Citi, Yezidi, Yi, Zanabazar_Square, class="p">(r rune, ranges ...*RangeTable) bool
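func Is

func Is(rangeTab *RangeTable, r rune) bool

Is 报告该 rune 是否在指定的范围表中。

func IsControl

func IsControl(r rune) bool

IsControl 报告该 rune 是否为控制字符。C（Other）Unicode 分类还包含更多码点（例如代理项）；请使用 Is(C, r) 来测试它们。

func IsDigit

func IsDigit(r rune) bool

IsDigit 报告该 rune 是否为十进制数字。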

Example

package main

import (
	"fmt"
	"unicode"
)

func main() {
	fmt.Printf("%t\n", unicode.IsDigit('৩'))
	fmt.Printf("%t\n", unicode.IsDigit('A'))
}

Output:

true
false

func IsGraphic

func IsGraphic(r rune) bool

IsGraphic 报告该 rune 是否被 Unicode 定义为图形字符。此类字符包括字母、标记、数字、标点、符号和空格，来自分类 L、M、N、P、S、Zs。

func IsLetter

func IsLetter(r rune) bool

IsLetter 报告该 rune 是否为字母（分类 L）。

Example

package main

import (
	"fmt"
	"unicode"
)

// main prints, one result per line, whether each sample rune is a letter
// (Unicode category L): first an ASCII capital, then an ASCII digit.
func main() {
	for _, sample := range []rune{'A', '7'} {
		fmt.Printf("%t\n", unicode.IsLetter(sample))
	}
}

Output:

true
false

func IsLower

func IsLower(r rune) bool

IsLower 报告该 rune 是否为小写字母。

Example

package main

import (
	"fmt"
	"unicode"
)

// main checks a lower-case and an upper-case ASCII letter with
// unicode.IsLower and prints each verdict on its own line.
func main() {
	candidates := [...]rune{'a', 'A'}
	for i := range candidates {
		isLower := unicode.IsLower(candidates[i])
		fmt.Printf("%t\n", isLower)
	}
}

Output:

true
false

func IsMark

func IsMark(r rune) bool

IsMark 报告该 rune 是否为标记字符（分类 M）。

func IsNumber

func IsNumber(r rune) bool

IsNumber 报告该 rune 是否为数字（分类 N）。

Example

package main

import (
	"fmt"
	"unicode"
)

// main shows that unicode.IsNumber accepts non-decimal numerals such as the
// Roman numeral eight, and rejects an ordinary letter.
func main() {
	for _, sample := range []rune{'Ⅷ', 'A'} {
		fmt.Printf("%t\n", unicode.IsNumber(sample))
	}
}

Output:

true
false

func IsOneOf

func IsOneOf(ranges []*RangeTable, r rune) bool

IsOneOf 报告该 rune 是否属于某个范围的成员。函数 "In" 提供了更好的签名，应优先于 IsOneOf 使用。

func IsPrint

func IsPrint(r rune) bool

IsPrint 报告该 rune 是否被 Go 定义为可打印字符。此类字符包括字母、标记、数字、标点、符号以及 ASCII 空格字符，来自分类 L、M、N、P、S 和 ASCII 空格字符。该分类与 IsGraphic 相同，唯一的区别是仅有的间距字符为 ASCII 空格 U+0020。

func IsPunct

func IsPunct(r rune) bool

IsPunct 报告该 rune 是否为 Unicode 标点字符（分类 P）。

func IsSpace

func IsSpace(r rune) bool

IsSpace 报告该 rune 是否为 Unicode White Space 属性定义的空白字符；在 Latin-1 空间中包括：

'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP)。

其他空白字符的定义由分类 Z 和属性 Pattern_White_Space 设定。

Example

package main

import (
	"fmt"
	"unicode"
)

// main runs unicode.IsSpace over three white-space runes (space, newline,
// tab) followed by a letter, printing one boolean per line.
func main() {
	probes := []rune{' ', '\n', '\t', 'a'}
	for _, p := range probes {
		fmt.Printf("%t\n", unicode.IsSpace(p))
	}
}

Output:

true
true
true
false

func IsSymbol

func IsSymbol(r rune) bool

IsSymbol 报告该 rune 是否为符号字符。

func IsTitle

func IsTitle(r rune) bool

IsTitle 报告该 rune 是否为标题大写字母。

Example

package main

import (
	"fmt"
	"unicode"
)

// main contrasts a title-case digraph with a plain lower-case letter using
// unicode.IsTitle, printing one result per line.
func main() {
	for _, sample := range []rune{'ǅ', 'a'} {
		fmt.Printf("%t\n", unicode.IsTitle(sample))
	}
}

Output:

true
false

func IsUpper

func IsUpper(r rune) bool

IsUpper 报告该 rune 是否为大写字母。

Example

package main

import (
	"fmt"
	"unicode"
)

// main checks an upper-case and a lower-case ASCII letter with
// unicode.IsUpper and prints each verdict on its own line.
func main() {
	candidates := [...]rune{'A', 'a'}
	for i := range candidates {
		isUpper := unicode.IsUpper(candidates[i])
		fmt.Printf("%t\n", isUpper)
	}
}

Output:

true
false

func SimpleFold

func SimpleFold(r rune) rune

SimpleFold 遍历在 Unicode 定义的简单大小写折叠下等价的 Unicode 码点。在与该 rune 等价的码点中（包括 rune 本身），SimpleFold 返回大于 r 的最小 rune（如果存在），否则返回 >= 0 的最小 rune。如果 r 不是有效的 Unicode 码点，SimpleFold(r) 返回 r。

例如：

SimpleFold('A') = 'a'
SimpleFold('a') = 'A'

SimpleFold('K') = 'k'
SimpleFold('k') = '\u212A' (Kelvin symbol, K)
SimpleFold('\u212A') = 'K'

SimpleFold('1') = '1'

SimpleFold(-2) = -2

Example

package main

import (
	"fmt"
	"unicode"
)

// main walks a list of runes through unicode.SimpleFold and prints each
// result in %#U form. The trailing comment on every input is the rune the
// call is expected to yield.
func main() {
	inputs := []rune{
		'A',      // 'a'
		'a',      // 'A'
		'K',      // 'k'
		'k',      // '\u212A' (Kelvin symbol, K)
		'\u212A', // 'K'
		'1',      // '1'
	}
	for _, in := range inputs {
		fmt.Printf("%#U\n", unicode.SimpleFold(in))
	}
}

Output:

U+0061 'a'
U+0041 'A'
U+006B 'k'
U+212A 'K'
U+004B 'K'
U+0031 '1'

func To

func To(_case int, r rune) rune

To 将 rune 映射为指定的大小写：UpperCase、LowerCase 或 TitleCase。

Example

package main

import (
	"fmt"
	"unicode"
)

// main maps a lower-case and then an upper-case 'g' to each of the three
// target cases (upper, lower, title, in that order) via unicode.To and
// prints every result in %#U form.
func main() {
	targets := []int{unicode.UpperCase, unicode.LowerCase, unicode.TitleCase}
	for _, letter := range []rune{'g', 'G'} {
		for _, target := range targets {
			fmt.Printf("%#U\n", unicode.To(target, letter))
		}
	}
}

Output:

U+0047 'G'
U+0067 'g'
U+0047 'G'
U+0047 'G'
U+0067 'g'
U+0047 'G'

func ToLower

func ToLower(r rune) rune

ToLower 将 rune 映射为小写。

Example

package main

import (
	"fmt"
	"unicode"
)

// main lower-cases a capital 'G' with unicode.ToLower and prints the
// resulting rune in %#U form.
func main() {
	lowered := unicode.ToLower('G')
	fmt.Printf("%#U\n", lowered)
}

Output:

U+0067 'g'

func ToTitle

func ToTitle(r rune) rune

ToTitle 将 rune 映射为标题大写。

Example

package main

import (
	"fmt"
	"unicode"
)

// main title-cases a lower-case 'g' with unicode.ToTitle and prints the
// resulting rune in %#U form.
func main() {
	// The input is lower case, so the constant is named lcG (it was
	// previously called ucG, which suggested an upper-case input).
	const lcG = 'g'
	fmt.Printf("%#U\n", unicode.ToTitle(lcG))
}

Output:

U+0047 'G'

func ToUpper

func ToUpper(r rune) rune

ToUpper 将 rune 映射为大写。

Example

package main

import (
	"fmt"
	"unicode"
)

// main upper-cases a lower-case 'g' with unicode.ToUpper and prints the
// resulting rune in %#U form.
func main() {
	// The input is lower case, so the constant is named lcG (it was
	// previously called ucG, which suggested an upper-case input).
	const lcG = 'g'
	fmt.Printf("%#U\n", unicode.ToUpper(lcG))
}

Output:

U+0047 'G'

Types

type CaseRange

// CaseRange describes a run of consecutive Unicode code points (fixed
// stride of 1) that share a simple one-code-point-to-one-code-point case
// conversion.
type CaseRange struct {
	// Lo is the first code point of the range (inclusive).
	Lo    uint32
	// Hi is the last code point of the range (inclusive).
	Hi    uint32
	// Delta holds the amounts to add to a code point to reach its
	// counterpart in a different case. Entries may be negative; zero means
	// the character is already in that case. The type d is declared
	// elsewhere in the package.
	Delta d
}

CaseRange 表示用于简单（一个码点到一个码点）大小写转换的 Unicode 码点范围。范围从 Lo 到 Hi（含两端），固定步长为 1。Delta 是需要加到码点上以到达该字符不同大小写对应码点的数值。它们可以是负数。如果为零，表示该字符已处于对应的大小写形式。有一种特殊情况表示交替对应的大写和小写对的序列。它以固定 Delta 出现：

{UpperLower, UpperLower, UpperLower}

常量 UpperLower 具有一个在其他情况下不可能出现的 delta 值。

type Range16

// Range16 describes a range of 16-bit Unicode code points that runs from Lo
// to Hi inclusive in steps of Stride.
type Range16 struct {
	// Lo is the first code point of the range (inclusive).
	Lo     uint16
	// Hi is the last code point of the range (inclusive).
	Hi     uint16
	// Stride is the step between successive code points in the range.
	Stride uint16
}

Range16 表示一个 16 位 Unicode 码点的范围。范围从 Lo 到 Hi（含两端），具有指定的步长。

type Range32

// Range32 describes a range of Unicode code points for use when one or more
// of the values does not fit in 16 bits. The range runs from Lo to Hi
// inclusive in steps of Stride. Lo and Hi must always be >= 1<<16.
type Range32 struct {
	// Lo is the first code point of the range (inclusive).
	Lo     uint32
	// Hi is the last code point of the range (inclusive).
	Hi     uint32
	// Stride is the step between successive code points in the range.
	Stride uint32
}

Range32 表示一个 Unicode 码点的范围，当一个或多个值无法用 16 位表示时使用。范围从 Lo 到 Hi（含两端），具有指定的步长。Lo 和 Hi 必须始终 >= 1<<16。

type RangeTable

// RangeTable defines a set of Unicode code points by listing the ranges of
// code points in the set. The ranges are split across two slices to save
// space; both must be sorted in ascending order and must not overlap.
type RangeTable struct {
	// R16 lists the ranges whose code points fit in 16 bits.
	R16         []Range16
	// R32 lists the remaining ranges; it should hold only values
	// >= 0x10000 (1<<16).
	R32         []Range32
	LatinOffset int // number of entries in R16 with Hi <= MaxLatin1
}

RangeTable 通过列出集合内码点的范围来定义一组 Unicode 码点。范围以两个切片列出以节省空间：一个 16 位范围的切片和一个 32 位范围的切片。两个切片必须按升序排列且不重叠。此外，R32 应仅包含 >= 0x10000 (1<<16) 的值。

type SpecialCase

// SpecialCase represents language-specific case mappings such as Turkish.
// Its methods customize case conversion by overriding the standard mappings.
type SpecialCase []CaseRange

SpecialCase 表示特定于语言的大小写映射，例如土耳其语。SpecialCase 的方法通过覆盖标准映射来进行自定义。

Example

package main

import (
	"fmt"
	"unicode"
)

// main applies the Turkish special-case mappings to a dotless-free pair of
// runes: lower-case 'i' first, then capital I with dot above. For each rune
// it prints the lower, title and upper mapping, in that order, in %#U form.
func main() {
	turkish := unicode.TurkishCase

	// '\u0130' is LATIN CAPITAL LETTER I WITH DOT ABOVE.
	for _, letter := range []rune{'i', '\u0130'} {
		fmt.Printf("%#U\n", turkish.ToLower(letter))
		fmt.Printf("%#U\n", turkish.ToTitle(letter))
		fmt.Printf("%#U\n", turkish.ToUpper(letter))
	}
}

Output:

U+0069 'i'
U+0130 'İ'
U+0130 'İ'
U+0069 'i'
U+0130 'İ'
U+0130 'İ'

// AzeriCase is initialized from the same _TurkishCase table as TurkishCase.
var AzeriCase SpecialCase = _TurkishCase

// TurkishCase is the SpecialCase initialized from the _TurkishCase table.
var TurkishCase SpecialCase = _TurkishCase

func (SpecialCase) ToLower

func (special SpecialCase) ToLower(r rune) rune

ToLower 将 rune 映射为小写，优先使用特殊映射。

func (SpecialCase) ToTitle

func (special SpecialCase) ToTitle(r rune) rune

ToTitle 将 rune 映射为标题大写，优先使用特殊映射。

func (SpecialCase) ToUpper

func (special SpecialCase) ToUpper(r rune) rune

ToUpper 将 rune 映射为大写，优先使用特殊映射。

Directories

utf16	utf16 包实现了 UTF-16 序列的编码和解码。
utf8	utf8 包实现了支持 UTF-8 编码文本的函数和常量。