package unicode
import "unicode"
unicode 包提供了用于测试 Unicode 码点某些属性的数据和函数。
Functions starting with "Is" can be used to inspect which range table a
rune belongs to. Note that a rune may fit into more than one range.
Example (Is) — the program below is followed by its output:
package main
import (
"fmt"
"unicode"
)
// main walks a sample string rune by rune and reports every Unicode
// classification predicate that holds for each rune.
func main() {
	// A sample whose runes fall into many different classes.
	const mixed = "\b5Ὂg̀9! ℃ᾭG"

	// Each entry pairs a predicate with the line printed when it holds.
	// The slice order is the order in which the lines are printed.
	checks := []struct {
		holds func(rune) bool
		line  string
	}{
		{unicode.IsControl, "\tis control rune"},
		{unicode.IsDigit, "\tis digit rune"},
		{unicode.IsGraphic, "\tis graphic rune"},
		{unicode.IsLetter, "\tis letter rune"},
		{unicode.IsLower, "\tis lower case rune"},
		{unicode.IsMark, "\tis mark rune"},
		{unicode.IsNumber, "\tis number rune"},
		{unicode.IsPrint, "\tis printable rune"},
		{func(r rune) bool { return !unicode.IsPrint(r) }, "\tis not printable rune"},
		{unicode.IsPunct, "\tis punct rune"},
		{unicode.IsSpace, "\tis space rune"},
		{unicode.IsSymbol, "\tis symbol rune"},
		{unicode.IsTitle, "\tis title case rune"},
		{unicode.IsUpper, "\tis upper case rune"},
	}

	for _, r := range mixed {
		fmt.Printf("For %q:\n", r)
		for _, chk := range checks {
			if chk.holds(r) {
				fmt.Println(chk.line)
			}
		}
	}
}
For '\b':
is control rune
is not printable rune
For '5':
is digit rune
is graphic rune
is number rune
is printable rune
For 'Ὂ':
is graphic rune
is letter rune
is printable rune
is upper case rune
For 'g':
is graphic rune
is letter rune
is lower case rune
is printable rune
For '̀':
is graphic rune
is mark rune
is printable rune
For '9':
is digit rune
is graphic rune
is number rune
is printable rune
For '!':
is graphic rune
is printable rune
is punct rune
For ' ':
is graphic rune
is printable rune
is space rune
For '℃':
is graphic rune
is printable rune
is symbol rune
For 'ᾭ':
is graphic rune
is letter rune
is printable rune
is title case rune
For 'G':
is graphic rune
is letter rune
is printable rune
is upper case rune
Index
- Constants
- Variables
- func In(r rune, ranges ...*RangeTable) bool
- func Is(rangeTab *RangeTable, r rune) bool
- func IsControl(r rune) bool
- func IsDigit(r rune) bool
- func IsGraphic(r rune) bool
- func IsLetter(r rune) bool
- func IsLower(r rune) bool
- func IsMark(r rune) bool
- func IsNumber(r rune) bool
- func IsOneOf(ranges []*RangeTable, r rune) bool
- func IsPrint(r rune) bool
- func IsPunct(r rune) bool
- func IsSpace(r rune) bool
- func IsSymbol(r rune) bool
- func IsTitle(r rune) bool
- func IsUpper(r rune) bool
- func SimpleFold(r rune) rune
- func To(_case int, r rune) rune
- func ToLower(r rune) rune
- func ToTitle(r rune) rune
- func ToUpper(r rune) rune
- type CaseRange
- type Range16
- type Range32
- type RangeTable
- type SpecialCase
Examples
- IsDigit
- IsLetter
- IsLower
- IsNumber
- IsSpace
- IsTitle
- IsUpper
- SimpleFold
- SpecialCase
- To
- ToLower
- ToTitle
- ToUpper
- package (Is)
Constants
// Notable code point values.
const (
	MaxRune         = '\U0010FFFF' // Maximum valid Unicode code point.
	ReplacementChar = '\uFFFD'     // Represents invalid code points.
	MaxASCII        = '\u007F'     // Maximum ASCII value.
	MaxLatin1       = '\u00FF'     // Maximum Latin-1 value.
)
// Indices into the Delta arrays inside CaseRanges for case mapping.
const (
	UpperCase = iota
	LowerCase
	TitleCase
	MaxCase
)
CaseRanges 中 Delta 数组的索引，用于大小写映射。
const ( UpperLower = MaxRune + 1 // (不可能是有效的 delta 值。) )
如果 CaseRange 的 Delta 字段为 UpperLower，则该 CaseRange 表示如下形式的序列（例如）：Upper Lower Upper Lower。
// Version is the Unicode edition from which the tables are derived.
const Version = "15.0.0"
Version is the Unicode edition from which the tables are derived.
Variables
var ( Cc = _Cc // Cc is the set of Unicode characters in category Cc (Other, control). Cf = _Cf // Cf is the set of Unicode characters in category Cf (Other, format). Cn = _Cn // Cn is the set of Unicode characters in category Cn (Other, not assigned). Co = _Co // Co is the set of Unicode characters in category Co (Other, private use). Cs = _Cs // Cs is the set of Unicode characters in category Cs (Other, surrogate). Digit = _Nd // Digit is the set of Unicode characters with the "decimal digit" property. Nd = _Nd // Nd is the set of Unicode characters in category Nd (Number, decimal digit). LC = _LC // LC is the set of Unicode characters in category LC (Letter, cased: Ll | Lt | Lu). Letter = _L // Letter/L is the set of Unicode letters, category L. L = _L Lm = _Lm // Lm is the set of Unicode characters in category Lm (Letter, modifier). Lo = _Lo // Lo is the set of Unicode characters in category Lo (Letter, other). Lower = _Ll // Lower is the set of Unicode lower case letters. Ll = _Ll // Ll is the set of Unicode characters in category Ll (Letter, lowercase). Mark = _M // Mark/M is the set of Unicode mark characters, category M. M = _M Mc = _Mc // Mc is the set of Unicode characters in category Mc (Mark, spacing combining). Me = _Me // Me is the set of Unicode characters in category Me (Mark, enclosing). Mn = _Mn // Mn is the set of Unicode characters in category Mn (Mark, nonspacing). Nl = _Nl // Nl is the set of Unicode characters in category Nl (Number, letter). No = _No // No is the set of Unicode characters in category No (Number, other). Number = _N // Number/N is the set of Unicode number characters, category N. N = _N Other = _C // Other/C is the set of Unicode control, special, and unassigned code points, category C. C = _C Pc = _Pc // Pc is the set of Unicode characters in category Pc (Punctuation, connector). Pd = _Pd // Pd is the set of Unicode characters in category Pd (Punctuation, dash). 
Pe = _Pe // Pe is the set of Unicode characters in category Pe (Punctuation, close). Pf = _Pf // Pf is the set of Unicode characters in category Pf (Punctuation, final quote). Pi = _Pi // Pi is the set of Unicode characters in category Pi (Punctuation, initial quote). Po = _Po // Po is the set of Unicode characters in category Po (Punctuation, other). Ps = _Ps // Ps is the set of Unicode characters in category Ps (Punctuation, open). Punct = _P // Punct/P is the set of Unicode punctuation characters, category P. P = _P Sc = _Sc // Sc is the set of Unicode characters in category Sc (Symbol, currency). Sk = _Sk // Sk is the set of Unicode characters in category Sk (Symbol, modifier). Sm = _Sm // Sm is the set of Unicode characters in category Sm (Symbol, math). So = _So // So is the set of Unicode characters in category So (Symbol, other). Space = _Z // Space/Z is the set of Unicode space characters, category Z. Z = _Z Symbol = _S // Symbol/S is the set of Unicode symbol characters, category S. S = _S Title = _Lt // Title is the set of Unicode title case letters. Lt = _Lt // Lt is the set of Unicode characters in category Lt (Letter, titlecase). Upper = _Lu // Upper is the set of Unicode upper case letters. Lu = _Lu // Lu is the set of Unicode characters in category Lu (Letter, uppercase). Zl = _Zl // Zl is the set of Unicode characters in category Zl (Separator, line). Zp = _Zp // Zp is the set of Unicode characters in category Zp (Separator, paragraph). Zs = _Zs // Zs is the set of Unicode characters in category Zs (Separator, space). )
These variables have type *RangeTable.
var ( Adlam = _Adlam // Adlam is the set of Unicode characters in script Adlam. Ahom = _Ahom // Ahom is the set of Unicode characters in script Ahom. Anatolian_Hieroglyphs = _Anatolian_Hieroglyphs // Anatolian_Hieroglyphs is the set of Unicode characters in script Anatolian_Hieroglyphs. Arabic = _Arabic // Arabic is the set of Unicode characters in script Arabic. Armenian = _Armenian // Armenian is the set of Unicode characters in script Armenian. Avestan = _Avestan // Avestan is the set of Unicode characters in script Avestan. Balinese = _Balinese // Balinese is the set of Unicode characters in script Balinese. Bamum = _Bamum // Bamum is the set of Unicode characters in script Bamum. Bassa_Vah = _Bassa_Vah // Bassa_Vah is the set of Unicode characters in script Bassa_Vah. Batak = _Batak // Batak is the set of Unicode characters in script Batak. Bengali = _Bengali // Bengali is the set of Unicode characters in script Bengali. Bhaiksuki = _Bhaiksuki // Bhaiksuki is the set of Unicode characters in script Bhaiksuki. Bopomofo = _Bopomofo // Bopomofo is the set of Unicode characters in script Bopomofo. Brahmi = _Brahmi // Brahmi is the set of Unicode characters in script Brahmi. Braille = _Braille // Braille is the set of Unicode characters in script Braille. Buginese = _Buginese // Buginese is the set of Unicode characters in script Buginese. Buhid = _Buhid // Buhid is the set of Unicode characters in script Buhid. Canadian_Aboriginal = _Canadian_Aboriginal // Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal. Carian = _Carian // Carian is the set of Unicode characters in script Carian. Caucasian_Albanian = _Caucasian_Albanian // Caucasian_Albanian is the set of Unicode characters in script Caucasian_Albanian. Chakma = _Chakma // Chakma is the set of Unicode characters in script Chakma. Cham = _Cham // Cham is the set of Unicode characters in script Cham. 
Cherokee = _Cherokee // Cherokee is the set of Unicode characters in script Cherokee. Chorasmian = _Chorasmian // Chorasmian is the set of Unicode characters in script Chorasmian. Common = _Common // Common is the set of Unicode characters in script Common. Coptic = _Coptic // Coptic is the set of Unicode characters in script Coptic. Cuneiform = _Cuneiform // Cuneiform is the set of Unicode characters in script Cuneiform. Cypriot = _Cypriot // Cypriot is the set of Unicode characters in script Cypriot. Cypro_Minoan = _Cypro_Minoan // Cypro_Minoan is the set of Unicode characters in script Cypro_Minoan. Cyrillic = _Cyrillic // Cyrillic is the set of Unicode characters in script Cyrillic. Deseret = _Deseret // Deseret is the set of Unicode characters in script Deseret. Devanagari = _Devanagari // Devanagari is the set of Unicode characters in script Devanagari. Dives_Akuru = _Dives_Akuru // Dives_Akuru is the set of Unicode characters in script Dives_Akuru. Dogra = _Dogra // Dogra is the set of Unicode characters in script Dogra. Duployan = _Duployan // Duployan is the set of Unicode characters in script Duployan. Egyptian_Hieroglyphs = _Egyptian_Hieroglyphs // Egyptian_Hieroglyphs is the set of Unicode characters in script Egyptian_Hieroglyphs. Elbasan = _Elbasan // Elbasan is the set of Unicode characters in script Elbasan. Elymaic = _Elymaic // Elymaic is the set of Unicode characters in script Elymaic. Ethiopic = _Ethiopic // Ethiopic is the set of Unicode characters in script Ethiopic. Georgian = _Georgian // Georgian is the set of Unicode characters in script Georgian. Glagolitic = _Glagolitic // Glagolitic is the set of Unicode characters in script Glagolitic. Gothic = _Gothic // Gothic is the set of Unicode characters in script Gothic. Grantha = _Grantha // Grantha is the set of Unicode characters in script Grantha. Greek = _Greek // Greek is the set of Unicode characters in script Greek. 
Gujarati = _Gujarati // Gujarati is the set of Unicode characters in script Gujarati. Gunjala_Gondi = _Gunjala_Gondi // Gunjala_Gondi is the set of Unicode characters in script Gunjala_Gondi. Gurmukhi = _Gurmukhi // Gurmukhi is the set of Unicode characters in script Gurmukhi. Han = _Han // Han is the set of Unicode characters in script Han. Hangul = _Hangul // Hangul is the set of Unicode characters in script Hangul. Hanifi_Rohingya = _Hanifi_Rohingya // Hanifi_Rohingya is the set of Unicode characters in script Hanifi_Rohingya. Hanunoo = _Hanunoo // Hanunoo is the set of Unicode characters in script Hanunoo. Hatran = _Hatran // Hatran is the set of Unicode characters in script Hatran. Hebrew = _Hebrew // Hebrew is the set of Unicode characters in script Hebrew. Hiragana = _Hiragana // Hiragana is the set of Unicode characters in script Hiragana. Imperial_Aramaic = _Imperial_Aramaic // Imperial_Aramaic is the set of Unicode characters in script Imperial_Aramaic. Inherited = _Inherited // Inherited is the set of Unicode characters in script Inherited. Inscriptional_Pahlavi = _Inscriptional_Pahlavi // Inscriptional_Pahlavi is the set of Unicode characters in script Inscriptional_Pahlavi. Inscriptional_Parthian = _Inscriptional_Parthian // Inscriptional_Parthian is the set of Unicode characters in script Inscriptional_Parthian. Javanese = _Javanese // Javanese is the set of Unicode characters in script Javanese. Kaithi = _Kaithi // Kaithi is the set of Unicode characters in script Kaithi. Kannada = _Kannada // Kannada is the set of Unicode characters in script Kannada. Katakana = _Katakana // Katakana is the set of Unicode characters in script Katakana. Kawi = _Kawi // Kawi is the set of Unicode characters in script Kawi. Kayah_Li = _Kayah_Li // Kayah_Li is the set of Unicode characters in script Kayah_Li. Kharoshthi = _Kharoshthi // Kharoshthi is the set of Unicode characters in script Kharoshthi. 
Khitan_Small_Script = _Khitan_Small_Script // Khitan_Small_Script is the set of Unicode characters in script Khitan_Small_Script. Khmer = _Khmer // Khmer is the set of Unicode characters in script Khmer. Khojki = _Khojki // Khojki is the set of Unicode characters in script Khojki. Khudawadi = _Khudawadi // Khudawadi is the set of Unicode characters in script Khudawadi. Lao = _Lao // Lao is the set of Unicode characters in script Lao. Latin = _Latin // Latin is the set of Unicode characters in script Latin. Lepcha = _Lepcha // Lepcha is the set of Unicode characters in script Lepcha. Limbu = _Limbu // Limbu is the set of Unicode characters in script Limbu. Linear_A = _Linear_A // Linear_A is the set of Unicode characters in script Linear_A. Linear_B = _Linear_B // Linear_B is the set of Unicode characters in script Linear_B. Lisu = _Lisu // Lisu is the set of Unicode characters in script Lisu. Lycian = _Lycian // Lycian is the set of Unicode characters in script Lycian. Lydian = _Lydian // Lydian is the set of Unicode characters in script Lydian. Mahajani = _Mahajani // Mahajani is the set of Unicode characters in script Mahajani. Makasar = _Makasar // Makasar is the set of Unicode characters in script Makasar. Malayalam = _Malayalam // Malayalam is the set of Unicode characters in script Malayalam. Mandaic = _Mandaic // Mandaic is the set of Unicode characters in script Mandaic. Manichaean = _Manichaean // Manichaean is the set of Unicode characters in script Manichaean. Marchen = _Marchen // Marchen is the set of Unicode characters in script Marchen. Masaram_Gondi = _Masaram_Gondi // Masaram_Gondi is the set of Unicode characters in script Masaram_Gondi. Medefaidrin = _Medefaidrin // Medefaidrin is the set of Unicode characters in script Medefaidrin. Meetei_Mayek = _Meetei_Mayek // Meetei_Mayek is the set of Unicode characters in script Meetei_Mayek. Mende_Kikakui = _Mende_Kikakui // Mende_Kikakui is the set of Unicode characters in script Mende_Kikakui. 
Meroitic_Cursive = _Meroitic_Cursive // Meroitic_Cursive is the set of Unicode characters in script Meroitic_Cursive. Meroitic_Hieroglyphs = _Meroitic_Hieroglyphs // Meroitic_Hieroglyphs is the set of Unicode characters in script Meroitic_Hieroglyphs. Miao = _Miao // Miao is the set of Unicode characters in script Miao. Modi = _Modi // Modi is the set of Unicode characters in script Modi. Mongolian = _Mongolian // Mongolian is the set of Unicode characters in script Mongolian. Mro = _Mro // Mro is the set of Unicode characters in script Mro. Multani = _Multani // Multani is the set of Unicode characters in script Multani. Myanmar = _Myanmar // Myanmar is the set of Unicode characters in script Myanmar. Nabataean = _Nabataean // Nabataean is the set of Unicode characters in script Nabataean. Nag_Mundari = _Nag_Mundari // Nag_Mundari is the set of Unicode characters in script Nag_Mundari. Nandinagari = _Nandinagari // Nandinagari is the set of Unicode characters in script Nandinagari. New_Tai_Lue = _New_Tai_Lue // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue. Newa = _Newa // Newa is the set of Unicode characters in script Newa. Nko = _Nko // Nko is the set of Unicode characters in script Nko. Nushu = _Nushu // Nushu is the set of Unicode characters in script Nushu. Nyiakeng_Puachue_Hmong = _Nyiakeng_Puachue_Hmong // Nyiakeng_Puachue_Hmong is the set of Unicode characters in script Nyiakeng_Puachue_Hmong. Ogham = _Ogham // Ogham is the set of Unicode characters in script Ogham. Ol_Chiki = _Ol_Chiki // Ol_Chiki is the set of Unicode characters in script Ol_Chiki. Old_Hungarian = _Old_Hungarian // Old_Hungarian is the set of Unicode characters in script Old_Hungarian. Old_Italic = _Old_Italic // Old_Italic is the set of Unicode characters in script Old_Italic. Old_North_Arabian = _Old_North_Arabian // Old_North_Arabian is the set of Unicode characters in script Old_North_Arabian. 
Old_Permic = _Old_Permic // Old_Permic is the set of Unicode characters in script Old_Permic. Old_Persian = _Old_Persian // Old_Persian is the set of Unicode characters in script Old_Persian. Old_Sogdian = _Old_Sogdian // Old_Sogdian is the set of Unicode characters in script Old_Sogdian. Old_South_Arabian = _Old_South_Arabian // Old_South_Arabian is the set of Unicode characters in script Old_South_Arabian. Old_Turkic = _Old_Turkic // Old_Turkic is the set of Unicode characters in script Old_Turkic. Old_Uyghur = _Old_Uyghur // Old_Uyghur is the set of Unicode characters in script Old_Uyghur. Oriya = _Oriya // Oriya is the set of Unicode characters in script Oriya. Osage = _Osage // Osage is the set of Unicode characters in script Osage. Osmanya = _Osmanya // Osmanya is the set of Unicode characters in script Osmanya. Pahawh_Hmong = _Pahawh_Hmong // Pahawh_Hmong is the set of Unicode characters in script Pahawh_Hmong. Palmyrene = _Palmyrene // Palmyrene is the set of Unicode characters in script Palmyrene. Pau_Cin_Hau = _Pau_Cin_Hau // Pau_Cin_Hau is the set of Unicode characters in script Pau_Cin_Hau. Phags_Pa = _Phags_Pa // Phags_Pa is the set of Unicode characters in script Phags_Pa. Phoenician = _Phoenician // Phoenician is the set of Unicode characters in script Phoenician. Psalter_Pahlavi = _Psalter_Pahlavi // Psalter_Pahlavi is the set of Unicode characters in script Psalter_Pahlavi. Rejang = _Rejang // Rejang is the set of Unicode characters in script Rejang. Runic = _Runic // Runic is the set of Unicode characters in script Runic. Samaritan = _Samaritan // Samaritan is the set of Unicode characters in script Samaritan. Saurashtra = _Saurashtra // Saurashtra is the set of Unicode characters in script Saurashtra. Sharada = _Sharada // Sharada is the set of Unicode characters in script Sharada. Shavian = _Shavian // Shavian is the set of Unicode characters in script Shavian. Siddham = _Siddham // Siddham is the set of Unicode characters in script Siddham. 
SignWriting = _SignWriting // SignWriting is the set of Unicode characters in script SignWriting. Sinhala = _Sinhala // Sinhala is the set of Unicode characters in script Sinhala. Sogdian = _Sogdian // Sogdian is the set of Unicode characters in script Sogdian. Sora_Sompeng = _Sora_Sompeng // Sora_Sompeng is the set of Unicode characters in script Sora_Sompeng. Soyombo = _Soyombo // Soyombo is the set of Unicode characters in script Soyombo. Sundanese = _Sundanese // Sundanese is the set of Unicode characters in script Sundanese. Syloti_Nagri = _Syloti_Nagri // Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri. Syriac = _Syriac // Syriac is the set of Unicode characters in script Syriac. Tagalog = _Tagalog // Tagalog is the set of Unicode characters in script Tagalog. Tagbanwa = _Tagbanwa // Tagbanwa is the set of Unicode characters in script Tagbanwa. Tai_Le = _Tai_Le // Tai_Le is the set of Unicode characters in script Tai_Le. Tai_Tham = _Tai_Tham // Tai_Tham is the set of Unicode characters in script Tai_Tham. Tai_Viet = _Tai_Viet // Tai_Viet is the set of Unicode characters in script Tai_Viet. Takri = _Takri // Takri is the set of Unicode characters in script Takri. Tamil = _Tamil // Tamil is the set of Unicode characters in script Tamil. Tangsa = _Tangsa // Tangsa is the set of Unicode characters in script Tangsa. Tangut = _Tangut // Tangut is the set of Unicode characters in script Tangut. Telugu = _Telugu // Telugu is the set of Unicode characters in script Telugu. Thaana = _Thaana // Thaana is the set of Unicode characters in script Thaana. Thai = _Thai // Thai is the set of Unicode characters in script Thai. Tibetan = _Tibetan // Tibetan is the set of Unicode characters in script Tibetan. Tifinagh = _Tifinagh // Tifinagh is the set of Unicode characters in script Tifinagh. Tirhuta = _Tirhuta // Tirhuta is the set of Unicode characters in script Tirhuta. Toto = _Toto // Toto is the set of Unicode characters in script Toto. 
Ugaritic = _Ugaritic // Ugaritic is the set of Unicode characters in script Ugaritic. Vai = _Vai // Vai is the set of Unicode characters in script Vai. Vithkuqi = _Vithkuqi // Vithkuqi is the set of Unicode characters in script Vithkuqi. Wancho = _Wancho // Wancho is the set of Unicode characters in script Wancho. Warang_Citi = _Warang_Citi // Warang_Citi is the set of Unicode characters in script Warang_Citi. Yezidi = _Yezidi // Yezidi is the set of Unicode characters in script Yezidi. Yi = _Yi // Yi is the set of Unicode characters in script Yi. Zanabazar_Square = _Zanabazar_Square // Zanabazar_Square is the set of Unicode characters in script Zanabazar_Square. )
These variables have type *RangeTable.
var ( ASCII_Hex_Digit = _ASCII_Hex_Digit // ASCII_Hex_Digit is the set of Unicode characters with property ASCII_Hex_Digit. Bidi_Control = _Bidi_Control // Bidi_Control is the set of Unicode characters with property Bidi_Control. Dash = _Dash // Dash is the set of Unicode characters with property Dash. Deprecated = _Deprecated // Deprecated is the set of Unicode characters with property Deprecated. Diacritic = _Diacritic // Diacritic is the set of Unicode characters with property Diacritic. Extender = _Extender // Extender is the set of Unicode characters with property Extender. Hex_Digit = _Hex_Digit // Hex_Digit is the set of Unicode characters with property Hex_Digit. Hyphen = _Hyphen // Hyphen is the set of Unicode characters with property Hyphen. IDS_Binary_Operator = _IDS_Binary_Operator // IDS_Binary_Operator is the set of Unicode characters with property IDS_Binary_Operator. IDS_Trinary_Operator = _IDS_Trinary_Operator // IDS_Trinary_Operator is the set of Unicode characters with property IDS_Trinary_Operator. Ideographic = _Ideographic // Ideographic is the set of Unicode characters with property Ideographic. Join_Control = _Join_Control // Join_Control is the set of Unicode characters with property Join_Control. Logical_Order_Exception = _Logical_Order_Exception // Logical_Order_Exception is the set of Unicode characters with property Logical_Order_Exception. Noncharacter_Code_Point = _Noncharacter_Code_Point // Noncharacter_Code_Point is the set of Unicode characters with property Noncharacter_Code_Point. Other_Alphabetic = _Other_Alphabetic // Other_Alphabetic is the set of Unicode characters with property Other_Alphabetic. Other_Default_Ignorable_Code_Point = _Other_Default_Ignorable_Code_Point // Other_Default_Ignorable_Code_Point is the set of Unicode characters with property Other_Default_Ignorable_Code_Point. Other_Grapheme_Extend = _Other_Grapheme_Extend // Other_Grapheme_Extend is the set of Unicode characters with property Other_Grapheme_Extend. 
Other_ID_Continue = _Other_ID_Continue // Other_ID_Continue is the set of Unicode characters with property Other_ID_Continue. Other_ID_Start = _Other_ID_Start // Other_ID_Start is the set of Unicode characters with property Other_ID_Start. Other_Lowercase = _Other_Lowercase // Other_Lowercase is the set of Unicode characters with property Other_Lowercase. Other_Math = _Other_Math // Other_Math is the set of Unicode characters with property Other_Math. Other_Uppercase = _Other_Uppercase // Other_Uppercase is the set of Unicode characters with property Other_Uppercase. Pattern_Syntax = _Pattern_Syntax // Pattern_Syntax is the set of Unicode characters with property Pattern_Syntax. Pattern_White_Space = _Pattern_White_Space // Pattern_White_Space is the set of Unicode characters with property Pattern_White_Space. Prepended_Concatenation_Mark = _Prepended_Concatenation_Mark // Prepended_Concatenation_Mark is the set of Unicode characters with property Prepended_Concatenation_Mark. Quotation_Mark = _Quotation_Mark // Quotation_Mark is the set of Unicode characters with property Quotation_Mark. Radical = _Radical // Radical is the set of Unicode characters with property Radical. Regional_Indicator = _Regional_Indicator // Regional_Indicator is the set of Unicode characters with property Regional_Indicator. STerm = _Sentence_Terminal // STerm is an alias for Sentence_Terminal. Sentence_Terminal = _Sentence_Terminal // Sentence_Terminal is the set of Unicode characters with property Sentence_Terminal. Soft_Dotted = _Soft_Dotted // Soft_Dotted is the set of Unicode characters with property Soft_Dotted. Terminal_Punctuation = _Terminal_Punctuation // Terminal_Punctuation is the set of Unicode characters with property Terminal_Punctuation. Unified_Ideograph = _Unified_Ideograph // Unified_Ideograph is the set of Unicode characters with property Unified_Ideograph. 
Variation_Selector = _Variation_Selector // Variation_Selector is the set of Unicode characters with property Variation_Selector. White_Space = _White_Space // White_Space is the set of Unicode characters with property White_Space. )
These variables have type *RangeTable.
// CaseRanges is the table describing case mappings for all letters with
// non-self mappings.
var CaseRanges = _CaseRanges
CaseRanges is the table describing case mappings for all letters with non-self mappings.
// Categories is the set of Unicode category tables, keyed by category name.
var Categories = map[string]*RangeTable{
	"C":  C,
	"Cc": Cc,
	"Cf": Cf,
	"Cn": Cn,
	"Co": Co,
	"Cs": Cs,
	"L":  L,
	"LC": LC,
	"Ll": Ll,
	"Lm": Lm,
	"Lo": Lo,
	"Lt": Lt,
	"Lu": Lu,
	"M":  M,
	"Mc": Mc,
	"Me": Me,
	"Mn": Mn,
	"N":  N,
	"Nd": Nd,
	"Nl": Nl,
	"No": No,
	"P":  P,
	"Pc": Pc,
	"Pd": Pd,
	"Pe": Pe,
	"Pf": Pf,
	"Pi": Pi,
	"Po": Po,
	"Ps": Ps,
	"S":  S,
	"Sc": Sc,
	"Sk": Sk,
	"Sm": Sm,
	"So": So,
	"Z":  Z,
	"Zl": Zl,
	"Zp": Zp,
	"Zs": Zs,
}
Categories is the set of Unicode category tables.
// CategoryAliases maps category aliases to standard category names.
var CategoryAliases = map[string]string{
	"Cased_Letter":          "LC",
	"Close_Punctuation":     "Pe",
	"Combining_Mark":        "M",
	"Connector_Punctuation": "Pc",
	"Control":               "Cc",
	"Currency_Symbol":       "Sc",
	"Dash_Punctuation":      "Pd",
	"Decimal_Number":        "Nd",
	"Enclosing_Mark":        "Me",
	"Final_Punctuation":     "Pf",
	"Format":                "Cf",
	"Initial_Punctuation":   "Pi",
	"Letter":                "L",
	"Letter_Number":         "Nl",
	"Line_Separator":        "Zl",
	"Lowercase_Letter":      "Ll",
	"Mark":                  "M",
	"Math_Symbol":           "Sm",
	"Modifier_Letter":       "Lm",
	"Modifier_Symbol":       "Sk",
	"Nonspacing_Mark":       "Mn",
	"Number":                "N",
	"Open_Punctuation":      "Ps",
	"Other":                 "C",
	"Other_Letter":          "Lo",
	"Other_Number":          "No",
	"Other_Punctuation":     "Po",
	"Other_Symbol":          "So",
	"Paragraph_Separator":   "Zp",
	"Private_Use":           "Co",
	"Punctuation":           "P",
	"Separator":             "Z",
	"Space_Separator":       "Zs",
	"Spacing_Mark":          "Mc",
	"Surrogate":             "Cs",
	"Symbol":                "S",
	"Titlecase_Letter":      "Lt",
	"Unassigned":            "Cn",
	"Uppercase_Letter":      "Lu",
	"cntrl":                 "Cc",
	"digit":                 "Nd",
	"punct":                 "P",
}
CategoryAliases maps category aliases to standard category names.
// FoldCategory maps a category name to a table of
// code points outside the category that are equivalent under
// simple case folding to code points inside the category.
// If there is no entry for a category name, there are no such points.
var FoldCategory = map[string]*RangeTable{
	"L":  foldL,
	"Ll": foldLl,
	"Lt": foldLt,
	"Lu": foldLu,
	"M":  foldM,
	"Mn": foldMn,
}
FoldCategory maps a category name to a table of code points outside the category that are equivalent under simple case folding to code points inside the category. If there is no entry for a category name, there are no such points.
// FoldScript maps a script name to a table of
// code points outside the script that are equivalent under
// simple case folding to code points inside the script.
// If there is no entry for a script name, there are no such points.
var FoldScript = map[string]*RangeTable{
	"Common":    foldCommon,
	"Greek":     foldGreek,
	"Inherited": foldInherited,
}
FoldScript maps a script name to a table of code points outside the script that are equivalent under simple case folding to code points inside the script. If there is no entry for a script name, there are no such points.
// GraphicRanges defines the set of graphic characters according to Unicode.
var GraphicRanges = []*RangeTable{
	L, M, N, P, S, Zs,
}
GraphicRanges 定义了按 Unicode 标准划分的图形字符集合。
// PrintRanges defines the set of printable characters according to Go.
// ASCII space, U+0020, is handled separately.
var PrintRanges = []*RangeTable{
	L, M, N, P, S,
}
PrintRanges 定义了按 Go 标准划分的可打印字符集合。 ASCII 空格 U+0020 单独处理。
// Properties is the set of Unicode property tables, keyed by property name.
var Properties = map[string]*RangeTable{
	"ASCII_Hex_Digit":                    ASCII_Hex_Digit,
	"Bidi_Control":                       Bidi_Control,
	"Dash":                               Dash,
	"Deprecated":                         Deprecated,
	"Diacritic":                          Diacritic,
	"Extender":                           Extender,
	"Hex_Digit":                          Hex_Digit,
	"Hyphen":                             Hyphen,
	"IDS_Binary_Operator":                IDS_Binary_Operator,
	"IDS_Trinary_Operator":               IDS_Trinary_Operator,
	"Ideographic":                        Ideographic,
	"Join_Control":                       Join_Control,
	"Logical_Order_Exception":            Logical_Order_Exception,
	"Noncharacter_Code_Point":            Noncharacter_Code_Point,
	"Other_Alphabetic":                   Other_Alphabetic,
	"Other_Default_Ignorable_Code_Point": Other_Default_Ignorable_Code_Point,
	"Other_Grapheme_Extend":              Other_Grapheme_Extend,
	"Other_ID_Continue":                  Other_ID_Continue,
	"Other_ID_Start":                     Other_ID_Start,
	"Other_Lowercase":                    Other_Lowercase,
	"Other_Math":                         Other_Math,
	"Other_Uppercase":                    Other_Uppercase,
	"Pattern_Syntax":                     Pattern_Syntax,
	"Pattern_White_Space":                Pattern_White_Space,
	"Prepended_Concatenation_Mark":       Prepended_Concatenation_Mark,
	"Quotation_Mark":                     Quotation_Mark,
	"Radical":                            Radical,
	"Regional_Indicator":                 Regional_Indicator,
	"Sentence_Terminal":                  Sentence_Terminal,
	"STerm":                              Sentence_Terminal,
	"Soft_Dotted":                        Soft_Dotted,
	"Terminal_Punctuation":               Terminal_Punctuation,
	"Unified_Ideograph":                  Unified_Ideograph,
	"Variation_Selector":                 Variation_Selector,
	"White_Space":                        White_Space,
}
Properties is the set of Unicode property tables.
var Scripts = map[string]*RangeTable{ "Adlam": Adlam, "Ahom": Ahom, "Anatolian_Hieroglyphs": Anatolian_Hieroglyphs, "Arabic": Arabic, "Armenian": Armenian, "Avestan": Avestan, "Balinese": Balinese, "Bamum": Bamum, "Bassa_Vah": Bassa_Vah, "Batak": Batak, "Bengali": Bengali, "Bhaiksuki": Bhaiksuki, "Bopomofo": Bopomofo, "Brahmi": Brahmi, "Braille": Braille, "Buginese": Buginese, "Buhid": Buhid, "Canadian_Aboriginal": Canadian_Aboriginal, "Carian": Carian, "Caucasian_Albanian": Caucasian_Albanian, "Chakma": Chakma, "Cham": Cham, "Cherokee": Cherokee, "Chorasmian": Chorasmian, "Common": Common, "Coptic": Coptic, "Cuneiform": Cuneiform, "Cypriot": Cypriot, "Cypro_Minoan": Cypro_Minoan, "Cyrillic": Cyrillic, "Deseret": Deseret, "Devanagari": Devanagari, "Dives_Akuru": Dives_Akuru, "Dogra": Dogra, "Duployan": Duployan, "Egyptian_Hieroglyphs": Egyptian_Hieroglyphs, "Elbasan": Elbasan, "Elymaic": Elymaic, "Ethiopic": Ethiopic, "Georgian": Georgian, "Glagolitic": Glagolitic, "Gothic": Gothic, "Grantha": Grantha, "Greek": Greek, "Gujarati": Gujarati, "Gunjala_Gondi": Gunjala_Gondi, "Gurmukhi": Gurmukhi, "Han": Han, "Hangul": Hangul, "Hanifi_Rohingya": Hanifi_Rohingya, "Hanunoo": Hanunoo, "Hatran": Hatran, "Hebrew": Hebrew, "Hiragana": Hiragana, "Imperial_Aramaic": Imperial_Aramaic, "Inherited": Inherited, "Inscriptional_Pahlavi": Inscriptional_Pahlavi, "Inscriptional_Parthian": Inscriptional_Parthian, "Javanese": Javanese, "Kaithi": Kaithi, "Kannada": Kannada, "Katakana": Katakana, "Kawi": Kawi, "Kayah_Li": Kayah_Li, "Kharoshthi": Kharoshthi, "Khitan_Small_Script": Khitan_Small_Script, "Khmer": Khmer, "Khojki": Khojki, "Khudawadi": Khudawadi, "Lao": Lao, "Latin": Latin, "Lepcha": Lepcha, "Limbu": Limbu, "Linear_A": Linear_A, "Linear_B": Linear_B, "Lisu": Lisu, "Lycian": Lycian, "Lydian": Lydian, "Mahajani": Mahajani, "Makasar": Makasar, "Malayalam": Malayalam, "Mandaic": Mandaic, "Manichaean": Manichaean, "Marchen": Marchen, "Masaram_Gondi": Masaram_Gondi, "Medefaidrin": 
Medefaidrin, "Meetei_Mayek": Meetei_Mayek, "Mende_Kikakui": Mende_Kikakui, "Meroitic_Cursive": Meroitic_Cursive, "Meroitic_Hieroglyphs": Meroitic_Hieroglyphs, "Miao": Miao, "Modi": Modi, "Mongolian": Mongolian, "Mro": Mro, "Multani": Multani, "Myanmar": Myanmar, "Nabataean": Nabataean, "Nag_Mundari": Nag_Mundari, "Nandinagari": Nandinagari, "New_Tai_Lue": New_Tai_Lue, "Newa": Newa, "Nko": Nko, "Nushu": Nushu, "Nyiakeng_Puachue_Hmong": Nyiakeng_Puachue_Hmong, "Ogham": Ogham, "Ol_Chiki": Ol_Chiki, "Old_Hungarian": Old_Hungarian, "Old_Italic": Old_Italic, "Old_North_Arabian": Old_North_Arabian, "Old_Permic": Old_Permic, "Old_Persian": Old_Persian, "Old_Sogdian": Old_Sogdian, "Old_South_Arabian": Old_South_Arabian, "Old_Turkic": Old_Turkic, "Old_Uyghur": Old_Uyghur, "Oriya": Oriya, "Osage": Osage, "Osmanya": Osmanya, "Pahawh_Hmong": Pahawh_Hmong, "Palmyrene": Palmyrene, "Pau_Cin_Hau": Pau_Cin_Hau, "Phags_Pa": Phags_Pa, "Phoenician": Phoenician, "Psalter_Pahlavi": Psalter_Pahlavi, "Rejang": Rejang, "Runic": Runic, "Samaritan": Samaritan, "Saurashtra": Saurashtra, "Sharada": Sharada, "Shavian": Shavian, "Siddham": Siddham, "SignWriting": SignWriting, "Sinhala": Sinhala, "Sogdian": Sogdian, "Sora_Sompeng": Sora_Sompeng, "Soyombo": Soyombo, "Sundanese": Sundanese, "Syloti_Nagri": Syloti_Nagri, "Syriac": Syriac, "Tagalog": Tagalog, "Tagbanwa": Tagbanwa, "Tai_Le": Tai_Le, "Tai_Tham": Tai_Tham, "Tai_Viet": Tai_Viet, "Takri": Takri, "Tamil": Tamil, "Tangsa": Tangsa, "Tangut": Tangut, "Telugu": Telugu, "Thaana": Thaana, "Thai": Thai, "Tibetan": Tibetan, "Tifinagh": Tifinagh, "Tirhuta": Tirhuta, "Toto": Toto, "Ugaritic": Ugaritic, "Vai": Vai, "Vithkuqi": Vithkuqi, "Wancho": Wancho, "Warang_Citi": Warang_Citi, "Yezidi": Yezidi, "Yi": Yi, "Zanabazar_Square": Zanabazar_Square, }
Scripts 是 Unicode 文字(script)表的集合。
Functions
func In
func In(r rune, ranges ...*RangeTable) bool
In 报告该 rune 是否是所给范围之一的成员。
func Is
func Is(rangeTab *RangeTable, r rune) bool
Is 报告该 rune 是否在指定的范围表中。
func IsControl
func IsControl(r rune) bool
IsControl 报告该 rune 是否为控制字符。Unicode 的 C(Other)分类包含更多的码点,例如代理码点;可使用 Is(C, r) 来测试它们。
func IsDigit
func IsDigit(r rune) bool
IsDigit 报告该 rune 是否为十进制数字。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
fmt.Printf("%t\n", unicode.IsDigit('৩'))
fmt.Printf("%t\n", unicode.IsDigit('A'))
}
Output:
true
false
func IsGraphic
func IsGraphic(r rune) bool
IsGraphic 报告该 rune 是否被 Unicode 定义为图形字符。此类字符包括字母、标记、数字、标点、符号和空格,来自分类 L、M、N、P、S、Zs。
func IsLetter
func IsLetter(r rune) bool
IsLetter 报告该 rune 是否为字母(分类 L)。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
fmt.Printf("%t\n", unicode.IsLetter('A'))
fmt.Printf("%t\n", unicode.IsLetter('7'))
}
Output:
true
false
func IsLower
func IsLower(r rune) bool
IsLower 报告该 rune 是否为小写字母。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
fmt.Printf("%t\n", unicode.IsLower('a'))
fmt.Printf("%t\n", unicode.IsLower('A'))
}
Output:
true
false
func IsMark
func IsMark(r rune) bool
IsMark 报告该 rune 是否为标记字符(分类 M)。
func IsNumber
func IsNumber(r rune) bool
IsNumber 报告该 rune 是否为数字(分类 N)。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
fmt.Printf("%t\n", unicode.IsNumber('Ⅷ'))
fmt.Printf("%t\n", unicode.IsNumber('A'))
}
Output:
true
false
func IsOneOf
func IsOneOf(ranges []*RangeTable, r rune) bool
IsOneOf 报告该 rune 是否是所给范围之一的成员。函数 "In" 提供了更好的签名,应优先于 IsOneOf 使用。
func IsPrint
func IsPrint(r rune) bool
IsPrint 报告该 rune 是否被 Go 定义为可打印字符。此类字符包括字母、标记、数字、标点、符号以及 ASCII 空格字符,来自分类 L、M、N、P、S 和 ASCII 空格字符。该分类与 IsGraphic 相同,唯一的区别是仅有的间距字符为 ASCII 空格 U+0020。
func IsPunct
func IsPunct(r rune) bool
IsPunct 报告该 rune 是否为 Unicode 标点字符(分类 P)。
func IsSpace
func IsSpace(r rune) bool
IsSpace 报告该 rune 是否为 Unicode White Space 属性定义的空白字符;在 Latin-1 空间中包括:
'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP)。
其他空白字符的定义由分类 Z 和属性 Pattern_White_Space 设定。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
fmt.Printf("%t\n", unicode.IsSpace(' '))
fmt.Printf("%t\n", unicode.IsSpace('\n'))
fmt.Printf("%t\n", unicode.IsSpace('\t'))
fmt.Printf("%t\n", unicode.IsSpace('a'))
}
Output:
true
true
true
false
func IsSymbol
func IsSymbol(r rune) bool
IsSymbol 报告该 rune 是否为符号字符。
func IsTitle
func IsTitle(r rune) bool
IsTitle 报告该 rune 是否为标题大写字母。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
fmt.Printf("%t\n", unicode.IsTitle('Dž'))
fmt.Printf("%t\n", unicode.IsTitle('a'))
}
Output:
true
false
func IsUpper
func IsUpper(r rune) bool
IsUpper 报告该 rune 是否为大写字母。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
fmt.Printf("%t\n", unicode.IsUpper('A'))
fmt.Printf("%t\n", unicode.IsUpper('a'))
}
Output:
true
false
func SimpleFold
func SimpleFold(r rune) rune
SimpleFold 遍历在 Unicode 定义的简单大小写折叠下等价的 Unicode 码点。在与该 rune 等价的码点中(包括 rune 本身),SimpleFold 返回大于 r 的最小 rune(如果存在),否则返回 >= 0 的最小 rune。如果 r 不是有效的 Unicode 码点,SimpleFold(r) 返回 r。
例如:
SimpleFold('A') = 'a'
SimpleFold('a') = 'A'
SimpleFold('K') = 'k'
SimpleFold('k') = '\u212A' (Kelvin symbol, K)
SimpleFold('\u212A') = 'K'
SimpleFold('1') = '1'
SimpleFold(-2) = -2
Example
package main
import (
"fmt"
"unicode"
)
func main() {
fmt.Printf("%#U\n", unicode.SimpleFold('A')) // 'a'
fmt.Printf("%#U\n", unicode.SimpleFold('a')) // 'A'
fmt.Printf("%#U\n", unicode.SimpleFold('K')) // 'k'
fmt.Printf("%#U\n", unicode.SimpleFold('k')) // '\u212A' (Kelvin symbol, K)
fmt.Printf("%#U\n", unicode.SimpleFold('\u212A')) // 'K'
fmt.Printf("%#U\n", unicode.SimpleFold('1')) // '1'
}
Output:
U+0061 'a'
U+0041 'A'
U+006B 'k'
U+212A 'K'
U+004B 'K'
U+0031 '1'
func To
func To(_case int, r rune) rune
To 将 rune 映射为指定的大小写:UpperCase、LowerCase 或 TitleCase。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
const lcG = 'g'
fmt.Printf("%#U\n", unicode.To(unicode.UpperCase, lcG))
fmt.Printf("%#U\n", unicode.To(unicode.LowerCase, lcG))
fmt.Printf("%#U\n", unicode.To(unicode.TitleCase, lcG))
const ucG = 'G'
fmt.Printf("%#U\n", unicode.To(unicode.UpperCase, ucG))
fmt.Printf("%#U\n", unicode.To(unicode.LowerCase, ucG))
fmt.Printf("%#U\n", unicode.To(unicode.TitleCase, ucG))
}
Output:
U+0047 'G'
U+0067 'g'
U+0047 'G'
U+0047 'G'
U+0067 'g'
U+0047 'G'
func ToLower
func ToLower(r rune) rune
ToLower 将 rune 映射为小写。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
const ucG = 'G'
fmt.Printf("%#U\n", unicode.ToLower(ucG))
}
Output:
U+0067 'g'
func ToTitle
func ToTitle(r rune) rune
ToTitle 将 rune 映射为标题大写。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
const ucG = 'g'
fmt.Printf("%#U\n", unicode.ToTitle(ucG))
}
Output:
U+0047 'G'
func ToUpper
func ToUpper(r rune) rune
ToUpper 将 rune 映射为大写。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
const ucG = 'g'
fmt.Printf("%#U\n", unicode.ToUpper(ucG))
}
Output:
U+0047 'G'
Types
type CaseRange
type CaseRange struct { Lo uint32 Hi uint32 Delta d }
CaseRange 表示用于简单(一个码点到一个码点)大小写转换的 Unicode 码点范围。范围从 Lo 到 Hi(含两端),固定步长为 1。Delta 是需要加到码点上、以得到该字符另一种大小写形式所对应码点的数值,可以为负数;如果为零,表示该字符已处于对应的大小写形式。有一种特殊情况用于表示大写与小写成对交替出现的序列,此时 Delta 取如下固定值:
{UpperLower, UpperLower, UpperLower}
常量 UpperLower 具有一个在其他情况下不可能出现的 delta 值。
type Range16
type Range16 struct { Lo uint16 Hi uint16 Stride uint16 }
Range16 表示一个 16 位 Unicode 码点的范围。范围从 Lo 到 Hi(含两端),具有指定的步长。
type Range32
type Range32 struct { Lo uint32 Hi uint32 Stride uint32 }
Range32 表示一个 Unicode 码点的范围,当一个或多个值无法用 16 位表示时使用。范围从 Lo 到 Hi(含两端),具有指定的步长。Lo 和 Hi 必须始终 >= 1<<16。
type RangeTable
type RangeTable struct { R16 []Range16 R32 []Range32 LatinOffset int // R16 中 Hi <= MaxLatin1 的条目数量 }
RangeTable 通过列出集合内码点的范围来定义一组 Unicode 码点。范围以两个切片列出以节省空间:一个 16 位范围的切片和一个 32 位范围的切片。两个切片必须按升序排列且不重叠。此外,R32 应仅包含 >= 0x10000 (1<<16) 的值。
type SpecialCase
type SpecialCase []CaseRange
SpecialCase 表示特定于语言的大小写映射,例如土耳其语。
SpecialCase 的方法通过覆盖标准映射来进行自定义。
Example
package main
import (
"fmt"
"unicode"
)
func main() {
t := unicode.TurkishCase
const lci = 'i'
fmt.Printf("%#U\n", t.ToLower(lci))
fmt.Printf("%#U\n", t.ToTitle(lci))
fmt.Printf("%#U\n", t.ToUpper(lci))
const uci = 'İ'
fmt.Printf("%#U\n", t.ToLower(uci))
fmt.Printf("%#U\n", t.ToTitle(uci))
fmt.Printf("%#U\n", t.ToUpper(uci))
}
Output:
U+0069 'i'
U+0130 'İ'
U+0130 'İ'
U+0069 'i'
U+0130 'İ'
U+0130 'İ'
var AzeriCase SpecialCase = _TurkishCase
var TurkishCase SpecialCase = _TurkishCase
func (SpecialCase) ToLower
func (special SpecialCase) ToLower(r rune) rune
ToLower 将 rune 映射为小写,优先使用特殊映射。
func (SpecialCase) ToTitle
func (special SpecialCase) ToTitle(r rune) rune
ToTitle 将 rune 映射为标题大写,优先使用特殊映射。
func (SpecialCase) ToUpper
func (special SpecialCase) ToUpper(r rune) rune
ToUpper 将 rune 映射为大写,优先使用特殊映射。
Directories
| utf16 | utf16 包实现了 UTF-16 序列的编码和解码。 |
| utf8 | utf8 包实现了支持 UTF-8 编码文本的函数和常量。 |