forked from golang-nlp/stopwords
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstopwords.go
More file actions
144 lines (114 loc) · 3.49 KB
/
stopwords.go
File metadata and controls
144 lines (114 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
package stopwords
import (
"sync"
"github.com/OpenSystemsLab/stopwords/data"
)
// Registry holds the stopword sets of the languages that have been explicitly
// registered, so that languages are only kept in memory on request.
// Methods on Registry acquire mu before reading or writing languages.
type Registry struct {
// mu guards languages.
mu sync.RWMutex
// languages maps a language identifier (the string passed to
// RegisterLanguage) to that language's set of stopwords.
languages map[string]map[string]struct{}
}
// DefaultRegistry is the package-level Registry used by the package-level
// convenience functions (RegisterLanguage, IsStopWord, Clear, ...).
// It is created empty by NewRegistry when the package is initialized.
var DefaultRegistry = NewRegistry()
// NewRegistry returns a pointer to an empty Registry whose language map is
// already allocated, so languages can be registered on it right away.
func NewRegistry() *Registry {
	reg := new(Registry)
	reg.languages = map[string]map[string]struct{}{}
	return reg
}
// RegisterLanguage loads the stopword set for lang via data.LoadLanguage and
// stores it in the registry.
//
// If lang is already registered the call is a no-op and returns nil. If
// data.LoadLanguage fails, its error is returned unchanged and the registry is
// left unmodified.
//
// The write lock is held for the whole call, including the load, so concurrent
// registrations of the same language load it only once.
func (r *Registry) RegisterLanguage(lang string) error {
	r.mu.Lock()
	defer r.mu.Unlock()

	if _, exists := r.languages[lang]; exists {
		return nil // already loaded
	}

	langData, err := data.LoadLanguage(lang)
	if err != nil {
		return err
	}

	// A Registry that was not built by NewRegistry (e.g. a zero value) has a
	// nil map; reads tolerate that, but the assignment below would panic.
	if r.languages == nil {
		r.languages = make(map[string]map[string]struct{})
	}
	r.languages[lang] = langData
	return nil
}
// RegisterLanguages registers each of langs in the order given by calling
// RegisterLanguage. It stops at the first failure and returns that error;
// languages registered before the failure stay registered. It returns nil
// when every language was registered (or when langs is empty).
func (r *Registry) RegisterLanguages(langs ...string) error {
	for i := 0; i < len(langs); i++ {
		err := r.RegisterLanguage(langs[i])
		if err != nil {
			return err
		}
	}
	return nil
}
// IsStopWord reports whether word is in the stopword set registered for lang.
// The lookup is an exact match on word. It returns false when lang has not
// been registered.
func (r *Registry) IsStopWord(lang, word string) bool {
	r.mu.RLock()
	// Indexing a missing language yields a nil inner map, and a lookup on a
	// nil map simply reports "not found".
	_, found := r.languages[lang][word]
	r.mu.RUnlock()
	return found
}
// IsLanguageLoaded reports whether lang currently has an entry in the
// registry.
func (r *Registry) IsLanguageLoaded(lang string) bool {
	r.mu.RLock()
	_, loaded := r.languages[lang]
	r.mu.RUnlock()
	return loaded
}
// LoadedLanguages returns the identifiers of all languages currently in the
// registry. The result is a newly allocated slice; its order follows map
// iteration and is therefore unspecified.
func (r *Registry) LoadedLanguages() []string {
	r.mu.RLock()
	defer r.mu.RUnlock()

	names := make([]string, len(r.languages))
	next := 0
	for name := range r.languages {
		names[next] = name
		next++
	}
	return names
}
// UnregisterLanguage drops the registry's entry for lang. It does nothing if
// lang is not registered.
func (r *Registry) UnregisterLanguage(lang string) {
	r.mu.Lock()
	delete(r.languages, lang)
	r.mu.Unlock()
}
// Clear discards every registered language by replacing the registry's map
// with a new, empty one.
func (r *Registry) Clear() {
	empty := make(map[string]map[string]struct{})

	r.mu.Lock()
	r.languages = empty
	r.mu.Unlock()
}
// Convenience functions for the default registry
// RegisterLanguage loads and registers lang in DefaultRegistry.
// It forwards to DefaultRegistry.RegisterLanguage and returns that call's
// error unchanged.
func RegisterLanguage(lang string) error {
return DefaultRegistry.RegisterLanguage(lang)
}
// RegisterLanguages loads and registers each of langs in DefaultRegistry.
// It forwards to DefaultRegistry.RegisterLanguages and returns that call's
// error unchanged.
func RegisterLanguages(langs ...string) error {
return DefaultRegistry.RegisterLanguages(langs...)
}
// IsStopWord reports whether word is a stopword for lang according to
// DefaultRegistry. It forwards to DefaultRegistry.IsStopWord.
func IsStopWord(lang, word string) bool {
return DefaultRegistry.IsStopWord(lang, word)
}
// IsLanguageLoaded reports whether lang is currently loaded in
// DefaultRegistry. It forwards to DefaultRegistry.IsLanguageLoaded.
func IsLanguageLoaded(lang string) bool {
return DefaultRegistry.IsLanguageLoaded(lang)
}
// LoadedLanguages returns the languages currently loaded in DefaultRegistry.
// It forwards to DefaultRegistry.LoadedLanguages.
func LoadedLanguages() []string {
return DefaultRegistry.LoadedLanguages()
}
// UnregisterLanguage removes lang from DefaultRegistry.
// It forwards to DefaultRegistry.UnregisterLanguage.
func UnregisterLanguage(lang string) {
DefaultRegistry.UnregisterLanguage(lang)
}
// Clear removes all loaded languages from DefaultRegistry.
// It forwards to DefaultRegistry.Clear.
func Clear() {
DefaultRegistry.Clear()
}
// GetSupportedLanguages returns the list reported by
// data.GetSupportedLanguages. It does not consult any Registry, so the result
// does not depend on which languages have been registered.
// NOTE(review): presumably these are the identifiers accepted by
// RegisterLanguage — confirm against the data package.
func GetSupportedLanguages() []string {
return data.GetSupportedLanguages()
}