1-5
Lab 1
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize
def tokenize_sentences(text):
    """Split ``text`` into sentences.

    Delegates to NLTK's ``sent_tokenize`` and returns its result unchanged.
    """
    return sent_tokenize(text)
# Demo: split a sample paragraph into sentences and print them, numbered
# from 1.
# NOTE(review): the sample text is missing spaces in a few places
# ("WordNet,along", "tagging,parsing", "activediscussion"); it is kept
# verbatim here because changing it would change the tokenizer output.
text = (
    "NLTK is a leading platform for building Python programs to work with "
    "human language data. It provides easy-to-use interfaces to over 50 "
    "corpora and lexical resources such as WordNet,along with a suite of "
    "text processing libraries for classification, tokenization, stemming, "
    "tagging,parsing, and semantic reasoning, wrappers for "
    "industrial-strength NLP libraries, and an activediscussion forum."
)
sentences = tokenize_sentences(text)
for i, sentence in enumerate(sentences):
    print(f"Sentence {i+1}: {sentence}")
import nltk
from nltk.tokenize import word_tokenize
# Quick check of how word_tokenize handles a contraction written with a
# typographic apostrophe (’). This is a bare expression: its result is only
# shown in an interactive session/notebook and is discarded when run as a
# script.
word_tokenize('won’t')
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
def tokenize_words(text):
    """Split ``text`` into word-level tokens.

    Delegates to NLTK's ``word_tokenize`` and returns its result unchanged.
    """
    return word_tokenize(text)
# Demo: tokenize a sample sentence into words and print the token list.
text = (
    "NLTK is a leading platform for building Python programs "
    "to work with human language data."
)
words = tokenize_words(text)
print(words)
nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize
def tokenize_sentences(text):
    """Return the sentences found in ``text``.

    This is a thin wrapper around NLTK's ``sent_tokenize``; the tokenizer's
    result is passed back as-is.
    """
    return sent_tokenize(text)
# Demo: split a sample paragraph into sentences and print them, numbered
# from 1.
# NOTE(review): the sample text is missing spaces in a few places
# ("WordNet,along", "tagging,parsing", "activediscussion"); it is kept
# verbatim here because changing it would change the tokenizer output.
text = (
    "NLTK is a leading platform for building Python programs to work with "
    "human language data. It provides easy-to-use interfaces to over 50 "
    "corpora and lexical resources such as WordNet,along with a suite of "
    "text processing libraries for classification, tokenization, stemming, "
    "tagging,parsing, and semantic reasoning, wrappers for "
    "industrial-strength NLP libraries, and an activediscussion forum."
)
sentences = tokenize_sentences(text)
for i, sentence in enumerate(sentences):
    print(f"Sentence {i+1}: {sentence}")
import nltk
from nltk.tokenize import word_tokenize
# Quick check of how word_tokenize handles a contraction written with a
# typographic apostrophe (’). This is a bare expression: its result is only
# shown in an interactive session/notebook and is discarded when run as a
# script.
word_tokenize('won’t')
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
def tokenize_words(text):
    """Return the word-level tokens of ``text``.

    This is a thin wrapper around NLTK's ``word_tokenize``; the tokenizer's
    result is passed back as-is.
    """
    return word_tokenize(text)
# Demo: tokenize a sample sentence into words and print the token list.
text = (
    "NLTK is a leading platform for building Python programs "
    "to work with human language data."
)
words = tokenize_words(text)
print(words)
Lab 2
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')
def remove_stopwords(text):
    """Return ``text`` with English stopwords removed.

    The text is tokenized with ``word_tokenize``. A token is dropped when its
    lowercase form appears in NLTK's English stopword list; the tokens that
    remain keep their original casing and are joined with single spaces.
    """
    tokens = word_tokenize(text)
    stop_set = set(stopwords.words('english'))

    kept = []
    for token in tokens:
        # Compare case-insensitively so "The" is filtered just like "the".
        if token.lower() in stop_set:
            continue
        kept.append(token)
    return ' '.join(kept)
# Demo: strip English stopwords from a sample sentence and print the result.
text = (
    "NLTK is a leading platform for building Python programs "
    "to work with human language data."
)
filtered_text = remove_stopwords(text)
print(filtered_text)
Lab 3
import nltk
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
# Download the NLTK tokenizer model (PorterStemmer is rule-based and needs no download)
nltk.download('punkt')
def stem_text(text):
    """Return ``text`` with every token replaced by its Porter stem.

    The text is tokenized with ``word_tokenize``, each token is passed through
    ``PorterStemmer.stem``, and the stems are joined with single spaces.
    """
    stem = PorterStemmer().stem
    tokens = word_tokenize(text)
    return ' '.join(map(stem, tokens))
# Demo: stem every token of a sample sentence and print the result.
text = (
    "NLTK is a leading platform for building Python programs "
    "to work with human language data."
)
stemmed_text = stem_text(text)
print(stemmed_text)
Lab 4
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
nltk.download('punkt')
nltk.download('wordnet')
def lemmatize_text(text):
    """Return ``text`` with every token replaced by its WordNet lemma.

    The text is tokenized with ``word_tokenize`` and the lemmas are joined
    with single spaces. ``lemmatize`` is called without a part-of-speech
    argument, so the lemmatizer's default part of speech applies to every
    token.
    """
    wordnet = WordNetLemmatizer()
    words = word_tokenize(text)
    lemmas = (wordnet.lemmatize(token) for token in words)
    return ' '.join(lemmas)
# Demo: lemmatize a sample sentence, then print the original and the
# lemmatized version for comparison.
text = "The cats are chasing mice and playing in the garden"
lemmatized_text = lemmatize_text(text)
print("Original Text:", text)
print("Lemmatized Text:", lemmatized_text)
Lab 5
import nltk
from nltk.tokenize import word_tokenize
# Download NLTK tokenizer and POS tagging models
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger') # This might download the English model, but let's be explicit below
nltk.download('averaged_perceptron_tagger_eng') # Explicitly download the English tagger model
def pos_tagging(text):
    """Tag each token of ``text`` with its part of speech.

    The text is tokenized with ``word_tokenize`` and the tokens are handed to
    ``nltk.pos_tag``, whose result is returned unchanged.
    """
    return nltk.pos_tag(word_tokenize(text))
# Demo: part-of-speech tag a sample sentence and print the tagged tokens.
text = (
    "NLTK is a leading platform for building Python programs "
    "to work with human language data."
)
tagged_text = pos_tagging(text)
print(tagged_text)
Comments
Post a Comment