# Labs 1-5: NLTK text processing exercises

# Lab 1: Sentence Tokenization


import nltk

nltk.download('punkt') 
nltk.download('punkt_tab') 
from nltk.tokenize import sent_tokenize
def tokenize_sentences(text):
    """Split *text* into a list of sentences using NLTK's Punkt tokenizer."""
    return sent_tokenize(text)

# Demo text. Bug fix: the scraped original ran words together
# ("WordNet,along", "tagging,parsing", "activediscussion"), which would
# distort the tokenization output; spacing repaired.
text = ("NLTK is a leading platform for building Python programs to work with "
        "human language data. It provides easy-to-use interfaces to over 50 "
        "corpora and lexical resources such as WordNet, along with a suite of "
        "text processing libraries for classification, tokenization, stemming, "
        "tagging, parsing, and semantic reasoning, wrappers for "
        "industrial-strength NLP libraries, and an active discussion forum.")

sentences = tokenize_sentences(text)

# Print each sentence with a 1-based index.
for i, sentence in enumerate(sentences, start=1):
    print(f"Sentence {i}: {sentence}")

import nltk
from nltk.tokenize import word_tokenize

# Bug fix: the original evaluated word_tokenize('won’t') and discarded the
# result (that only echoes in a notebook/REPL); print it so the
# contraction-splitting demo is visible when run as a script.
print(word_tokenize('won’t'))

import nltk
nltk.download('punkt') 
from nltk.tokenize import word_tokenize
def tokenize_words(text):
    """Return the list of word tokens of *text* via NLTK's word_tokenize."""
    return word_tokenize(text)
# Demo: tokenize a sample sentence into words and display the token list.
text = (
    "NLTK is a leading platform for building Python programs "
    "to work with human language data."
)
words = tokenize_words(text)
print(words)

# Lab 2: Stopword Removal


import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')
def remove_stopwords(text):
    """Remove English stopwords from *text* (case-insensitive match).

    Tokenizes with word_tokenize, drops every token whose lowercased form
    appears in NLTK's English stopword list, and rejoins the remaining
    tokens with single spaces.
    """
    stop_set = set(stopwords.words('english'))
    kept = []
    for token in word_tokenize(text):
        if token.lower() not in stop_set:
            kept.append(token)
    return ' '.join(kept)

# Demo: strip English stopwords from a sample sentence.
text = (
    "NLTK is a leading platform for building Python programs "
    "to work with human language data."
)
filtered_text = remove_stopwords(text)
print(filtered_text)


# Lab 3: Stemming with the Porter Stemmer

import nltk
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
# Download NLTK tokenizer and stemmer models
nltk.download('punkt')
def stem_text(text):
  """Stem every token of *text* with NLTK's Porter stemmer.

  Tokenizes with word_tokenize, applies PorterStemmer.stem to each token,
  and rejoins the stemmed tokens with single spaces.
  """
  porter_stemmer = PorterStemmer()
  words = word_tokenize(text)
  stemmed_words = [porter_stemmer.stem(word) for word in words]
  return ' '.join(stemmed_words)


# Bug fix: this assignment was indented inside stem_text AFTER the return,
# making it unreachable dead code; the call below then stemmed whatever
# stale `text` value was left over from an earlier lab. Moved to module level.
text = "NLTK is a leading platform for building Python programs to work with human language data."

stemmed_text = stem_text(text)

print(stemmed_text)


# Lab 4: Lemmatization with WordNet


from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
nltk.download('punkt')
nltk.download('wordnet')
def lemmatize_text(text):
  """Lemmatize every token of *text* with NLTK's WordNet lemmatizer.

  NOTE: lemmatize() is called without a POS tag, so it defaults to noun
  lemmatization; verbs like "chasing" will pass through unchanged.
  """
  lemmatizer = WordNetLemmatizer()
  tokens = word_tokenize(text)
  return ' '.join(lemmatizer.lemmatize(word) for word in tokens)


# Bug fix: this assignment was indented inside lemmatize_text AFTER the
# return (unreachable dead code), so the calls below operated on a stale
# `text` value from an earlier lab and "Original Text" printed the wrong
# string. Moved to module level.
text = "The cats are chasing mice and playing in the garden"
lemmatized_text = lemmatize_text(text)
print("Original Text:", text)
print("Lemmatized Text:", lemmatized_text)


# Lab 5: Part-of-Speech (POS) Tagging

import nltk
from nltk.tokenize import word_tokenize
# Download NLTK tokenizer and POS tagging models
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger') # This might download the English model, but let's be explicit below
nltk.download('averaged_perceptron_tagger_eng') # Explicitly download the English tagger model
def pos_tagging(text):
  """Return (token, POS-tag) pairs for *text* using NLTK's default tagger."""
  return nltk.pos_tag(word_tokenize(text))
# Demo: POS-tag a sample sentence and print the (word, tag) pairs.
text = (
    "NLTK is a leading platform for building Python programs "
    "to work with human language data."
)
tagged_text = pos_tagging(text)
print(tagged_text)

Comments

Popular posts from this blog

Web

Lab 1 ai