Posts

Showing posts from May, 2025

5

import nltk
from nltk.tokenize import word_tokenize

# One-time corpus/model downloads required by word_tokenize and pos_tag.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


def pos_tagging(text):
    """Tokenize *text* into words and return a list of (token, POS-tag) pairs.

    Uses NLTK's default Penn-Treebank tagger via ``nltk.pos_tag``.
    """
    tokens = word_tokenize(text)
    return nltk.pos_tag(tokens)


text = "NLTK is a leading platform for building Python programs to work with human language data."
tagged_text = pos_tagging(text)
print(tagged_text)

4

# FIX 1: the original never imported `nltk` itself, yet called
# `nltk.download(...)` — that raised NameError.
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# One-time downloads required by word_tokenize and WordNetLemmatizer.
nltk.download('punkt')
nltk.download('wordnet')


def lemmatize_text(text):
    """Lemmatize every word token of *text* and return the rejoined string.

    Each token is reduced to its WordNet lemma (default noun POS) and the
    results are joined with single spaces.
    """
    lemmatizer = WordNetLemmatizer()
    tokens = word_tokenize(text)
    return ' '.join(lemmatizer.lemmatize(word) for word in tokens)


text = "The cats are chasing mice and playing in the garden"
lemmatized_text = lemmatize_text(text)
print("Original Text:", text)
# FIX 2: the original line was missing its closing parenthesis (SyntaxError).
print("Lemmatized Text:", lemmatized_text)

3

import nltk
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# One-time download required by word_tokenize.
nltk.download('punkt')


def stem_text(text):
    """Stem every word token of *text* with the Porter stemmer.

    Returns the stemmed tokens rejoined into a single space-separated string.
    """
    porter_stemmer = PorterStemmer()
    words = word_tokenize(text)
    stemmed_words = [porter_stemmer.stem(word) for word in words]
    return ' '.join(stemmed_words)


text = "NLTK is a leading platform for building Python programs to work with human language data."
stemmed_text = stem_text(text)
# FIX: the original print was missing its closing parenthesis (SyntaxError).
print(stemmed_text)

2

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# One-time downloads required by stopwords and word_tokenize.
nltk.download('stopwords')
nltk.download('punkt')


def remove_stopwords(text):
    """Drop English stopwords from *text* and return the remaining tokens
    rejoined with single spaces.

    Comparison is case-insensitive (tokens are lowercased before the
    stopword-set membership test), but surviving tokens keep their
    original casing.
    """
    stopword_set = set(stopwords.words('english'))
    kept = [
        token
        for token in word_tokenize(text)
        if token.lower() not in stopword_set
    ]
    return ' '.join(kept)


text = "NLTK is a leading platform for building Python programs to work with human language data."
filtered_text = remove_stopwords(text)
print(filtered_text)

1

 Week 1: Write a python program to perform tokenization by word and sentence using nltk. Program for sentence tokenization: import nltk nltk.download('punkt') # Download the necessary tokenization models from nltk.tokenize import sent_tokenize def tokenize_sentences(text): sentences = sent_tokenize(text) return sentences text = "NLTK is a leading platform for building Python programs to work with human language data. It provides easy-to-use interfaces to over 50 corpora and lexical resources such as WordNet, along with a suite of text processing libraries for classification, tokenization, stemming, tagging, parsing, and semantic reasoning, wrappers for industrial-strength NLP libraries, and an active discussion forum." sentences = tokenize_sentences(text) for i, sentence in enumerate(sentences): print(f"Sentence {i+1}: {sentence}") import nltk from nltk.tokenize import word_tokenize word_tokenize('won’t') :--Program for word Tokenization: import nltk...