% Here are the functions needed for Practical 2.
% To use in Matlab, functions should be split into separate, appropriately
% named files.

function [c] = CountWord(doc, w)
% COUNTWORD  Count how many times a word ID occurs in a document.
%   c = CountWord(doc, w) returns the number of elements of 'doc' (a vector
%   of word ID numbers) that are equal to the word ID 'w'.
%   An empty document yields 0.

    % Vectorised count: (doc == w) is a logical mask, sum counts the trues.
    % Equivalent to looping over doc and incrementing a counter on a match.
    c = sum(doc == w);
end

function [freqWords] = GetFrequentWords(doc, wordList)
% GETFREQUENTWORDS  Find the most frequent words of a document.
%   freqWords = GetFrequentWords(doc, wordList) returns the indices into
%   'wordList' (used here as word ID numbers) of the words that occur most
%   often in 'doc', most frequent first. Up to four indices are returned;
%   fewer if wordList has fewer than four entries.
%
%   Word j of wordList is counted as the ID number j in 'doc'.

    numListed = length(wordList);

    % one counter per word in wordList
    wordCounts = zeros(1, numListed);
    for j = 1:numListed
        wordCounts(j) = CountWord(doc, j);
    end

    % Sort counts in descending order; only the permutation index is
    % needed (see help('sort') for more information).
    [~, sortOrder] = sort(wordCounts, 'descend');

    % Guard against word lists shorter than four entries, which would
    % otherwise cause an index-out-of-bounds error.
    numToReturn = min(4, numListed);
    freqWords = sortOrder(1:numToReturn);
end

function [v] = GetCoocurrenceVector(doc, w, numwords)
% GETCOOCURRENCEVECTOR  Co-occurrence vector for one word of a document.
%   v = GetCoocurrenceVector(doc, w, numwords) returns a numwords-by-1
%   vector in which v(k) is the number of times the word with ID k appears
%   within a context window of +/- 5 positions around an occurrence of the
%   word with ID 'w' in 'doc'. The occurrence itself (the centre of the
%   window) is not counted. 'numwords' is the number of unique words and
%   is only used to initialise the co-occurrence vector.

    halfWindow = 5;               % context window is +/- this many words
    docLength = length(doc);

    % initialise the co-occurrence vector with zeros
    v = zeros(numwords, 1);

    % positions at which w occurs; forced to a row so the for loop visits
    % one position per iteration regardless of doc's orientation
    occurrences = reshape(find(doc == w), 1, []);

    for i = occurrences
        % clamp the context window to the bounds of the document
        winStart = max(i - halfWindow, 1);
        winEnd = min(i + halfWindow, docLength);

        for j = winStart:winEnd
            % don't add the word itself
            if j ~= i
                % doc(j) is the ID of the jth word, i.e. its position in
                % the co-occurrence vector
                v(doc(j)) = v(doc(j)) + 1;
            end
        end
    end
end