/** @module regex */
import * as util from './util.js';
import * as consts from './consts.js';
import * as graph_components from './graph_components.js';
import * as graph_ops from './graph_ops.js';
/**
* inserts omitted concatenation symbols in regular expression
* @param {string} expressionString - the string to inject concatenation symbols in
* @returns {string} the string with concatenation symbols inserted
*/
export function injectConcatSymbols(expressionString) {
let result = '';
for (let i = 0; i < expressionString.length; i++) {
let currChar = expressionString.charAt(i);
result += currChar;
if (i + 1 < expressionString.length) {
let nextChar = expressionString.charAt(i + 1);
if (currChar !== consts.OPEN && currChar !== consts.UNION
&& currChar !== consts.CONCAT && nextChar !== consts.CLOSE
&& nextChar !== consts.UNION && nextChar !== consts.KLEENE
&& nextChar !== consts.PLUS && nextChar !== consts.CONCAT) {
result += consts.CONCAT;
}
}
}
return result;
}
/**
* given an operator, gets the operator's precedence
* @param {string} key - the operator to get the predecence of
* @returns {Number} the predecence of the operator as an int
*/
export function getPrecendence(key) {
switch (key) {
case consts.KLEENE:
return 3;
case consts.CONCAT:
return 2;
case consts.UNION:
return 1;
default:
return 0;
}
}
/**
* applies the shunting-yard algorithm to convert regular expression to postfix notation
* @param {string} string - the string to convert to postfix
* @returns {string} the string in postfix notation
*/
export function shunting_yard(string) {
let stack = new util.Stack();
let queue = new util.Queue();
for (let ch of string) {
// case if ch is a character or epsilon
if (ch.match(/[a-zA-Z0-9]/i) || ch === consts.EMPTY || ch === consts.SIGMA) {
queue.enqueue(ch);
}
// case if ch is an operator
else if (ch === consts.UNION || ch === consts.KLEENE || ch === consts.CONCAT) {
//while (precedence[stack.peek()] > precedence[ch]) {
while (getPrecendence(stack.peek()) >= getPrecendence(ch)) {
queue.enqueue(stack.pop());
}
stack.push(ch);
}
// case if ch is (
else if (ch === consts.OPEN) {
stack.push(ch);
}
// case if ch is )
else if (ch === consts.CLOSE) {
while (stack.peek() !== consts.OPEN) {
queue.enqueue(stack.pop());
}
stack.pop();
}
}
//empty remaining elements from stack
while (!stack.isEmpty()) {
queue.enqueue(stack.pop());
}
// put resulting string together
let result = '';
while (queue.length > 0) {
result += queue.dequeue();
}
return result;
}
/**
* performs the union operation on two graphs
* @param {string} graph1 - the first graph
* @param {string} graph2 - the second graph
* @returns {string} the new resulting graph of the union operation
*/
export function union(graph1, graph2) {
let graph = {};
graph['q0'] = graph_components.make_vertex('q0', 300, 300, consts.DEFAULT_VERTEX_RADIUS, true, false, []);
let graph1start = '';
let graph2start = '';
// rename all nodes in graph1
for (let node of Object.keys(graph1)) {
// add a check for if the new name is already in the graph
let new_name = node + ',1';
if (new_name in graph1) {
new_name = new_name + ',n';
}
graph_ops.rename_vertex(graph1, node, new_name, true);
// check if current node is graph1's start node
if (graph1[new_name].is_start) {
graph1start = new_name;
graph1[new_name].is_start = false;
}
}
// rename all nodes in graph2
for (let node of Object.keys(graph2)) {
let new_name = node + ',2';
if (new_name in graph2) {
new_name = new_name + ',n';
}
graph_ops.rename_vertex(graph2, node, new_name, true);
// check if current node is graph2's start node
if (graph2[new_name].is_start) {
graph2start = new_name;
graph2[new_name].is_start = false;
}
}
// add all nodes to new graph
for (let node of Object.keys(graph1)) {
graph[node] = structuredClone(graph1[node]);
}
for (let node of Object.keys(graph2)) {
graph[node] = structuredClone(graph2[node]);
}
// add epsilon transitions from new start to old starts
graph['q0'].out.push(graph_components.make_edge('q0', graph1start, consts.EMPTY_SYMBOL, 0.5, 0, 0, 0,
consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL));
graph['q0'].out.push(graph_components.make_edge('q0', graph2start, consts.EMPTY_SYMBOL, 0.5, 0, 0, 0,
consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL));
return graph;
}
/**
* performs the concatenation operation on two graphs
* @param {string} graph1 - the first graph
* @param {string} graph2 - the second graph
* @returns {string} the new resulting graph of the concatenation operation
*/
export function concat(graph1, graph2) {
let graph1accept = [];
let graph2start = '';
// modify names of all nodes in graph1
for (let node of Object.keys(graph1)) {
let new_name = node + ',1';
if (new_name in graph1) {
new_name = new_name + ',n';
}
graph_ops.rename_vertex(graph1, node, new_name, true);
// check if current node is an accept node for graph1
if (graph1[new_name].is_final) {
graph1accept.push(new_name);
graph1[new_name].is_final = false;
}
}
// modify names of all nodes in graph2
for (let node of Object.keys(graph2)) {
let new_name = node + ',2';
if (new_name in graph2) {
new_name = new_name + ',n';
}
graph_ops.rename_vertex(graph2, node, new_name, true);
// check if current node is graph2's start node
if (graph2[new_name].is_start) {
graph2start = new_name;
graph2[new_name].is_start = false;
}
}
// add all nodes from graph1 and graph2 to a new graph
let graph = {};
for (let node of Object.keys(graph1)) {
graph[node] = structuredClone(graph1[node]);
}
for (let node of Object.keys(graph2)) {
graph[node] = structuredClone(graph2[node]);
}
// create edges from all accept nodes in graph1 to the start node in graph2
for (let node of graph1accept) {
graph[node].out.push(graph_components.make_edge(node, graph2start, consts.EMPTY_SYMBOL,
0.5, 0, 0, 0, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL));
}
return graph;
}
/**
* performs the union operation on two graphs
* @param {string} graph - the graph
* @returns {string} the new resulting graph of the kleene operation
*/
export function kleene(graph) {
// make new start and end states
graph['new start'] = graph_components.make_vertex('new start', 300, 300, consts.DEFAULT_VERTEX_RADIUS, true, false, []);
graph['new end'] = graph_components.make_vertex('new end', 1000, 300, consts.DEFAULT_VERTEX_RADIUS, false, true, []);
graph['new start'].out.push(graph_components.make_edge('new start', 'new end', consts.EMPTY_SYMBOL, 0.5, 0, 0, 0, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL));
let oldStart = '';
for (let node of Object.keys(graph)) {
// for each start state in original graph, create an edge from new start state to original start state
// set node to not be a start state
if (graph[node].is_start && node !== 'new start') {
oldStart = node;
graph[node].is_start = false;
graph['new start'].out.push(graph_components.make_edge('new start', node, consts.EMPTY_SYMBOL, 0.5, 0, 0, 0, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL));
}
// for each accept state in original graph, create an edge from that node to new accept state
// set each node to not be an accept state
if (graph[node].is_final && node !== 'new end') {
graph[node].is_final = false;
graph[node].out.push(graph_components.make_edge(node, oldStart, consts.EMPTY_SYMBOL, 0.5, 0, 0, 0, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL));
graph[node].out.push(graph_components.make_edge(node, 'new end', consts.EMPTY_SYMBOL, 0.5, 0, 0, 0, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL));
}
}
return graph;
}
function single_transition(char) {
let graph = {};
graph['q0'] = graph_components.make_vertex('q0', 300, 300, consts.DEFAULT_VERTEX_RADIUS, true, false, []);
graph['q1'] = graph_components.make_vertex('q1', 300, 300, consts.DEFAULT_VERTEX_RADIUS, false, true, []);
graph['q0'].out.push(graph_components.make_edge('q0', 'q1', char, 0.5, 0, 0, 0, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL, consts.EMPTY_SYMBOL));
return graph;
}
export function thompson(regex) {
// stack of NFA pieces
let stack = new util.Stack();
for (let c of regex) {
if (c.match(/[a-zA-Z0-9]/i) || c === consts.EMPTY || c === consts.SIGMA) {
// make graph of single vertex
stack.push(single_transition(c));
} else if (c === consts.UNION) {
let g2 = stack.pop();
let g1 = stack.pop();
let res = union(g1, g2);
stack.push(res);
} else if (c === consts.CONCAT) {
let g2 = stack.pop();
let g1 = stack.pop();
let res = concat(g1, g2);
stack.push(res);
} else if (c === consts.KLEENE) {
let g = stack.pop();
let res = kleene(g);
stack.push(res);
}
}
return stack.pop();
}
export function create_buttons() {
let input_field = document.getElementById('regex_string');
const insertChar = (char) => {
if (input_field.selectionStart || input_field.selectionStart === '0') {
var startPos = input_field.selectionStart;
var endPos = input_field.selectionEnd;
input_field.value = input_field.value.substring(0, startPos)
+ char
+ input_field.value.substring(endPos, input_field.value.length);
} else {
input_field.value += char;
}
};
const setCaretPosition = (ctrl, pos) => {
// Modern browsers
if (ctrl.setSelectionRange) {
ctrl.focus();
ctrl.setSelectionRange(pos, pos);
// IE8 and below
} else if (ctrl.createTextRange) {
var range = ctrl.createTextRange();
range.collapse(true);
range.moveEnd('character', pos);
range.moveStart('character', pos);
range.select();
}
};
let open_btn = document.getElementById('OPEN');
open_btn.addEventListener('click', () => {
const idx = input_field.selectionStart + 1;
insertChar(consts.OPEN);
setCaretPosition(input_field, idx);
});
let close_btn = document.getElementById('CLOSE');
close_btn.addEventListener('click', () => {
const idx = input_field.selectionStart + 1;
insertChar(consts.CLOSE);
setCaretPosition(input_field, idx);
});
let union_btn = document.getElementById('UNION');
union_btn.addEventListener('click', () => {
const idx = input_field.selectionStart + 1;
insertChar(consts.UNION);
setCaretPosition(input_field, idx);
});
let concat_btn = document.getElementById('CONCAT');
concat_btn.addEventListener('click', () => {
const idx = input_field.selectionStart + 1;
insertChar(consts.CONCAT);
setCaretPosition(input_field, idx);
});
let kleene_btn = document.getElementById('KLEENE');
kleene_btn.addEventListener('click', () => {
const idx = input_field.selectionStart + 1;
insertChar(consts.KLEENE);
setCaretPosition(input_field, idx);
});
// const plus_btn = document.getElementById('PLUS');
let sigma_btn = document.getElementById('SIGMA');
sigma_btn.addEventListener('click', () => {
const idx = input_field.selectionStart + 1;
insertChar(consts.SIGMA);
setCaretPosition(input_field, idx);
});
let empty_btn = document.getElementById('EMPTY_SET');
empty_btn.addEventListener('click', () => {
const idx = input_field.selectionStart + 1;
insertChar(consts.EMPTY_SET);
setCaretPosition(input_field, idx);
});
}
// add function to check for valid regex string
// 4. check for valid character, instant reject for non valid characters
// 5. handle case with empty set
// function taken from https://github.com/flapjs/webapp/blob/master/src/modules/re/machine/RegularExpression.js
export function areParenthesisBalanced(expressionString)
{
let count = 0;
for (let i = 0; i < expressionString.length; i++)
{
let symbol = expressionString.charAt(i);
if (symbol === consts.OPEN) count++;
else if (symbol === consts.CLOSE) count--;
if (count < 0) return false;
}
return count === 0;
}
export function isValidRegex(string) {
// remove spaces from input string
string = string.replace(/\s+/g, '');
// check for invalid parentheses and empty string
if (!areParenthesisBalanced(string) || string === "") {
return false;
}
let prev;
for (const char of string) {
// first character of string
if (prev === undefined) {
prev = char;
}
// check case that we have two operators in a row
if ((char === consts.UNION || char === consts.KLEENE || char === consts.CONCAT)
&&
(prev === consts.UNION || prev === consts.KLEENE || prev === consts.CONCAT)
) {
return false;
}
prev = char;
}
return true;
}
export function process_string(string) {
let injectedConcat = injectConcatSymbols(string);
let postfix = shunting_yard(injectedConcat);
let finalGraph = thompson(postfix);
return finalGraph
}