`
roki
  • 浏览: 60464 次
  • 性别: Icon_minigender_1
  • 来自: 上海
文章分类
社区版块
存档分类
最新评论

《搜索引擎零距离》IRQL语言的解析

阅读更多
package com.rayeen.spider.vertical.util;

import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import com.rayeen.spider.vertical.ParalleIRVirtualMachine;
import com.rayeen.spider.vertical.algorithm.AddFunction;
import com.rayeen.spider.vertical.algorithm.ClearTagFunction;
import com.rayeen.spider.vertical.algorithm.DoubleParameterFunction;
import com.rayeen.spider.vertical.algorithm.EqNullOperator;
import com.rayeen.spider.vertical.algorithm.EqOperator;
import com.rayeen.spider.vertical.algorithm.EqStringOperator;
import com.rayeen.spider.vertical.algorithm.FullUrlFunction;
import com.rayeen.spider.vertical.algorithm.Function;
import com.rayeen.spider.vertical.algorithm.MaxlengthFunction;
import com.rayeen.spider.vertical.algorithm.Operator;
import com.rayeen.spider.vertical.algorithm.RecursiveFunction;
import com.rayeen.spider.vertical.algorithm.ReplaceFunction;
import com.rayeen.spider.vertical.algorithm.SprintfFunction;
import com.rayeen.spider.vertical.algorithm.UneqNullOperator;
import com.rayeen.spider.vertical.algorithm.UneqOperator;
import com.rayeen.spider.vertical.algorithm.UneqStringOperator;
import com.rayeen.spider.vertical.algorithm.UniParameterFunction;
import com.rayeen.spider.vertical.auxiliary.CrawlResultSetCollection;
import com.rayeen.spider.vertical.auxiliary.SemanticException;
import com.rayeen.spider.vertical.auxiliary.TableMerge;
import com.rayeen.spider.vertical.constant.ArgumentType;
import com.rayeen.spider.vertical.constant.ConfConstant;
import com.rayeen.spider.vertical.constant.ErrorType;
import com.rayeen.spider.vertical.constant.FunctionConstant;

public class ResutTree {

	/** Class-wide log4j logger. */
	static final Logger LOG = Logger.getLogger(ResutTree.class);

	/**
	 * Registry of IRQL function name to a prototype {@link Function} object;
	 * filled by a static initializer of this class.
	 */
	static Map<String, Function> FunctionNameMap = new ConcurrentHashMap<String, Function>();

	// Names of the functions that take a single parameter.
	static Set<String> uniParameterFunction = new HashSet<String>();

	// Names of the functions that take two parameters.
	static Set<String> doubleParameterFunction = new HashSet<String>();

	static {
		uniParameterFunction.add(FunctionConstant.FULL_URL);
		uniParameterFunction.add(FunctionConstant.CLEAR_TAG);

		doubleParameterFunction.add(FunctionConstant.MAX_LENGTH);
		doubleParameterFunction.add(FunctionConstant.ADD);

	}

	/**
	 * Creates a fresh instance of the function registered under the given
	 * name, using the no-argument constructor of the registered prototype's
	 * class.
	 *
	 * @param func
	 *            function name as registered in {@code FunctionNameMap}
	 * @return a new function object, or {@code null} when the name is
	 *         {@code null}, is not registered, or the class cannot be
	 *         instantiated (the reason is logged)
	 */
	static Function getFunctionInstance(String func) {

		// ConcurrentHashMap rejects null keys, so guard before the lookup.
		Function prototype = (null == func) ? null : FunctionNameMap.get(func);
		if (null == prototype) {
			LOG.error("unregistered function name:" + func);
			return null;
		}

		Class<? extends Function> cls = prototype.getClass();
		try {
			return cls.newInstance();
		} catch (InstantiationException e) {
			LOG.error("cannot instantiate function:" + func, e);
		} catch (IllegalAccessException e) {
			LOG.error("cannot access constructor of function:" + func, e);
		}

		return null;
	}

	// Matches a functional select item: "name(args) alias", where the alias
	// may carry a ":suffix" (for example "f(..) showF:toByte").
	// group(1) = function name, group(2) = raw argument text, group(3) = alias.
	static Pattern FUNC_PATTERN = Pattern
			.compile("(\\w+)\\s*\\((.+?)\\)\\s+(\\w+(:\\w+)?)");

	// NOTE(review): not read or written anywhere in the visible part of this
	// class - confirm whether it is still needed.
	Map<String, Function> fieldFunctionMap = new ConcurrentHashMap<String, Function>();

	// Display alias -> configured function that computes the value of that
	// output column; filled by getMapResult, read by filterFinalResult.
	Map<String, Function> showFieldFunctionMap = new ConcurrentHashMap<String, Function>();

	// Where-clause operators keyed by their token. Insertion order matters:
	// getMapResult takes the first token contained in a condition, so "!="
	// has to be registered before "=".
	static Map<String, Operator> operators = new LinkedHashMap<String, Operator>();
	static {
		operators.put("!=", new UneqOperator());
		operators.put("=", new EqOperator());
		operators.put("is", new EqNullOperator());
		operators.put("not", new UneqNullOperator());

		// Prototype objects for every supported function name.
		FunctionNameMap.put(FunctionConstant.FULL_URL, new FullUrlFunction());
		FunctionNameMap.put(FunctionConstant.SPRINGTF, new SprintfFunction());
		FunctionNameMap
				.put(FunctionConstant.RECURSIVE, new RecursiveFunction());
		FunctionNameMap.put(FunctionConstant.ADD, new AddFunction());
		FunctionNameMap.put(FunctionConstant.REPLACE, new ReplaceFunction());
		FunctionNameMap.put(FunctionConstant.CLEAR_TAG, new ClearTagFunction());
		FunctionNameMap.put(FunctionConstant.MAX_LENGTH,
				new MaxlengthFunction());

	}

	/**
	 * Creates a result tree over the data collected by one crawl.
	 *
	 * @param rootUrl
	 *            URL handed to the full-url function when it resolves a field
	 * @param hierarchyResult
	 *            hierarchy tables, keyed by table name, each holding a single
	 *            row of field name to value
	 * @param crawlRSC
	 *            collection the shared and per-page result tables are read
	 *            from (the original note here says keys of the per-page
	 *            tables have the form "tableName->extractName" - not
	 *            verifiable from this class alone)
	 */
	public ResutTree(URL rootUrl,
			Map<String, Map<String, String>> hierarchyResult,
			CrawlResultSetCollection crawlRSC) {

		this.crawlRSC = crawlRSC;
		this.hierarchyResult = hierarchyResult;
		this.rootUrl = rootUrl;
	}

	/**
	 * Logs the message at error level and aborts by throwing.
	 *
	 * @param str
	 *            description of the semantic problem
	 * @throws SemanticException
	 *             always, carrying {@code str}
	 */
	static void error(String str) throws SemanticException {
		LOG.error(str);
		throw new SemanticException(str);
	}

	/**
	 * Logs a recoverable problem at warning level; processing continues.
	 *
	 * @param str
	 *            description of the problem
	 */
	static void warn(String str) {
		// Was LOG.error, which made warnings indistinguishable from errors.
		LOG.warn(str);
	}

	// URL passed to FullUrlFunction together with the field value.
	URL rootUrl;

	// Hierarchy tables: table name -> the single row (field name -> value).
	// The initial empty map is replaced by the constructor argument.
	Map<String, Map<String, String>> hierarchyResult = new HashMap<String, Map<String, String>>();

	// Source of the shared and per-page ("page->rule") result tables.
	CrawlResultSetCollection crawlRSC;

	// Target table name -> merge description; tables listed here are skipped
	// by the normal join and processed in a separate pass of getFinalResult.
	Map<String, TableMerge> mergeMap = new ConcurrentHashMap();

	/**
	 * One selected column: page alias, field name inside that page's table and
	 * the key under which the value is shown in the result.
	 */
	class Pfk {
		/** Page (table) alias. */
		String p;

		/** Internal field name. */
		String f;

		/** Display key; falls back to the field name when none is given. */
		String k;

		public Pfk(String p, String f, String k) {
			this.p = p;
			this.f = f;
			this.k = StringUtils.isEmpty(k) ? f : k;
		}

		public String toString() {
			StringBuilder text = new StringBuilder();
			text.append(p).append(":").append(f).append(":").append(k);
			return text.toString();
		}
	}

	/**
	 * One where-clause condition: the operator to apply, how many operands it
	 * takes, and the raw operand list.
	 */
	class Pkpk {

		/** Operand layout; together with the operator it decides how the argument list is read. */
		ArgumentType argumentType;

		Operator operator;

		List<String> argList;

		/**
		 * @param argmentType
		 *            operand layout of the condition
		 * @param operator
		 *            operator evaluated against each row
		 * @param argList
		 *            page alias / field name operands (and, for string
		 *            comparisons, the literal)
		 */
		public Pkpk(ArgumentType argmentType, Operator operator,
				List<String> argList) {
			this.argList = argList;
			this.operator = operator;
			this.argumentType = argmentType;
		}

		public ArgumentType getArgmentType() {
			return argumentType;
		}

		public void setArgmentType(ArgumentType argmentType) {
			this.argumentType = argmentType;
		}

		public Operator getOperator() {
			return operator;
		}

		public void setOperator(Operator operator) {
			this.operator = operator;
		}

		public List<String> getArgList() {
			return argList;
		}

		public void setArgList(List<String> argList) {
			this.argList = argList;
		}

		public String toString() {
			StringBuilder text = new StringBuilder();
			text.append(argumentType).append(":");
			text.append(operator).append(":");
			text.append(argList.toString());
			return text.toString();
		}

	}

	/**
	 * Applies the where-clause conditions to the joined rows and projects each
	 * surviving row onto its display keys.
	 * 
	 * For every row that passes all conditions a new map is built: plain
	 * columns are copied under their display key, then one entry per function
	 * registered in {@code showFieldFunctionMap} is computed and stored under
	 * the function's display key.
	 * 
	 * @param mainRows
	 *            joined rows, keyed by "pageAlias.field"
	 * @param pfkList
	 *            selected columns of ordinary tables
	 * @param hierarchyPfkList
	 *            selected columns of hierarchy tables
	 * @param pkpkList
	 *            where-clause conditions
	 * @return one map (display key to value) per row that satisfies every
	 *         condition
	 * @throws SemanticException
	 *             when the duplicate check below fires
	 */
	private List<Map> filterFinalResult(List<Map<String, String>> mainRows,
			List<Pfk> pfkList, ArrayList<Pfk> hierarchyPfkList,
			List<Pkpk> pkpkList) throws SemanticException {

		// "pageAlias.field" -> display key
		// NOTE(review): the duplicate check tests the display key (tmp.k)
		// against keys of the form "pageAlias.field", so it looks like it can
		// only fire when a display key happens to equal such a key - confirm
		// the intent before relying on it.
		Map<String, String> fieldMap = new HashMap<String, String>();
		for (Pfk tmp : pfkList) {
			if (fieldMap.containsKey(tmp.k)) {
				error("duplicate show key :" + tmp.k);
			} else {
				fieldMap.put(tmp.p + "." + tmp.f, tmp.k);
			}
		}

		for (Pfk tmp : hierarchyPfkList) {
			if (fieldMap.containsKey(tmp.k)) {
				error("duplicate show key :" + tmp.k);
			} else {
				fieldMap.put(tmp.p + "." + tmp.f, tmp.k);
			}
		}

		List<Map> resultList = new ArrayList<Map>();
		for (Map<String, String> res : mainRows) {

			// A row is kept only if every condition evaluates to
			// ConfConstant.TRUE; the first failing condition ends the check.
			boolean fit = true;
			for (Pkpk p : pkpkList) {

				// One column operand: args are (pageAlias, field[, literal]).
				if (p.getArgmentType() == ArgumentType.ONE) {
					String p1 = p.getArgList().get(0);
					String f1 = p.getArgList().get(1);
					if (!p.getOperator().operator(res.get(p1 + "." + f1))
							.equals(ConfConstant.TRUE)) {
						fit = false;
						break;
					}
				}

				// Two column operands: args are (page1, field1, page2, field2).
				if (p.getArgmentType() == ArgumentType.TWO) {
					String p1 = p.getArgList().get(0);
					String f1 = p.getArgList().get(1);

					String p2 = p.getArgList().get(2);
					String f2 = p.getArgList().get(3);

					if (!p.getOperator().operator(res.get(p1 + "." + f1),
							res.get(p2 + "." + f2)).equals(ConfConstant.TRUE)) {
						fit = false;
						break;
					}
				}

			}


			if (fit) {

				// Copy the plain columns under their display key; entries
				// whose display key ends with ":FUNCTION" are skipped.
				Map<String, String> tmpMap = new HashMap<String, String>();
				for (String key : res.keySet()) {
					if (fieldMap.containsKey(key) &&  !fieldMap.get(key).endsWith(":FUNCTION")) {

						String value = res.get(key);

						tmpMap.put(fieldMap.get(key), value);
					}
				}

				// Add the columns produced by functions.
				for (String key : showFieldFunctionMap.keySet()) {
					Function func = showFieldFunctionMap.get(key);
					String fieldValue = "";

					if (func instanceof FullUrlFunction) {
						// Called with (rootUrl, raw field value).
						String field = ((FullUrlFunction) func).getField();
						fieldValue = res.get(field);
						// tmpMap.remove(ParseUtils.parseFieldShowValue(field));
						fieldValue = func.operator(new Object[] { rootUrl,
								fieldValue });

					} else if (func instanceof SprintfFunction) {
						// Called with (format, value of each listed field).
						SprintfFunction sf = (SprintfFunction) func;
						List<String> fields = sf.getFields();
						List<String> args = new ArrayList<String>();
						args.add(sf.getFormat());
						for (String f : fields) {
							args.add(res.get(f));
							// tmpMap.remove(ParseUtils.parseFieldShowValue(f));
						}
						fieldValue = sf.operator(args.toArray());

					} else if (func instanceof ReplaceFunction) {
						// Called with (field value, pattern, replacement).
						ReplaceFunction rpf = (ReplaceFunction) func;
						String field = res.get(rpf.getField());
						fieldValue = rpf.operator(new Object[] { field,
								rpf.getPatternStr(), rpf.getReplaceStr() });

						// tmpMap.remove(ParseUtils.parseFieldShowValue(addf.getField()));
					} else if (func instanceof ClearTagFunction) {
						// Called with the field value only.
						ClearTagFunction ctf = (ClearTagFunction) func;
						String value = res.get(ctf.getField());
						fieldValue = ctf.operator(new Object[] { value });

						// tmpMap.remove(ParseUtils.parseFieldShowValue(addf.getField()));
					} else if (func instanceof DoubleParameterFunction) {
						// The more general type is tried after the specific
						// ones above; covers Add and Maxlength.
						// NOTE(review): "parameter" is read but not passed to
						// operator(); presumably the function object uses its
						// own stored parameter - confirm.
						String field = ((DoubleParameterFunction) func)
								.getField();
						String parameter = ((DoubleParameterFunction) func)
								.getParameter();
						fieldValue = res.get(field);
						fieldValue = func.operator(new Object[] { fieldValue });

					} else if (func instanceof RecursiveFunction) {

						// Called with (function list, then one entry per
						// parameter).
						RecursiveFunction rf = (RecursiveFunction) func;
						List<String> args = new ArrayList<String>();

						String functions = rf.getFunctions();
						args.add(functions);

						String[] params = rf.getParams();

						for (String f : params) {
							// A parameter that does not start with a quote is
							// a field name: pass the field's value.
							if (!f.startsWith("\"")) {
								args.add(res.get(f));
							} else { // Otherwise pass the quoted literal's content.
								args.add(ParseUtils.parseStrContent(f));
							}
							// tmpMap.remove(ParseUtils.parseFieldShowValue(f));
						}
						fieldValue = rf.operator(args.toArray());

					}

					// fieldValue=showFieldFunctionMap.get(key).operator(new
					// Object[]{value});
					// A function's value replaces any plain column stored
					// under the same display key.
					tmpMap.put(key, fieldValue);
				}

				resultList.add(tmpMap);
			}

		}
		return resultList;

	}

	/**
	 * Builds the joined working rows for a parsed IRQL statement and hands
	 * them to {@code filterFinalResult}.
	 * 
	 * Steps: (1) sort the selected columns by page alias so that columns of
	 * one table are adjacent; (2) work out which extra columns the where
	 * clause needs from each selected table; (3) walk the selected columns
	 * left to right, starting a cartesian product whenever a new table begins
	 * and filling further columns of the current table in place; (4) process
	 * the tables described in {@code mergeMap}; (5) copy hierarchy-table
	 * values into every row.
	 * 
	 * @param pageNameMap
	 *            page alias to table name
	 * @param pfkList
	 *            selected columns of ordinary tables (sorted in place)
	 * @param hierarchyPfkList
	 *            selected columns of hierarchy tables
	 * @param pkpkList
	 *            where-clause conditions
	 * @return the filtered, projected rows
	 * @throws SemanticException
	 *             when a hierarchy table is unknown, or from the filter step
	 */
	@SuppressWarnings("unchecked")
	private List<Map> getFinalResult(Map<String, String> pageNameMap,
			List<Pfk> pfkList, ArrayList<Pfk> hierarchyPfkList,
			List<Pkpk> pkpkList) throws SemanticException {

		long threadId = Thread.currentThread().getId();

		// Sort by page alias (case-insensitive).
		Collections.sort(pfkList, new Comparator<Pfk>() {

			public int compare(Pfk p1, Pfk p2) {
				return p1.p.compareToIgnoreCase(p2.p);
			}

		});

		// Page alias -> fields of that page that appear in the select list.
		Map<String, Set<String>> pkfMap = new HashMap<String, Set<String>>();
		for (Pfk tmp : pfkList) {
			if (!pkfMap.containsKey(tmp.p)) {
				pkfMap.put(tmp.p, new HashSet<String>());
			}
			pkfMap.get(tmp.p).add(tmp.f);
		}

		// Page alias -> fields of that page referenced by where conditions.
		// The set may stay empty: fields are only recorded for pages that
		// also appear in the select list.
		Map<String, Set<String>> pkpkMap = new HashMap<String, Set<String>>();

		for (Pkpk tmp : pkpkList) {

			List<String> condArgs = tmp.getArgList();

			if (tmp.getArgmentType() == ArgumentType.ONE
					&& condArgs.size() >= 2) {
				registerConditionField(pkpkMap, pkfMap, condArgs.get(0),
						condArgs.get(1));
			}

			// A two-operand condition carries (page1, field1, page2, field2).
			// The former guard "size() == 2" could never be true for such a
			// list (and would have thrown on get(2)), so join columns were
			// never loaded; both operands are registered now.
			if (tmp.getArgmentType() == ArgumentType.TWO
					&& condArgs.size() >= 4) {
				registerConditionField(pkpkMap, pkfMap, condArgs.get(0),
						condArgs.get(1));
				registerConditionField(pkpkMap, pkfMap, condArgs.get(2),
						condArgs.get(3));
			}

		}

		String curTableName = "";
		String exTableName = "";
		// Taking "select P1.bcname bcname,P2.scame scame,P3.songname
		// songname,P3.downlink downlink" as the backbone, join from left to
		// right.

		List<Map<String, String>> mainRows = new ArrayList<Map<String, String>>();

		for (Pfk pfk : pfkList) {

			// Resolve the table name behind the page alias.
			if (!pageNameMap.containsKey(pfk.p)) {
				ParalleIRVirtualMachine.error("invalid pagename:" + pfk.p,
						ErrorType.SEMANTIC);
			}
			String tableName = pageNameMap.get(pfk.p);

			// Checked before the mergeMap lookup: ConcurrentHashMap throws
			// NullPointerException on a null key.
			if (null == tableName) {
				ParalleIRVirtualMachine.error("invalid page alias" + pfk.p,
						ErrorType.SEMANTIC);
				continue;
			}

			// Merged tables are handled in the dedicated pass below.
			if (mergeMap.containsKey(tableName)) {
				continue;
			}

			curTableName = tableName;

			List<Map<String, String>> rows = new ArrayList<Map<String, String>>();

			// If the data is not in a shared table, look in the per-page
			// tables. A name such as "downloadPage->down" is never found among
			// the shared tables.
			if (crawlRSC.getGlobalShareResultTableMap(tableName).size() == 0) {

				// pageName->ruleName
				String[] prPair = tableName.split("->");
				if (prPair.length == 2) {

					String pageName = prPair[0];
					String ruleName = prPair[1];
					// Neither the shared nor the per-page collection has it.
					if (crawlRSC.getGlobalUniResultTableMap(pageName, ruleName)
							.size() == 0) {
						warn("Thread-" + threadId
								+ ":invalid uniTable pagename:" + pageName
								+ "->" + ruleName);
						continue;
					} else {

						rows = crawlRSC.getGlobalUniResultTableMap(pageName,
								ruleName);
					}
				} else {
					warn("invalid pagename:" + tableName + " or match failed");
				}
			} else {
				// The data is in a shared table.
				rows = crawlRSC.getGlobalShareResultTableMap(tableName);
			}

			if (rows.size() == 0) {
				break;
			}

			// A new table begins.
			if (StringUtils.isNotEmpty(exTableName)
					&& !StringUtils.equalsIgnoreCase(curTableName, exTableName)) {

				// Legacy lookup key: the display key without a trailing
				// ":toByte" marker.
				String legacyKey = pfk.k;
				if (pfk.k.endsWith(ConfConstant.TO_BYTE)) {
					int ix = pfk.k.lastIndexOf(ConfConstant.TO_BYTE);
					legacyKey = legacyKey.substring(0, ix);
				}

				// Cartesian product of mainRows and rows.
				List<Map<String, String>> tmpRows = new ArrayList<Map<String, String>>();
				for (Map<String, String> result : mainRows) {

					for (Map<String, String> map : rows) {
						// New record = left-hand row + this column.
						Map<String, String> tpMap = new HashMap<String, String>();
						tpMap.putAll(result);

						// Read by field name like the branches below; the
						// display key is only a fallback, otherwise an alias
						// that differs from the field name yields null.
						tpMap.put(pfk.p + "." + pfk.f, lookupFieldValue(map,
								pfk, legacyKey));

						// Extra columns needed by where conditions.
						if (pkpkMap.containsKey(pfk.p)) {
							Set<String> ext = pkpkMap.get(pfk.p);
							for (String f : ext) {
								tpMap.put(pfk.p + "." + f, map.get(f));
							}
						}
						tmpRows.add(tpMap);
					}

				}

				mainRows = tmpRows;// cartesian product done

			} else {// further data of the same table

				// The ":FUNCTION" postfix is not part of the stored column
				// name.
				String fld = stripFunctionPostfix(pfk.f);

				if (StringUtils.isEmpty(exTableName)) {

					// Very first table: every source row becomes a main row.
					for (Map<String, String> map : rows) {
						Map<String, String> tpMap = new HashMap<String, String>();

						tpMap.put(pfk.p + "." + pfk.f, map.get(fld));

						if (pkpkMap.containsKey(pfk.p)) {
							Set<String> ext = pkpkMap.get(pfk.p);
							for (String f : ext) {
								tpMap.put(pfk.p + "." + f, map.get(f));
							}
						}

						mainRows.add(tpMap);
					}
				} else {

					// Same table as the previous column: fill the new column
					// into the existing rows, which repeat the source rows in
					// blocks of rows.size().
					String pf = pfk.p + "." + pfk.f;
					for (int i = 0; i < mainRows.size(); i += rows.size()) {

						for (int j = 0; j < rows.size(); j++) {
							mainRows.get(i + j).put(pf, rows.get(j).get(fld));

							if (pkpkMap.containsKey(pfk.p)) {
								Set<String> ext = pkpkMap.get(pfk.p);
								for (String f : ext) {
									mainRows.get(i + j).put(pfk.p + "." + f,
											rows.get(j).get(f));
								}
							}
						}
					}

				}

			}

			exTableName = tableName;

		}

		// Merged tables are processed after all ordinary tables.
		for (String targetKey : mergeMap.keySet()) {

			// Find the page alias that maps to the merge target.
			String p = "";
			for (String pkey : pageNameMap.keySet()) {
				if (pageNameMap.get(pkey).equals(targetKey)) {
					p = pkey;
					break;
				}
			}
			if (StringUtils.isEmpty(p)) {
				ParalleIRVirtualMachine.error(
						"invalid page alias:" + targetKey, ErrorType.SEMANTIC);
			}

			if (StringUtils.isEmpty(targetKey))
				continue;

			TableMerge merge = mergeMap.get(targetKey);
			Set<String> srcTbls = merge.getMergedTable();

			for (String tableName : srcTbls) {

				List<Map<String, String>> rows;

				String[] prPair = tableName.split("->");

				if (prPair.length == 2) { // per-page table
					String pageName = prPair[0].trim();
					String ruleName = prPair[1].trim();

					if (crawlRSC.getGlobalUniResultTableMap(pageName, ruleName)
							.size() == 0) {
						warn("Thread-" + threadId
								+ ":invalid uniTable pagename:" + pageName
								+ "->" + ruleName);
						continue;
					} else {
						rows = crawlRSC.getGlobalUniResultTableMap(pageName,
								ruleName);

						if (mainRows.size() == 0) {

							// Nothing joined yet: the source rows, re-keyed
							// with the page alias, become the main rows.
							List<Map<String, String>> tmpRows = new ArrayList<Map<String, String>>();

							for (Map<String, String> row : rows) {
								Map<String, String> tmpMap = new HashMap<String, String>();
								for (String columName : row.keySet()) {
									tmpMap.put(p + "." + columName, row
											.get(columName));
								}
								tmpRows.add(tmpMap);
							}
							mainRows = tmpRows;
						} else {

							// Cartesian product, e.g. 2 x 2 => 4 rows; values
							// already in the main row win on key clashes.
							List<Map<String, String>> tmpRows = new ArrayList<Map<String, String>>();

							for (Map<String, String> mainColumn : mainRows) {

								for (Map<String, String> row : rows) {
									Map<String, String> tmpMap = new HashMap<String, String>();
									for (String columName : row.keySet()) {
										tmpMap.put(p + "." + columName, row
												.get(columName));
									}
									tmpMap.putAll(mainColumn);
									tmpRows.add(tmpMap);
								}
							}
							mainRows = tmpRows;
						}

					}
				} else {// shared table

					String pageName = tableName.trim();
					if (crawlRSC.getGlobalShareResultTableMap(pageName).size() == 0) {
						warn("Thread-" + threadId + ":invalid  pagename:"
								+ pageName);
						continue;
					} else {
						rows = crawlRSC.getGlobalShareResultTableMap(pageName);

						// NOTE(review): unlike the per-page branch this
						// neither prefixes keys with the alias nor builds a
						// product, and it reuses the collection's own list as
						// mainRows. Kept as found - confirm the intent.
						if (mainRows.size() == 0) {
							mainRows = rows;
						} else {
							for (Map<String, String> mainColumn : mainRows) {
								for (Map<String, String> column : rows) {
									mainColumn.putAll(column);
								}
							}
						}

					}
				}
			}

		}

		// Hierarchy columns are copied straight from the single hierarchy row.
		for (Pfk pfk : hierarchyPfkList) {

			String tableName = pageNameMap.get(pfk.p);

			if (!hierarchyResult.containsKey(tableName)) {
				String err = "invalid hierarchy tableName:" + tableName;
				err += ",\n if u want to specify a  hierarchy tableName, u needn't to write the matchName";
				err += ",\n because only one matchName can be used as a hierarchyTable";
				error(err);
				continue;
			}
			Map<String, String> row = hierarchyResult.get(tableName);

			// For uniformity the row key is "alias.field" here as well; the
			// value is read without the ":FUNCTION" postfix, as for ordinary
			// tables, falling back to the raw field name.
			String pf = pfk.p + "." + pfk.f;
			String value = lookupFieldValue(row, pfk, pfk.f);
			for (int i = 0; i < mainRows.size(); i++) {
				mainRows.get(i).put(pf, value);
			}

		}

		return filterFinalResult(mainRows, pfkList, hierarchyPfkList, pkpkList);

	}

	/** Records a where-clause field for a page; the field is only kept when the page is also selected from. */
	private static void registerConditionField(
			Map<String, Set<String>> pkpkMap, Map<String, Set<String>> pkfMap,
			String page, String field) {
		if (!pkpkMap.containsKey(page)) {
			pkpkMap.put(page, new HashSet<String>());
		}
		if (pkfMap.containsKey(page)) {
			pkpkMap.get(page).add(field);
		}
	}

	/** Returns the field name without a trailing function postfix. */
	private static String stripFunctionPostfix(String field) {
		if (field.endsWith(ConfConstant.FUNCTOIN_POSTFIX)) {
			return field.substring(0, field.length()
					- ConfConstant.FUNCTOIN_POSTFIX.length());
		}
		return field;
	}

	/** Reads a column by its postfix-free field name, falling back to the given legacy key when the row has no such column. */
	private static String lookupFieldValue(Map<String, String> row, Pfk pfk,
			String legacyKey) {
		String fld = stripFunctionPostfix(pfk.f);
		if (row.containsKey(fld)) {
			return row.get(fld);
		}
		return row.get(legacyKey);
	}

	/**
	 * Parses one IRQL statement and evaluates it against the crawl results.
	 * 
	 * Expected shape, as read by this method:
	 * {@code table:alias[,table:alias...]; select item[,item...] [where cond [and cond...]]}.
	 * A select item is {@code P.field [alias]}, {@code P->field [alias]}
	 * (hierarchy table) or {@code function(args) alias}. A condition compares
	 * a column with another column, a quoted string or the null constant.
	 * 
	 * Functions found in the select list are stored in
	 * {@code showFieldFunctionMap} under their alias.
	 * 
	 * @param irql
	 *            the statement text
	 * @return one map of display key to value per result row
	 * @throws SemanticException
	 *             on malformed page definitions, select items, function
	 *             arguments or conditions
	 */
	public List<Map> getMapResult(String irql) throws SemanticException {

		// --- page definitions: everything before the first ';' ---
		int pm = irql.indexOf(";");
		if (-1 == pm) {
			String err = "invalid IRQL format:" + irql;
			err += "\nhave u forget to put ';' after Page Define?";
			ParalleIRVirtualMachine.error(err, ErrorType.SEMANTIC);
		}
		String pageStr = irql.substring(0, pm);

		// alias -> table name
		Map<String, String> pageNameMap = new HashMap<String, String>();

		String[] fields = pageStr.split(",");
		for (String field : fields) {
			String[] kv = field.split(":");
			if (kv.length == 2) {
				pageNameMap.put(kv[1].trim(), kv[0].trim());
			} else {
				error("error pageMap description:" + field);
			}
		}

		// --- split into the select part and the where part ---
		String prefix = "";
		String postfix = "";
		int w = irql.indexOf("where");
		if (w != -1) {
			prefix = irql.substring(pm + 1, w).trim();
			postfix = irql.substring(w);
		} else {
			prefix = irql;
		}

		int s = prefix.indexOf("select");
		if (s == -1) {
			ParalleIRVirtualMachine.error("miss 'select'", ErrorType.GRAMMER);
		}
		s = s + "select".length();

		String fieldStr = prefix.substring(s).trim();

		// --- functional select items ---
		// Columns referenced by function arguments; they must be loaded even
		// though they are not listed as plain select items.
		Set<String> functionalFields = new HashSet<String>();

		Matcher m = FUNC_PATTERN.matcher(fieldStr);

		boolean found = false;
		int last = 0;
		while (m.find(last)) {

			found = true;

			last = m.end();

			String functionName = m.group(1);
			String showField = m.group(3);

			if (null == FunctionNameMap.get(functionName)) {
				error("unexisting function anme :" + functionName);
			}

			// Functions with one parameter.
			if (uniParameterFunction.contains(functionName)) {
				UniParameterFunction func = (UniParameterFunction) getFunctionInstance(functionName);

				String field = m.group(2);

				// Guards against a forgotten alias, which lets the pattern
				// swallow the following item.
				if (field.contains(",") || field.contains(")")) {
					error("invalid Uniunction format");
				}

				// Row keys always use '.', and carry the ":FUNCTION" postfix.
				String tableFieldKey = field.replace("->", ".")
						+ ConfConstant.FUNCTOIN_POSTFIX;

				func.setField(tableFieldKey);

				functionalFields.add(field);
				showFieldFunctionMap.put(showField, func);
			}

			// Functions with two parameters.
			if (doubleParameterFunction.contains(functionName)) {
				DoubleParameterFunction func = (DoubleParameterFunction) getFunctionInstance(functionName);

				String[] args = m.group(2).split(",");

				// Validate the count before touching args[1]; previously a
				// single-argument call raised ArrayIndexOutOfBoundsException
				// instead of this error.
				if (args.length != 2) {
					error("invalid DoubleParameterFunction format");
				}
				func.setParameter(ParseUtils.parseStrContent(args[1].trim()));

				String tableFieldKey = args[0].replace("->", ".")
						+ ConfConstant.FUNCTOIN_POSTFIX;

				func.setField(tableFieldKey);
				// The column must be part of the loaded columns.
				functionalFields.add(args[0]);
				showFieldFunctionMap.put(showField, func);

			}

			// Three parameters: replace(field, pattern, replacement).
			if (functionName.equals(FunctionConstant.REPLACE)) {
				ReplaceFunction rpf = new ReplaceFunction();
				String[] args = m.group(2).split(",");
				if (args.length != 3) {
					error("invalid parameter for replace function");
				}

				String tableFieldKey = args[0].replace("->", ".")
						+ ConfConstant.FUNCTOIN_POSTFIX;

				rpf.setField(tableFieldKey);

				String patternStr = args[1].replace("\\\"", "");
				String replaceStr = args[2].replace("\\\"", "");
				rpf.setPatternStr(patternStr);
				rpf.setReplaceStr(replaceStr);

				// NOTE(review): unlike the branches above this registers the
				// rewritten key (with '.' and postfix) rather than the raw
				// argument - kept as found, confirm against
				// ParseUtils.getRealFieldName.
				functionalFields.add(tableFieldKey);

				showFieldFunctionMap.put(showField, rpf);
			}

			// N parameters: sprintf(format, field...).
			if (functionName.equals(FunctionConstant.SPRINGTF)) {
				SprintfFunction spf = new SprintfFunction();
				String[] args = m.group(2).split(",");

				spf.setFormat(ParseUtils.parseStrContent(args[0].trim()));
				ArrayList<String> spFields = new ArrayList<String>();
				for (int i = 1; i < args.length; i++) {
					// One key form for shared, per-page and hierarchy tables.
					String tableFieldKey = args[i].replace("->", ".")
							+ ConfConstant.FUNCTOIN_POSTFIX;

					spFields.add(tableFieldKey);
					functionalFields.add(args[i]);
				}
				spf.setFields(spFields.toArray(new String[0]));
				showFieldFunctionMap.put(showField, spf);
			}

			// Recursive function (the original note calls it reverse Polish
			// notation): first argument is the function list, the rest are
			// field names or quoted literals.
			if (functionName.equals(FunctionConstant.RECURSIVE)) {
				RecursiveFunction rf = new RecursiveFunction();
				String[] args = m.group(2).split(",");

				rf.setFunctions(ParseUtils.parseStrContent(args[0].trim()));

				List<String> params = new ArrayList<String>();
				for (int i = 1; i < args.length; i++) {

					if (!args[i].trim().startsWith("\"")) {
						String tableFieldKey = args[i].replace("->", ".")
								+ ConfConstant.FUNCTOIN_POSTFIX;

						functionalFields.add(tableFieldKey);
					}
					// Field names and literals are both passed on verbatim.
					params.add(args[i]);
				}
				rf.setParams(params.toArray(new String[0]));
				showFieldFunctionMap.put(showField, rf);
			}

		}

		// Remove the functional items so that only plain items remain.
		if (found) {
			fieldStr = m.replaceAll("");
		}

		ArrayList<Pfk> pfk = new ArrayList<Pfk>();

		ArrayList<Pfk> hierarchyPfk = new ArrayList<Pfk>();

		fields = fieldStr.split(",");

		Set<String> metNames = new HashSet<String>();

		// Functional and plain columns are handled separately. A functional
		// column gets the ":FUNCTION" postfix on its field name so that it
		// does not overwrite a plain column of the same name: with
		// "cleartag(P.f) f, P.f f2" the row then holds both "P.f:FUNCTION"
		// and "P.f". Two functions sharing one column may overwrite each
		// other; that is harmless.
		for (String field : functionalFields) {
			field = field.trim();

			// Hierarchy table reference.
			if (field.indexOf("->") > 0) {
				String[] pf = field.trim().split("->");
				if (pf.length == 2) {
					String f = ParseUtils.getRealFieldName(pf[1]);
					hierarchyPfk.add(new Pfk(pf[0].trim(), f
							+ ConfConstant.FUNCTOIN_POSTFIX, f));
				} else {
					error("error prefix pf:" + field);
				}
			} else {

				String[] pf = field.trim().split("\\.");

				if (pf.length == 2) {
					String f = ParseUtils.getRealFieldName(pf[1]);
					pfk.add(new Pfk(pf[0].trim(), f
							+ ConfConstant.FUNCTOIN_POSTFIX, f));
				} else {
					error("error prefix pf:" + field);
				}
			}
		}

		// --- plain select items ---
		for (String field : fields) {

			if (StringUtils.isBlank(field))
				continue;

			field = field.trim();

			String[] kv = field.trim().split("\\s+");

			if (kv.length == 2) {
				// "column alias": the alias must be unique.
				if (!metNames.contains(kv[1].trim())) {
					metNames.add(kv[1].trim());
				} else {
					error("duplicate show field:" + kv[1]);
				}

				// Hierarchy reference, e.g.
				// select P.songName songName,P->downlink downlink;
				if (kv[0].indexOf("->") > 0) {
					String[] pf = kv[0].split("->");
					if (pf.length == 2) {

						hierarchyPfk.add(new Pfk(pf[0].trim(), pf[1].trim(),
								kv[1].trim()));
					} else {
						error("error prefix pf:" + kv[0]);
					}
				} else {

					String[] pf = kv[0].trim().split("\\.");
					if (pf.length == 2) {

						pfk.add(new Pfk(pf[0].trim(), pf[1].trim(), kv[1]
								.trim()));
					} else {
						error("error prefix pf:" + kv[0]);
					}
				}

			} else if (kv.length == 1) { // select P.songName,P->downlink;
				// No alias: the field name doubles as the display key.
				if (kv[0].indexOf("->") > 0) {
					String[] pf = kv[0].trim().split("->");

					if (pf.length == 2) {
						String f = ParseUtils.getRealFieldName(pf[1]);
						if (!metNames.contains(f)) {
							metNames.add(f);
						} else {
							error("duplicate show field:" + f);
						}

						metNames.add(pf[1]);
						hierarchyPfk.add(new Pfk(pf[0].trim(), f, f));

					} else {
						error("error prefix pf:" + kv[0]);
					}

				} else {

					String[] pf = kv[0].trim().split("\\.");

					if (pf.length == 2) {

						String f = ParseUtils.getRealFieldName(pf[1]);

						if (!metNames.contains(f)) {
							metNames.add(f);
						} else {
							error("duplicate show field:" + f);
						}
						pfk.add(new Pfk(pf[0].trim(), f, pf[1].trim()));
					} else {
						error("error prefix pf:" + kv[0]);
					}
				}
			} else {
				StringBuffer fieldsError = new StringBuffer();
				for (String f : kv) {
					fieldsError.append(f);
				}
				error("error fields description:" + fieldsError);
			}
		}// end foreach fields

		// --- where conditions ---
		w = postfix.indexOf("where");
		if (w != -1) {
			fieldStr = postfix.substring(w + "where".length()).trim();
		} else {
			fieldStr = "";
		}

		ArrayList<Pkpk> pkpk = new ArrayList<Pkpk>();

		fields = new String[0];

		// NOTE(review): splitting on the bare text "and" also cuts inside
		// names that contain it - kept as found.
		if (StringUtils.isNotEmpty(fieldStr)) {
			fields = fieldStr.split("and");
		}

		for (String field : fields) {

			field = field.trim();
			String optag = "=";
			Operator operator = null;

			// Pick the first registered operator token found in the text.
			for (String op : operators.keySet()) {
				if (field.contains(op)) {
					optag = op;
					operator = operators.get(op);
					break;
				}
			}

			// P1.F1=P2.F2, P1.F1=NULL
			String[] kv = field.split(optag);
			if (kv.length == 2) {// only these two forms are supported

				String tableKey = kv[0].trim().replace("->", ".");

				String[] pf = tableKey.split("\\.");

				String p1 = null, f1 = null, p2 = null, f2 = null;
				if (pf.length == 2) {
					p1 = pf[0].trim();
					f1 = pf[1].trim();
				} else {
					error("error post pf");
				}

				List<String> args = new CopyOnWriteArrayList<String>();

				String targetTableKey = kv[1].trim();
				if (targetTableKey.equals(ConfConstant.NULL)) {
					// Null test: one column operand.
					args.addAll(Arrays.asList(new String[] { p1, f1 }));
					Pkpk cmpNullOP = new Pkpk(ArgumentType.ONE, operator, args);
					pkpk.add(cmpNullOP);
				} else if (targetTableKey.startsWith("\"")
						&& targetTableKey.endsWith("\"")) {
					String str = targetTableKey.substring(1, targetTableKey
							.length() - 1);

					// Comparison with a string literal: the literal is bound
					// into a dedicated operator object.
					if (operator instanceof UneqOperator) {
						operator = new UneqStringOperator(str);
					} else {
						operator = new EqStringOperator(str);
					}
					args.addAll(Arrays.asList(new String[] { p1, f1, str }));
					Pkpk cmpStrOP = new Pkpk(ArgumentType.ONE, operator, args);
					pkpk.add(cmpStrOP);

				} else {
					// Comparison of two columns.
					pf = kv[1].trim().split("\\.");
					if (pf.length == 2) {
						p2 = pf[0].trim();
						f2 = pf[1].trim();
						args.addAll(Arrays
								.asList(new String[] { p1, f1, p2, f2 }));
					} else {
						error("error post pf");
					}
					Pkpk cmpOP = new Pkpk(ArgumentType.TWO, operator, args);
					pkpk.add(cmpOP);
				}

			} else {
				error("error fields description");
			}
		}

		return getFinalResult(pageNameMap, pfk, hierarchyPfk, pkpk);

	}

	/**
	 * Scratch entry point: builds three small sample tables in memory. The
	 * query run that once followed is disabled, so nothing is evaluated or
	 * printed.
	 * 
	 * The disabled sample was:
	 * "pagename1:P1,pagename2:P2,pagename3:P3;select P1.bcname bcname,
	 * P2.scname sncame,P3.songname songname,P3.downlink downlink where
	 * P1.bcid=P2.bcid and P2.scid=P3.scid; dao->insert" - the text after the
	 * last ';' was split off as the dao part and the rest passed to
	 * getMapResult.
	 */
	public static void main(String[] argv) {

		// Source data: table name -> rows.
		Map<String, List<Map<String, String>>> pageMap = new HashMap<String, List<Map<String, String>>>();

		// Data of P1.
		List<Map<String, String>> firstPage = new ArrayList<Map<String, String>>();
		firstPage.add(sampleRow("bcid", "v_bcid", "bcname", "v_bcname"));
		firstPage.add(sampleRow("bcid", "v_bcid1", "bcname", "v_bcname1"));
		pageMap.put("pagename1", firstPage);

		// Data of P2.
		List<Map<String, String>> secondPage = new ArrayList<Map<String, String>>();
		secondPage.add(sampleRow("bcid", "v_bcid", "scid", "v_sci", "scname",
				"v_scname"));
		secondPage.add(sampleRow("bcid", "v_bcid1", "scid", "v_sci1",
				"scname", "v_scname1"));
		secondPage.add(sampleRow("bcid", "v_bcid1", "scid", "v_sci2",
				"scname", "v_scname2"));
		pageMap.put("pagename2", secondPage);

		// Data of P3.
		List<Map<String, String>> thirdPage = new ArrayList<Map<String, String>>();
		thirdPage.add(sampleRow("scid", "v_sci", "scname", "v_scname",
				"songname", "v_songname1", "downlink", "v_downlink1"));
		thirdPage.add(sampleRow("scid", "v_sci", "scname", "v_scname",
				"songname", "v_songname2", "downlink", "v_downlink2"));
		thirdPage.add(sampleRow("scid", "v_sci1", "scname", "v_scname",
				"songname", "v_songname3", "downlink", "v_downlink3"));
		thirdPage.add(sampleRow("scid", "v_sci1", "scname", "v_scname",
				"songname", "v_songname4", "downlink", "v_downlink4"));
		pageMap.put("pagename3", thirdPage);

		Map<String, Map<String, String>> hierarchy = new HashMap<String, Map<String, String>>();
	}

	/** Builds one sample row from alternating column names and values. */
	private static Map<String, String> sampleRow(String... columnsAndValues) {
		Map<String, String> row = new HashMap<String, String>();
		for (int i = 0; i + 1 < columnsAndValues.length; i += 2) {
			row.put(columnsAndValues[i], columnsAndValues[i + 1]);
		}
		return row;
	}

	/**
	 * Replaces this object's {@code mergeMap} field with the given map.
	 * The reference is stored directly; no copy is made.
	 *
	 * @param mergeMap map of {@link TableMerge} values keyed by string,
	 *                 stored as-is
	 */
	public void setMergeMap(Map<String, TableMerge> mergeMap) {
		this.mergeMap = mergeMap;
	}

}
分享到:
评论

相关推荐

    IRQL-thread-中文翻译.doc

    windows驱动 中断请求等级 IRQL 微软IRQL_thread文档翻译 中文

    Scheduling, Thread Context, and IRQL.pdf

    Scheduling, Thread Context, and IRQL.pdf

    Scheduling, Thread Context, and IRQL

    This paper presents information about how thread scheduling, thread context, and a processor’s current interrupt request level (IRQL) affect the operation of kernel-mode drivers for the Microsoft® ...

    Scheduling, Thread Context, and IRQL.doc

    Scheduling, Thread Context, and IRQL.doc

    深入解析windows 操作系统第6版第2-3章.中文版扫描

    深入解析windows 操作系统第6版第2-3章.Russinovich.范德成.中文.扫描 内容目录 第2章系统架构 33 2.1 需求和设计目标 33 2.2 操作系统模型 34 2.3 总体架构 35 可移植性 37 对称多处理 38 可伸缩性 40 客户...

    深入解析Windows操作系统中文.part2.rar

    公共信息模型(CIM)和可管理对象的格式语言 240 WMI名字空间 243 类关联 244 WMI实现 247 WMI安全性 248 4.4 本章总结 249 第5章 启动和停机 251 5.1 引导过程 251 x86和x64引导准备 251 x86/x64引导扇区和Ntldr ...

    电脑蓝屏代码解决方法

    1、0x0000000A:IRQL_NOT_LESS_OR_EQUAL .◆错误分析:主要是由问题的驱动程序、有缺陷或不兼容的硬件与软件造成的. 从技术角度讲. 表明在内核模式中有级别进程请求(IRQL)访问其没有权限访问的内存地址. .◇解决...

    windows帮助文档

    Scheduling, Thread Context, and IRQL Locks, Deadlocks, and Synchronization

    操作系统蓝屏错误小全

    蓝屏错误小全:是否你的系统经常蓝屏,请查看我总结的内容。

    简单的蓝屏分析器(用JAVA写的)

    用java写的简单的蓝屏分析器,总结出现蓝屏的各种原因,并提出相应的解决方案

    教你如何解决某些系统蓝屏问题

    问题描述:在论坛上发贴、QQ空间发表日志、网上充值、上某些QQ号码,QQ发表自定义表情等出现蓝屏,文本里说明了如何查看错误原因以及解决办法

    蓝屏部分代码 查询

    └─┘0x0000000A:IRQL_NOT_LESS_OR_EQUAL ◆错误分析:主要是由问题的驱动程序、有缺陷或不兼容的硬件与软件造成的. 从技术角度讲. 表明 在内核模式中存在以太高的进程内部请求级别(IRQL)访问其没有权限访问的...

    Windows驱动程序(WDM)关键概念阐释

    微软关于WDM驱动程序中关键概念的阐释,包括IRP,IRQL,Spin-Lock,取消逻辑,IRP挂起,完成例程,多处理器问题,内存管理,同步处理,用户模式和内核模式,绝对值得收藏!

    iastor.sys 蓝屏问题解决方法

    蓝屏DRIVER_IRQL_NOT_LESS_OR_EQUAL:提示iastor.sys有问题。 查看本本xp sp3的iastor.sys的版本是 8.6.0.1007。网上查到8.6.0.1007的驱动是带有很大BUG的驱动,对xp不友好,对vista也有点问题。 解决方法:下载新版...

    Windows常见错误.txt

    Windows常见错误 1.停止错误编号:0x0000000A  说明文字:IRQL-NOT-LESS-OR-EQUAL

    windows非法操作错误编号详解.doc

    停止错误编号:0x0000000A 说明文字:IRQL-NOT-LESS-OR-EQUAL 通常的原因:驱动程序使用了不正确的内存地址. 解决方法:如果无法登录,则重新启动计算机.当出现可用的操作系统列表时,按F8键.在Windows高级选项菜单屏幕上,...

    系统蓝屏表.蓝屏代码

    0x0000000A:IRQL_NOT_LESS_OR_EQUAL ◆错误分析:主要是由问题的驱动程序、有缺陷或不兼容的硬件与软件造成的. 从技术角 度讲. 表明在内核模式中存在以太高的进程内部请求级别(IRQL)访问其没有权限访问的 内存地址...

    帮你看轻松懂蓝屏代码

    经典蓝屏案意义破解 0x0000000A:IRQL_NOT_LESS_OR_EQUAL ◆错误分析:主要是由问题的驱动程序、有缺陷或不兼容的硬件与软件造成的. 从技术角度讲. 表明在内核模式中存在以太高的进程内部请求级别(IRQL)访问其没有...

    USB-网线转换器驱动

    rtux64w10.sys导致DRIVER_IRQL_NOT_LESS_OR_EQUAL蓝屏的解决方法 Realtek USB FE/GbE NIC NDIS6.40 64-bit Driver的驱动2020

    WIN10USB转串口驱动适合win1064位.zip

    在win10系统下,在更新USB转串口驱动后,出现黄色叹号,无法识别设备,然后串口无法正常使用,下载安装USB转串口驱动后,每次插USB转串口线,电脑就会蓝屏,终止代码:IRQL NOT LESS OR EQUAL 下载此驱动,拔下电脑...

Global site tag (gtag.js) - Google Analytics